HIP: Heterogenous-computing Interface for Portability
hip_vector_types.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
30 
31 #if defined(__HCC__) && (__hcc_workweek__ < 16032)
32 #error("This version of HIP requires a newer version of HCC.");
33 #endif
34 
36 
37 #if defined(__has_attribute)
38  #if __has_attribute(ext_vector_type)
39  #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
40  #else
41  #define __NATIVE_VECTOR__(n, T) T[n]
42  #endif
43 
44 #if defined(__cplusplus)
45  #include <array>
46  #include <iosfwd>
47  #include <type_traits>
48 
49  namespace hip_impl {
50  template<typename, typename, unsigned int> struct Scalar_accessor;
51  } // Namespace hip_impl.
52 
53  namespace std {
54  template<typename T, typename U, unsigned int n>
55  struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
56  : is_integral<T> {};
57  template<typename T, typename U, unsigned int n>
58  struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
59  : is_floating_point<T> {};
60  } // Namespace std.
61 
62  namespace hip_impl {
63  template<typename T, typename Vector, unsigned int idx>
64  struct Scalar_accessor {
65  struct Address {
66  const Scalar_accessor* p;
67 
68  __host__ __device__
69  operator const T*() const noexcept {
70  return &reinterpret_cast<const T*>(p)[idx];
71  }
72  __host__ __device__
73  operator const T*() const volatile noexcept {
74  return &reinterpret_cast<const T*>(p)[idx];
75  }
76  __host__ __device__
77  operator T*() noexcept {
78  return &reinterpret_cast<T*>(
79  const_cast<Scalar_accessor*>(p))[idx];
80  }
81  __host__ __device__
82  operator T*() volatile noexcept {
83  return &reinterpret_cast<T*>(
84  const_cast<Scalar_accessor*>(p))[idx];
85  }
86  };
87 
88  friend
89  inline
90  std::ostream& operator<<(std::ostream& os,
91  const Scalar_accessor& x) noexcept {
92  return os << x.data[idx];
93  }
94  friend
95  inline
96  std::istream& operator>>(std::istream& is,
97  Scalar_accessor& x) noexcept {
98  T tmp;
99  is >> tmp;
100  x.data[idx] = tmp;
101 
102  return is;
103  }
104 
105  // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor
106  Vector data;
107 
108  __host__ __device__
109  operator T() const noexcept { return data[idx]; }
110  __host__ __device__
111  operator T() const volatile noexcept { return data[idx]; }
112 
113 #ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
114  // The conversions to enum are fairly ghastly, but unfortunately used in
115  // some pre-existing, difficult to modify, code.
116  template<
117  typename U,
118  typename std::enable_if<
119  !std::is_same<U, T>{} &&
120  std::is_enum<U>{} &&
121  std::is_convertible<
122  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
123  __host__ __device__
124  operator U() const noexcept { return static_cast<U>(data[idx]); }
125  template<
126  typename U,
127  typename std::enable_if<
128  !std::is_same<U, T>{} &&
129  std::is_enum<U>{} &&
130  std::is_convertible<
131  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
132  __host__ __device__
133  operator U() const volatile noexcept { return static_cast<U>(data[idx]); }
134 #endif
135 
136  __host__ __device__
137  operator T&() noexcept {
138  return reinterpret_cast<
139  T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
140  }
141  __host__ __device__
142  operator volatile T&() volatile noexcept {
143  return reinterpret_cast<
144  volatile T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
145  }
146 
147  __host__ __device__
148  Address operator&() const noexcept { return Address{this}; }
149 
150  __host__ __device__
151  Scalar_accessor& operator=(const Scalar_accessor& x) noexcept {
152  data[idx] = x.data[idx];
153 
154  return *this;
155  }
156  __host__ __device__
157  Scalar_accessor& operator=(T x) noexcept {
158  data[idx] = x;
159 
160  return *this;
161  }
162  __host__ __device__
163  volatile Scalar_accessor& operator=(T x) volatile noexcept {
164  data[idx] = x;
165 
166  return *this;
167  }
168 
169  __host__ __device__
170  Scalar_accessor& operator++() noexcept {
171  ++data[idx];
172  return *this;
173  }
174  __host__ __device__
175  T operator++(int) noexcept {
176  auto r{data[idx]};
177  ++data[idx];
178  return *this;
179  }
180  __host__ __device__
181  Scalar_accessor& operator--() noexcept {
182  --data[idx];
183  return *this;
184  }
185  __host__ __device__
186  T operator--(int) noexcept {
187  auto r{data[idx]};
188  --data[idx];
189  return *this;
190  }
191 
192  // TODO: convertibility is too restrictive, constraint should be on
193  // the operator being invocable with a value of type U.
194  template<
195  typename U,
196  typename std::enable_if<
197  std::is_convertible<U, T>{}>::type* = nullptr>
198  __host__ __device__
199  Scalar_accessor& operator+=(U x) noexcept {
200  data[idx] += x;
201  return *this;
202  }
203  template<
204  typename U,
205  typename std::enable_if<
206  std::is_convertible<U, T>{}>::type* = nullptr>
207  __host__ __device__
208  Scalar_accessor& operator-=(U x) noexcept {
209  data[idx] -= x;
210  return *this;
211  }
212 
213  template<
214  typename U,
215  typename std::enable_if<
216  std::is_convertible<U, T>{}>::type* = nullptr>
217  __host__ __device__
218  Scalar_accessor& operator*=(U x) noexcept {
219  data[idx] *= x;
220  return *this;
221  }
222  template<
223  typename U,
224  typename std::enable_if<
225  std::is_convertible<U, T>{}>::type* = nullptr>
226  __host__ __device__
227  Scalar_accessor& operator/=(U x) noexcept {
228  data[idx] /= x;
229  return *this;
230  }
231  template<
232  typename U = T,
233  typename std::enable_if<std::is_convertible<U, T>{} &&
234  std::is_integral<U>{}>::type* = nullptr>
235  __host__ __device__
236  Scalar_accessor& operator%=(U x) noexcept {
237  data[idx] %= x;
238  return *this;
239  }
240 
241  template<
242  typename U = T,
243  typename std::enable_if<std::is_convertible<U, T>{} &&
244  std::is_integral<U>{}>::type* = nullptr>
245  __host__ __device__
246  Scalar_accessor& operator>>=(U x) noexcept {
247  data[idx] >>= x;
248  return *this;
249  }
250  template<
251  typename U = T,
252  typename std::enable_if<std::is_convertible<U, T>{} &&
253  std::is_integral<U>{}>::type* = nullptr>
254  __host__ __device__
255  Scalar_accessor& operator<<=(U x) noexcept {
256  data[idx] <<= x;
257  return *this;
258  }
259  template<
260  typename U = T,
261  typename std::enable_if<std::is_convertible<U, T>{} &&
262  std::is_integral<U>{}>::type* = nullptr>
263  __host__ __device__
264  Scalar_accessor& operator&=(U x) noexcept {
265  data[idx] &= x;
266  return *this;
267  }
268  template<
269  typename U = T,
270  typename std::enable_if<std::is_convertible<U, T>{} &&
271  std::is_integral<U>{}>::type* = nullptr>
272  __host__ __device__
273  Scalar_accessor& operator|=(U x) noexcept {
274  data[idx] |= x;
275  return *this;
276  }
277  template<
278  typename U = T,
279  typename std::enable_if<std::is_convertible<U, T>{} &&
280  std::is_integral<U>{}>::type* = nullptr>
281  __host__ __device__
282  Scalar_accessor& operator^=(U x) noexcept {
283  data[idx] ^= x;
284  return *this;
285  }
286  };
287 
288  inline
289  constexpr
290  unsigned int next_pot(unsigned int x) {
291  // Precondition: x > 1.
292  return 1u << (32u - __builtin_clz(x - 1u));
293  }
294  } // Namespace hip_impl.
295 
296  template<typename T, unsigned int n> struct HIP_vector_base;
297 
298  template<typename T>
299  struct HIP_vector_base<T, 1> {
300  using Native_vec_ = __NATIVE_VECTOR__(1, T);
301 
302  union {
303  Native_vec_ data;
304 #if __HIP_CLANG_ONLY__
305  struct {
306  T x;
307  };
308 #else
309  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
310 #endif
311  };
312 
313  using value_type = T;
314 
315  __host__ __device__
316  HIP_vector_base() = default;
317  __host__ __device__
318  explicit
319  constexpr
320  HIP_vector_base(T x) noexcept : data{x} {}
321  __host__ __device__
322  constexpr
323  HIP_vector_base(const HIP_vector_base&) = default;
324  __host__ __device__
325  constexpr
326  HIP_vector_base(HIP_vector_base&&) = default;
327  __host__ __device__
328  ~HIP_vector_base() = default;
329 
330  __host__ __device__
331  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
332  #if __has_attribute(ext_vector_type)
333  data = x.data;
334  #else
335  data[0] = x.data[0];
336  #endif
337 
338  return *this;
339  }
340  };
341 
342  template<typename T>
343  struct HIP_vector_base<T, 2> {
344  using Native_vec_ = __NATIVE_VECTOR__(2, T);
345 
346  union
347  #if !__has_attribute(ext_vector_type)
348  alignas(hip_impl::next_pot(2 * sizeof(T)))
349  #endif
350  {
351  Native_vec_ data;
352 #if __HIP_CLANG_ONLY__
353  struct {
354  T x;
355  T y;
356  };
357 #else
358  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
359  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
360 #endif
361  };
362 
363  using value_type = T;
364 
365  __host__ __device__
366  HIP_vector_base() = default;
367  __host__ __device__
368  explicit
369  constexpr
370  HIP_vector_base(T x) noexcept : data{x, x} {}
371  __host__ __device__
372  constexpr
373  HIP_vector_base(T x, T y) noexcept : data{x, y} {}
374  __host__ __device__
375  constexpr
376  HIP_vector_base(const HIP_vector_base&) = default;
377  __host__ __device__
378  constexpr
379  HIP_vector_base(HIP_vector_base&&) = default;
380  __host__ __device__
381  ~HIP_vector_base() = default;
382 
383  __host__ __device__
384  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
385  #if __has_attribute(ext_vector_type)
386  data = x.data;
387  #else
388  data[0] = x.data[0];
389  data[1] = x.data[1];
390  #endif
391 
392  return *this;
393  }
394  };
395 
396  template<typename T>
397  struct HIP_vector_base<T, 3> {
398  struct Native_vec_ {
399  T d[3];
400 
401  __host__ __device__
402  Native_vec_() = default;
403 
404  __host__ __device__
405  explicit
406  constexpr
407  Native_vec_(T x) noexcept : d{x, x, x} {}
408  __host__ __device__
409  constexpr
410  Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
411  __host__ __device__
412  constexpr
413  Native_vec_(const Native_vec_&) = default;
414  __host__ __device__
415  constexpr
416  Native_vec_(Native_vec_&&) = default;
417  __host__ __device__
418  ~Native_vec_() = default;
419 
420  __host__ __device__
421  Native_vec_& operator=(const Native_vec_&) = default;
422  __host__ __device__
423  Native_vec_& operator=(Native_vec_&&) = default;
424 
425  __host__ __device__
426  T& operator[](unsigned int idx) noexcept { return d[idx]; }
427  __host__ __device__
428  T operator[](unsigned int idx) const noexcept { return d[idx]; }
429 
430  __host__ __device__
431  Native_vec_& operator+=(const Native_vec_& x) noexcept
432  {
433  for (auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
434  return *this;
435  }
436  __host__ __device__
437  Native_vec_& operator-=(const Native_vec_& x) noexcept
438  {
439  for (auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
440  return *this;
441  }
442 
443  __host__ __device__
444  Native_vec_& operator*=(const Native_vec_& x) noexcept
445  {
446  for (auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
447  return *this;
448  }
449  __host__ __device__
450  Native_vec_& operator/=(const Native_vec_& x) noexcept
451  {
452  for (auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
453  return *this;
454  }
455 
456  template<
457  typename U = T,
458  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
459  __host__ __device__
460  Native_vec_ operator-() const noexcept
461  {
462  auto r{*this};
463  for (auto&& x : r.d) x = -x;
464  return r;
465  }
466 
467  template<
468  typename U = T,
469  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
470  __host__ __device__
471  Native_vec_ operator~() const noexcept
472  {
473  auto r{*this};
474  for (auto&& x : r.d) x = ~x;
475  return r;
476  }
477  template<
478  typename U = T,
479  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
480  __host__ __device__
481  Native_vec_& operator%=(const Native_vec_& x) noexcept
482  {
483  for (auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
484  return *this;
485  }
486  template<
487  typename U = T,
488  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
489  __host__ __device__
490  Native_vec_& operator^=(const Native_vec_& x) noexcept
491  {
492  for (auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
493  return *this;
494  }
495  template<
496  typename U = T,
497  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
498  __host__ __device__
499  Native_vec_& operator|=(const Native_vec_& x) noexcept
500  {
501  for (auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
502  return *this;
503  }
504  template<
505  typename U = T,
506  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
507  __host__ __device__
508  Native_vec_& operator&=(const Native_vec_& x) noexcept
509  {
510  for (auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
511  return *this;
512  }
513  template<
514  typename U = T,
515  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
516  __host__ __device__
517  Native_vec_& operator>>=(const Native_vec_& x) noexcept
518  {
519  for (auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
520  return *this;
521  }
522  template<
523  typename U = T,
524  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
525  __host__ __device__
526  Native_vec_& operator<<=(const Native_vec_& x) noexcept
527  {
528  for (auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
529  return *this;
530  }
531 
532  using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
533  __host__ __device__
534  Vec3_cmp operator==(const Native_vec_& x) const noexcept
535  {
536  return Vec3_cmp{d[0] == x.d[0], d[1] == x.d[1], d[2] == x.d[2]};
537  }
538  };
539 
540  union {
541  Native_vec_ data;
542  struct {
543  T x;
544  T y;
545  T z;
546  };
547  };
548 
549  using value_type = T;
550 
551  __host__ __device__
552  HIP_vector_base() = default;
553  __host__ __device__
554  explicit
555  constexpr
556  HIP_vector_base(T x) noexcept : data{x, x, x} {}
557  __host__ __device__
558  constexpr
559  HIP_vector_base(T x, T y, T z) noexcept : data{x, y, z} {}
560  __host__ __device__
561  constexpr
562  HIP_vector_base(const HIP_vector_base&) = default;
563  __host__ __device__
564  constexpr
565  HIP_vector_base(HIP_vector_base&&) = default;
566  __host__ __device__
567  ~HIP_vector_base() = default;
568 
569  __host__ __device__
570  HIP_vector_base& operator=(const HIP_vector_base&) = default;
571  __host__ __device__
572  HIP_vector_base& operator=(HIP_vector_base&&) = default;
573  };
574 
575  template<typename T>
576  struct HIP_vector_base<T, 4> {
577  using Native_vec_ = __NATIVE_VECTOR__(4, T);
578 
579  union
580  #if !__has_attribute(ext_vector_type)
581  alignas(hip_impl::next_pot(4 * sizeof(T)))
582  #endif
583  {
584  Native_vec_ data;
585 #if __HIP_CLANG_ONLY__
586  struct {
587  T x;
588  T y;
589  T z;
590  T w;
591  };
592 #else
593  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
594  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
595  hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
596  hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
597 #endif
598  };
599 
600  using value_type = T;
601 
602  __host__ __device__
603  HIP_vector_base() = default;
604  __host__ __device__
605  explicit
606  constexpr
607  HIP_vector_base(T x) noexcept : data{x, x, x, x} {}
608  __host__ __device__
609  constexpr
610  HIP_vector_base(T x, T y, T z, T w) noexcept : data{x, y, z, w} {}
611  __host__ __device__
612  constexpr
613  HIP_vector_base(const HIP_vector_base&) = default;
614  __host__ __device__
615  constexpr
616  HIP_vector_base(HIP_vector_base&&) = default;
617  __host__ __device__
618  ~HIP_vector_base() = default;
619 
620  __host__ __device__
621  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
622  #if __has_attribute(ext_vector_type)
623  data = x.data;
624  #else
625  data[0] = x.data[0];
626  data[1] = x.data[1];
627  data[2] = x.data[2];
628  data[3] = x.data[3];
629  #endif
630 
631  return *this;
632  }
633  };
634 
635  template<typename T, unsigned int rank>
636  struct HIP_vector_type : public HIP_vector_base<T, rank> {
637  using HIP_vector_base<T, rank>::data;
638  using typename HIP_vector_base<T, rank>::Native_vec_;
639 
640  __host__ __device__
641  HIP_vector_type() = default;
642  template<
643  typename U,
644  typename std::enable_if<
645  std::is_convertible<U, T>{}>::type* = nullptr>
646  __host__ __device__
647  explicit
648  constexpr
649  HIP_vector_type(U x) noexcept
650  : HIP_vector_base<T, rank>{static_cast<T>(x)}
651  {}
652  template< // TODO: constrain based on type as well.
653  typename... Us,
654  typename std::enable_if<
655  (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
656  __host__ __device__
657  constexpr
658  HIP_vector_type(Us... xs) noexcept
659  : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
660  {}
661  __host__ __device__
662  constexpr
663  HIP_vector_type(const HIP_vector_type&) = default;
664  __host__ __device__
665  constexpr
666  HIP_vector_type(HIP_vector_type&&) = default;
667  __host__ __device__
668  ~HIP_vector_type() = default;
669 
670  __host__ __device__
671  HIP_vector_type& operator=(const HIP_vector_type&) = default;
672  __host__ __device__
673  HIP_vector_type& operator=(HIP_vector_type&&) = default;
674 
675  // Operators
676  __host__ __device__
677  HIP_vector_type& operator++() noexcept
678  {
679  return *this += HIP_vector_type{1};
680  }
681  __host__ __device__
682  HIP_vector_type operator++(int) noexcept
683  {
684  auto tmp(*this);
685  ++*this;
686  return tmp;
687  }
688 
689  __host__ __device__
690  HIP_vector_type& operator--() noexcept
691  {
692  return *this -= HIP_vector_type{1};
693  }
694  __host__ __device__
695  HIP_vector_type operator--(int) noexcept
696  {
697  auto tmp(*this);
698  --*this;
699  return tmp;
700  }
701 
702  __host__ __device__
703  HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
704  {
705  data += x.data;
706  return *this;
707  }
708  template<
709  typename U,
710  typename std::enable_if<
711  std::is_convertible<U, T>{}>::type* = nullptr>
712  __host__ __device__
713  HIP_vector_type& operator+=(U x) noexcept
714  {
715  return *this += HIP_vector_type{x};
716  }
717 
718  __host__ __device__
719  HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
720  {
721  data -= x.data;
722  return *this;
723  }
724  template<
725  typename U,
726  typename std::enable_if<
727  std::is_convertible<U, T>{}>::type* = nullptr>
728  __host__ __device__
729  HIP_vector_type& operator-=(U x) noexcept
730  {
731  return *this -= HIP_vector_type{x};
732  }
733 
734  __host__ __device__
735  HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
736  {
737  data *= x.data;
738  return *this;
739  }
740  template<
741  typename U,
742  typename std::enable_if<
743  std::is_convertible<U, T>{}>::type* = nullptr>
744  __host__ __device__
745  HIP_vector_type& operator*=(U x) noexcept
746  {
747  return *this *= HIP_vector_type{x};
748  }
749 
750  __host__ __device__
751  HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
752  {
753  data /= x.data;
754  return *this;
755  }
756  template<
757  typename U,
758  typename std::enable_if<
759  std::is_convertible<U, T>{}>::type* = nullptr>
760  __host__ __device__
761  HIP_vector_type& operator/=(U x) noexcept
762  {
763  return *this /= HIP_vector_type{x};
764  }
765 
766  template<
767  typename U = T,
768  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
769  __host__ __device__
770  HIP_vector_type operator-() const noexcept
771  {
772  auto tmp(*this);
773  tmp.data = -tmp.data;
774  return tmp;
775  }
776 
777  template<
778  typename U = T,
779  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
780  __host__ __device__
781  HIP_vector_type operator~() const noexcept
782  {
783  HIP_vector_type r{*this};
784  r.data = ~r.data;
785  return r;
786  }
787 
788  template<
789  typename U = T,
790  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
791  __host__ __device__
792  HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
793  {
794  data %= x.data;
795  return *this;
796  }
797 
798  template<
799  typename U = T,
800  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
801  __host__ __device__
802  HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
803  {
804  data ^= x.data;
805  return *this;
806  }
807 
808  template<
809  typename U = T,
810  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
811  __host__ __device__
812  HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
813  {
814  data |= x.data;
815  return *this;
816  }
817 
818  template<
819  typename U = T,
820  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
821  __host__ __device__
822  HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
823  {
824  data &= x.data;
825  return *this;
826  }
827 
828  template<
829  typename U = T,
830  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
831  __host__ __device__
832  HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
833  {
834  data >>= x.data;
835  return *this;
836  }
837 
838  template<
839  typename U = T,
840  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
841  __host__ __device__
842  HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
843  {
844  data <<= x.data;
845  return *this;
846  }
847  };
848 
849  template<typename T, unsigned int n>
850  __host__ __device__
851  inline
852  constexpr
853  HIP_vector_type<T, n> operator+(
854  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
855  {
856  return HIP_vector_type<T, n>{x} += y;
857  }
858  template<typename T, unsigned int n, typename U>
859  __host__ __device__
860  inline
861  constexpr
862  HIP_vector_type<T, n> operator+(
863  const HIP_vector_type<T, n>& x, U y) noexcept
864  {
865  return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
866  }
867  template<typename T, unsigned int n, typename U>
868  __host__ __device__
869  inline
870  constexpr
871  HIP_vector_type<T, n> operator+(
872  U x, const HIP_vector_type<T, n>& y) noexcept
873  {
874  return HIP_vector_type<T, n>{x} += y;
875  }
876 
877  template<typename T, unsigned int n>
878  __host__ __device__
879  inline
880  constexpr
881  HIP_vector_type<T, n> operator-(
882  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
883  {
884  return HIP_vector_type<T, n>{x} -= y;
885  }
886  template<typename T, unsigned int n, typename U>
887  __host__ __device__
888  inline
889  constexpr
890  HIP_vector_type<T, n> operator-(
891  const HIP_vector_type<T, n>& x, U y) noexcept
892  {
893  return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
894  }
895  template<typename T, unsigned int n, typename U>
896  __host__ __device__
897  inline
898  constexpr
899  HIP_vector_type<T, n> operator-(
900  U x, const HIP_vector_type<T, n>& y) noexcept
901  {
902  return HIP_vector_type<T, n>{x} -= y;
903  }
904 
905  template<typename T, unsigned int n>
906  __host__ __device__
907  inline
908  constexpr
909  HIP_vector_type<T, n> operator*(
910  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
911  {
912  return HIP_vector_type<T, n>{x} *= y;
913  }
914  template<typename T, unsigned int n, typename U>
915  __host__ __device__
916  inline
917  constexpr
918  HIP_vector_type<T, n> operator*(
919  const HIP_vector_type<T, n>& x, U y) noexcept
920  {
921  return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
922  }
923  template<typename T, unsigned int n, typename U>
924  __host__ __device__
925  inline
926  constexpr
927  HIP_vector_type<T, n> operator*(
928  U x, const HIP_vector_type<T, n>& y) noexcept
929  {
930  return HIP_vector_type<T, n>{x} *= y;
931  }
932 
933  template<typename T, unsigned int n>
934  __host__ __device__
935  inline
936  constexpr
937  HIP_vector_type<T, n> operator/(
938  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
939  {
940  return HIP_vector_type<T, n>{x} /= y;
941  }
942  template<typename T, unsigned int n, typename U>
943  __host__ __device__
944  inline
945  constexpr
946  HIP_vector_type<T, n> operator/(
947  const HIP_vector_type<T, n>& x, U y) noexcept
948  {
949  return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
950  }
951  template<typename T, unsigned int n, typename U>
952  __host__ __device__
953  inline
954  constexpr
955  HIP_vector_type<T, n> operator/(
956  U x, const HIP_vector_type<T, n>& y) noexcept
957  {
958  return HIP_vector_type<T, n>{x} /= y;
959  }
960 
961  template<typename V>
962  __host__ __device__
963  inline
964  constexpr
965  bool _hip_any_zero(const V& x, int n) noexcept
966  {
967  return
968  (n == -1) ? true : ((x[n] == 0) ? false : _hip_any_zero(x, n - 1));
969  }
970 
971  template<typename T, unsigned int n>
972  __host__ __device__
973  inline
974  constexpr
975  bool operator==(
976  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
977  {
978  return _hip_any_zero(x.data == y.data, n - 1);
979  }
980  template<typename T, unsigned int n, typename U>
981  __host__ __device__
982  inline
983  constexpr
984  bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
985  {
986  return x == HIP_vector_type<T, n>{y};
987  }
988  template<typename T, unsigned int n, typename U>
989  __host__ __device__
990  inline
991  constexpr
992  bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
993  {
994  return HIP_vector_type<T, n>{x} == y;
995  }
996 
997  template<typename T, unsigned int n>
998  __host__ __device__
999  inline
1000  constexpr
1001  bool operator!=(
1002  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1003  {
1004  return !(x == y);
1005  }
1006  template<typename T, unsigned int n, typename U>
1007  __host__ __device__
1008  inline
1009  constexpr
1010  bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
1011  {
1012  return !(x == y);
1013  }
1014  template<typename T, unsigned int n, typename U>
1015  __host__ __device__
1016  inline
1017  constexpr
1018  bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
1019  {
1020  return !(x == y);
1021  }
1022 
1023  template<
1024  typename T,
1025  unsigned int n,
1026  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1027  __host__ __device__
1028  inline
1029  constexpr
1030  HIP_vector_type<T, n> operator%(
1031  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1032  {
1033  return HIP_vector_type<T, n>{x} %= y;
1034  }
1035  template<
1036  typename T,
1037  unsigned int n,
1038  typename U,
1039  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1040  __host__ __device__
1041  inline
1042  constexpr
1043  HIP_vector_type<T, n> operator%(
1044  const HIP_vector_type<T, n>& x, U y) noexcept
1045  {
1046  return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
1047  }
1048  template<
1049  typename T,
1050  unsigned int n,
1051  typename U,
1052  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1053  __host__ __device__
1054  inline
1055  constexpr
1056  HIP_vector_type<T, n> operator%(
1057  U x, const HIP_vector_type<T, n>& y) noexcept
1058  {
1059  return HIP_vector_type<T, n>{x} %= y;
1060  }
1061 
1062  template<
1063  typename T,
1064  unsigned int n,
1065  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1066  __host__ __device__
1067  inline
1068  constexpr
1069  HIP_vector_type<T, n> operator^(
1070  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1071  {
1072  return HIP_vector_type<T, n>{x} ^= y;
1073  }
1074  template<
1075  typename T,
1076  unsigned int n,
1077  typename U,
1078  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1079  __host__ __device__
1080  inline
1081  constexpr
1082  HIP_vector_type<T, n> operator^(
1083  const HIP_vector_type<T, n>& x, U y) noexcept
1084  {
1085  return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
1086  }
1087  template<
1088  typename T,
1089  unsigned int n,
1090  typename U,
1091  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1092  __host__ __device__
1093  inline
1094  constexpr
1095  HIP_vector_type<T, n> operator^(
1096  U x, const HIP_vector_type<T, n>& y) noexcept
1097  {
1098  return HIP_vector_type<T, n>{x} ^= y;
1099  }
1100 
1101  template<
1102  typename T,
1103  unsigned int n,
1104  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1105  __host__ __device__
1106  inline
1107  constexpr
1108  HIP_vector_type<T, n> operator|(
1109  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1110  {
1111  return HIP_vector_type<T, n>{x} |= y;
1112  }
1113  template<
1114  typename T,
1115  unsigned int n,
1116  typename U,
1117  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1118  __host__ __device__
1119  inline
1120  constexpr
1121  HIP_vector_type<T, n> operator|(
1122  const HIP_vector_type<T, n>& x, U y) noexcept
1123  {
1124  return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
1125  }
1126  template<
1127  typename T,
1128  unsigned int n,
1129  typename U,
1130  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1131  __host__ __device__
1132  inline
1133  constexpr
1134  HIP_vector_type<T, n> operator|(
1135  U x, const HIP_vector_type<T, n>& y) noexcept
1136  {
1137  return HIP_vector_type<T, n>{x} |= y;
1138  }
1139 
1140  template<
1141  typename T,
1142  unsigned int n,
1143  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1144  __host__ __device__
1145  inline
1146  constexpr
1147  HIP_vector_type<T, n> operator&(
1148  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1149  {
1150  return HIP_vector_type<T, n>{x} &= y;
1151  }
1152  template<
1153  typename T,
1154  unsigned int n,
1155  typename U,
1156  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1157  __host__ __device__
1158  inline
1159  constexpr
1160  HIP_vector_type<T, n> operator&(
1161  const HIP_vector_type<T, n>& x, U y) noexcept
1162  {
1163  return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1164  }
1165  template<
1166  typename T,
1167  unsigned int n,
1168  typename U,
1169  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1170  __host__ __device__
1171  inline
1172  constexpr
1173  HIP_vector_type<T, n> operator&(
1174  U x, const HIP_vector_type<T, n>& y) noexcept
1175  {
1176  return HIP_vector_type<T, n>{x} &= y;
1177  }
1178 
1179  template<
1180  typename T,
1181  unsigned int n,
1182  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1183  __host__ __device__
1184  inline
1185  constexpr
1186  HIP_vector_type<T, n> operator>>(
1187  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1188  {
1189  return HIP_vector_type<T, n>{x} >>= y;
1190  }
1191  template<
1192  typename T,
1193  unsigned int n,
1194  typename U,
1195  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1196  __host__ __device__
1197  inline
1198  constexpr
1199  HIP_vector_type<T, n> operator>>(
1200  const HIP_vector_type<T, n>& x, U y) noexcept
1201  {
1202  return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1203  }
1204  template<
1205  typename T,
1206  unsigned int n,
1207  typename U,
1208  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1209  __host__ __device__
1210  inline
1211  constexpr
1212  HIP_vector_type<T, n> operator>>(
1213  U x, const HIP_vector_type<T, n>& y) noexcept
1214  {
1215  return HIP_vector_type<T, n>{x} >>= y;
1216  }
1217 
1218  template<
1219  typename T,
1220  unsigned int n,
1221  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1222  __host__ __device__
1223  inline
1224  constexpr
1225  HIP_vector_type<T, n> operator<<(
1226  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1227  {
1228  return HIP_vector_type<T, n>{x} <<= y;
1229  }
1230  template<
1231  typename T,
1232  unsigned int n,
1233  typename U,
1234  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1235  __host__ __device__
1236  inline
1237  constexpr
1238  HIP_vector_type<T, n> operator<<(
1239  const HIP_vector_type<T, n>& x, U y) noexcept
1240  {
1241  return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1242  }
1243  template<
1244  typename T,
1245  unsigned int n,
1246  typename U,
1247  typename std::enable_if<std::is_arithmetic<U>::value>::type,
1248  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1249  __host__ __device__
1250  inline
1251  constexpr
1252  HIP_vector_type<T, n> operator<<(
1253  U x, const HIP_vector_type<T, n>& y) noexcept
1254  {
1255  return HIP_vector_type<T, n>{x} <<= y;
1256  }
1257 
1258  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1259  using CUDA_name##1 = HIP_vector_type<T, 1>;\
1260  using CUDA_name##2 = HIP_vector_type<T, 2>;\
1261  using CUDA_name##3 = HIP_vector_type<T, 3>;\
1262  using CUDA_name##4 = HIP_vector_type<T, 4>;
1263 #else
1264  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1265  typedef struct {\
1266  T x;\
1267  } CUDA_name##1;\
1268  typedef struct {\
1269  T x;\
1270  T y;\
1271  } CUDA_name##2;\
1272  typedef struct {\
1273  T x;\
1274  T y;\
1275  T z;\
1276  } CUDA_name##3;\
1277  typedef struct {\
1278  T x;\
1279  T y;\
1280  T z;\
1281  T w;\
1282  } CUDA_name##4;
1283 #endif
1284 
1285 __MAKE_VECTOR_TYPE__(uchar, unsigned char);
1286 __MAKE_VECTOR_TYPE__(char, char);
1287 __MAKE_VECTOR_TYPE__(ushort, unsigned short);
1288 __MAKE_VECTOR_TYPE__(short, short);
1289 __MAKE_VECTOR_TYPE__(uint, unsigned int);
1290 __MAKE_VECTOR_TYPE__(int, int);
1291 __MAKE_VECTOR_TYPE__(ulong, unsigned long);
1292 __MAKE_VECTOR_TYPE__(long, long);
1293 __MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1294 __MAKE_VECTOR_TYPE__(longlong, long long);
1295 __MAKE_VECTOR_TYPE__(float, float);
1296 __MAKE_VECTOR_TYPE__(double, double);
1297 
1298 #ifdef __cplusplus
1299 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1300  static inline __device__ __host__ \
1301  type make_##type(comp x) { type r{x}; return r; }
1302 
1303 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1304  static inline __device__ __host__ \
1305  type make_##type(comp x, comp y) { type r{x, y}; return r; }
1306 
1307 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1308  static inline __device__ __host__ \
1309  type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
1310 
1311 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1312  static inline __device__ __host__ \
1313  type make_##type(comp x, comp y, comp z, comp w) { \
1314  type r{x, y, z, w}; \
1315  return r; \
1316  }
1317 #else
1318  #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1319  static inline __device__ __host__ \
1320  type make_##type(comp x) { type r; r.x =x; return r; }
1321 
1322  #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1323  static inline __device__ __host__ \
1324  type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
1325 
1326  #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1327  static inline __device__ __host__ \
1328  type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
1329 
1330  #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1331  static inline __device__ __host__ \
1332  type make_##type(comp x, comp y, comp z, comp w) { \
1333  type r; r.x=x; r.y=y; r.z=z; r.w=w; \
1334  return r; \
1335  }
1336 #endif
1337 
1338 DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
1339 DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
1340 DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
1341 DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
1342 
1343 DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
1344 DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
1345 DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
1346 DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
1347 
1348 DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
1349 DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
1350 DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
1351 DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
1352 
1353 DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
1354 DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
1355 DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
1356 DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
1357 
1358 DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
1359 DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
1360 DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
1361 DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
1362 
1363 DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
1364 DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
1365 DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
1366 DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
1367 
1368 DECLOP_MAKE_ONE_COMPONENT(float, float1);
1369 DECLOP_MAKE_TWO_COMPONENT(float, float2);
1370 DECLOP_MAKE_THREE_COMPONENT(float, float3);
1371 DECLOP_MAKE_FOUR_COMPONENT(float, float4);
1372 
1373 DECLOP_MAKE_ONE_COMPONENT(double, double1);
1374 DECLOP_MAKE_TWO_COMPONENT(double, double2);
1375 DECLOP_MAKE_THREE_COMPONENT(double, double3);
1376 DECLOP_MAKE_FOUR_COMPONENT(double, double4);
1377 
1378 DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
1379 DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
1380 DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
1381 DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
1382 
1383 DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
1384 DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
1385 DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
1386 DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
1387 
1388 DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
1389 DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
1390 DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
1391 DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
1392 
1393 DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
1394 DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
1395 DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
1396 DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
1397 #else // !defined(__has_attribute)
1398 
1399 #if defined(_MSC_VER)
1400 #include <mmintrin.h>
1401 #include <xmmintrin.h>
1402 #include <emmintrin.h>
1403 #include <immintrin.h>
1404 
1405 typedef union { char data; } char1;
1406 typedef union { char data[2]; } char2;
1407 typedef union { char data[4]; } char4;
1408 typedef union { char4 data; } char3;
1409 typedef union { __m64 data; } char8;
1410 typedef union { __m128i data; } char16;
1411 
1412 typedef union { unsigned char data; } uchar1;
1413 typedef union { unsigned char data[2]; } uchar2;
1414 typedef union { unsigned char data[4]; } uchar4;
1415 typedef union { uchar4 data; } uchar3;
1416 typedef union { __m64 data; } uchar8;
1417 typedef union { __m128i data; } uchar16;
1418 
1419 typedef union { short data; } short1;
1420 typedef union { short data[2]; } short2;
1421 typedef union { __m64 data; } short4;
1422 typedef union { short4 data; } short3;
1423 typedef union { __m128i data; } short8;
1424 typedef union { __m128i data[2]; } short16;
1425 
1426 typedef union { unsigned short data; } ushort1;
1427 typedef union { unsigned short data[2]; } ushort2;
1428 typedef union { __m64 data; } ushort4;
1429 typedef union { ushort4 data; } ushort3;
1430 typedef union { __m128i data; } ushort8;
1431 typedef union { __m128i data[2]; } ushort16;
1432 
1433 typedef union { int data; } int1;
1434 typedef union { __m64 data; } int2;
1435 typedef union { __m128i data; } int4;
1436 typedef union { int4 data; } int3;
1437 typedef union { __m128i data[2]; } int8;
1438 typedef union { __m128i data[4];} int16;
1439 
1440 typedef union { unsigned int data; } uint1;
1441 typedef union { __m64 data; } uint2;
1442 typedef union { __m128i data; } uint4;
1443 typedef union { uint4 data; } uint3;
1444 typedef union { __m128i data[2]; } uint8;
1445 typedef union { __m128i data[4]; } uint16;
1446 
1447 #if !defined(_WIN64)
1448 typedef union { int data; } long1;
1449 typedef union { __m64 data; } long2;
1450 typedef union { __m128i data; } long4;
1451 typedef union { long4 data; } long3;
1452 typedef union { __m128i data[2]; } long8;
1453 typedef union { __m128i data[4]; } long16;
1454 
1455 typedef union { unsigned int data; } ulong1;
1456 typedef union { __m64 data; } ulong2;
1457 typedef union { __m128i data; } ulong4;
1458 typedef union { ulong4 data; } ulong3;
1459 typedef union { __m128i data[2]; } ulong8;
1460 typedef union { __m128i data[4]; } ulong16;
1461 #else // defined(_WIN64)
1462 typedef union { __m64 data; } long1;
1463 typedef union { __m128i data; } long2;
1464 typedef union { __m128i data[2]; } long4;
1465 typedef union { long4 data; } long3;
1466 typedef union { __m128i data[4]; } long8;
1467 typedef union { __m128i data[8]; } long16;
1468 
1469 typedef union { __m64 data; } ulong1;
1470 typedef union { __m128i data; } ulong2;
1471 typedef union { __m128i data[2]; } ulong4;
1472 typedef union { ulong4 data; } ulong3;
1473 typedef union { __m128i data[4]; } ulong8;
1474 typedef union { __m128i data[8]; } ulong16;
1475 #endif // defined(_WIN64)
1476 
1477 typedef union { __m64 data; } longlong1;
1478 typedef union { __m128i data; } longlong2;
1479 typedef union { __m128i data[2]; } longlong4;
1480 typedef union { longlong4 data; } longlong3;
1481 typedef union { __m128i data[4]; } longlong8;
1482 typedef union { __m128i data[8]; } longlong16;
1483 
1484 typedef union { __m64 data; } ulonglong1;
1485 typedef union { __m128i data; } ulonglong2;
1486 typedef union { __m128i data[2]; } ulonglong4;
1487 typedef union { ulonglong4 data; } ulonglong3;
1488 typedef union { __m128i data[4]; } ulonglong8;
1489 typedef union { __m128i data[8]; } ulonglong16;
1490 
1491 typedef union { float data; } float1;
1492 typedef union { __m64 data; } float2;
1493 typedef union { __m128 data; } float4;
1494 typedef union { float4 data; } float3;
1495 typedef union { __m256 data; } float8;
1496 typedef union { __m256 data[2]; } float16;
1497 
1498 typedef union { double data; } double1;
1499 typedef union { __m128d data; } double2;
1500 typedef union { __m256d data; } double4;
1501 typedef union { double4 data; } double3;
1502 typedef union { __m256d data[2]; } double8;
1503 typedef union { __m256d data[4]; } double16;
1504 
1505 #else // !defined(_MSC_VER)
1506 
1507 typedef union { char data; } char1;
1508 typedef union { char data[2]; } char2;
1509 typedef union { char data[4]; } char4;
1510 typedef union { char data[8]; } char8;
1511 typedef union { char data[16]; } char16;
1512 typedef union { char4 data; } char3;
1513 
1514 typedef union { unsigned char data; } uchar1;
1515 typedef union { unsigned char data[2]; } uchar2;
1516 typedef union { unsigned char data[4]; } uchar4;
1517 typedef union { unsigned char data[8]; } uchar8;
1518 typedef union { unsigned char data[16]; } uchar16;
1519 typedef union { uchar4 data; } uchar3;
1520 
1521 typedef union { short data; } short1;
1522 typedef union { short data[2]; } short2;
1523 typedef union { short data[4]; } short4;
1524 typedef union { short data[8]; } short8;
1525 typedef union { short data[16]; } short16;
1526 typedef union { short4 data; } short3;
1527 
1528 typedef union { unsigned short data; } ushort1;
1529 typedef union { unsigned short data[2]; } ushort2;
1530 typedef union { unsigned short data[4]; } ushort4;
1531 typedef union { unsigned short data[8]; } ushort8;
1532 typedef union { unsigned short data[16]; } ushort16;
1533 typedef union { ushort4 data; } ushort3;
1534 
1535 typedef union { int data; } int1;
1536 typedef union { int data[2]; } int2;
1537 typedef union { int data[4]; } int4;
1538 typedef union { int data[8]; } int8;
1539 typedef union { int data[16]; } int16;
1540 typedef union { int4 data; } int3;
1541 
1542 typedef union { unsigned int data; } uint1;
1543 typedef union { unsigned int data[2]; } uint2;
1544 typedef union { unsigned int data[4]; } uint4;
1545 typedef union { unsigned int data[8]; } uint8;
1546 typedef union { unsigned int data[16]; } uint16;
1547 typedef union { uint4 data; } uint3;
1548 
1549 typedef union { long data; } long1;
1550 typedef union { long data[2]; } long2;
1551 typedef union { long data[4]; } long4;
1552 typedef union { long data[8]; } long8;
1553 typedef union { long data[16]; } long16;
1554 typedef union { long4 data; } long3;
1555 
1556 typedef union { unsigned long data; } ulong1;
1557 typedef union { unsigned long data[2]; } ulong2;
1558 typedef union { unsigned long data[4]; } ulong4;
1559 typedef union { unsigned long data[8]; } ulong8;
1560 typedef union { unsigned long data[16]; } ulong16;
1561 typedef union { ulong4 data; } ulong3;
1562 
1563 typedef union { long long data; } longlong1;
1564 typedef union { long long data[2]; } longlong2;
1565 typedef union { long long data[4]; } longlong4;
1566 typedef union { long long data[8]; } longlong8;
1567 typedef union { long long data[16]; } longlong16;
1568 typedef union { longlong4 data; } longlong3;
1569 
1570 typedef union { unsigned long long data; } ulonglong1;
1571 typedef union { unsigned long long data[2]; } ulonglong2;
1572 typedef union { unsigned long long data[4]; } ulonglong4;
1573 typedef union { unsigned long long data[8]; } ulonglong8;
1574 typedef union { unsigned long long data[16]; } ulonglong16;
1575 typedef union { ulonglong4 data; } ulonglong3;
1576 
1577 typedef union { float data; } float1;
1578 typedef union { float data[2]; } float2;
1579 typedef union { float data[4]; } float4;
1580 typedef union { float data[8]; } float8;
1581 typedef union { float data[16]; } float16;
1582 typedef union { float4 data; } float3;
1583 
1584 typedef union { double data; } double1;
1585 typedef union { double data[2]; } double2;
1586 typedef union { double data[4]; } double4;
1587 typedef union { double data[8]; } double8;
1588 typedef union { double data[16]; } double16;
1589 typedef union { double4 data; } double3;
1590 
1591 #endif // defined(_MSC_VER)
1592 #endif // defined(__has_attribute)
1593 #endif
uint2
Definition: hip_vector_types.h:1543
longlong2
Definition: hip_vector_types.h:1564
int4
Definition: hip_vector_types.h:1537
double8
Definition: hip_vector_types.h:1587
long2
Definition: hip_vector_types.h:1550
ulong16
Definition: hip_vector_types.h:1560
uchar2
Definition: hip_vector_types.h:1515
ulong4
Definition: hip_vector_types.h:1558
char4
Definition: hip_vector_types.h:1509
ushort1
Definition: hip_vector_types.h:1528
char2
Definition: hip_vector_types.h:1508
int8
Definition: hip_vector_types.h:1538
ulong1
Definition: hip_vector_types.h:1556
double2
Definition: hip_vector_types.h:1585
uint3
Definition: hip_vector_types.h:1547
long3
Definition: hip_vector_types.h:1554
ulong8
Definition: hip_vector_types.h:1559
long16
Definition: hip_vector_types.h:1553
uint1
Definition: hip_vector_types.h:1542
int3
Definition: hip_vector_types.h:1540
long1
Definition: hip_vector_types.h:1549
uint16
Definition: hip_vector_types.h:1546
float4
Definition: hip_vector_types.h:1579
double16
Definition: hip_vector_types.h:1588
char3
Definition: hip_vector_types.h:1512
short3
Definition: hip_vector_types.h:1526
char1
Definition: hip_vector_types.h:1507
longlong3
Definition: hip_vector_types.h:1568
ulong2
Definition: hip_vector_types.h:1557
ulonglong8
Definition: hip_vector_types.h:1573
__host__
#define __host__
Definition: host_defines.h:41
float8
Definition: hip_vector_types.h:1580
int2
Definition: hip_vector_types.h:1536
host_defines.h
TODO-doc.
float3
Definition: hip_vector_types.h:1582
ulonglong16
Definition: hip_vector_types.h:1574
uchar3
Definition: hip_vector_types.h:1519
ulonglong4
Definition: hip_vector_types.h:1572
float2
Definition: hip_vector_types.h:1578
ushort16
Definition: hip_vector_types.h:1532
short4
Definition: hip_vector_types.h:1523
longlong4
Definition: hip_vector_types.h:1565
uchar16
Definition: hip_vector_types.h:1518
ushort8
Definition: hip_vector_types.h:1531
short1
Definition: hip_vector_types.h:1521
int1
Definition: hip_vector_types.h:1535
double3
Definition: hip_vector_types.h:1589
char16
Definition: hip_vector_types.h:1511
ulonglong2
Definition: hip_vector_types.h:1571
short8
Definition: hip_vector_types.h:1524
longlong8
Definition: hip_vector_types.h:1566
uchar4
Definition: hip_vector_types.h:1516
ulonglong3
Definition: hip_vector_types.h:1575
ushort4
Definition: hip_vector_types.h:1530
float16
Definition: hip_vector_types.h:1581
float1
Definition: hip_vector_types.h:1577
short16
Definition: hip_vector_types.h:1525
longlong1
Definition: hip_vector_types.h:1563
uchar1
Definition: hip_vector_types.h:1514
uint8
Definition: hip_vector_types.h:1545
short2
Definition: hip_vector_types.h:1522
long8
Definition: hip_vector_types.h:1552
ulong3
Definition: hip_vector_types.h:1561
uchar8
Definition: hip_vector_types.h:1517
double4
Definition: hip_vector_types.h:1586
longlong16
Definition: hip_vector_types.h:1567
ulonglong1
Definition: hip_vector_types.h:1570
ushort2
Definition: hip_vector_types.h:1529
double1
Definition: hip_vector_types.h:1584
ushort3
Definition: hip_vector_types.h:1533
char8
Definition: hip_vector_types.h:1510
uint4
Definition: hip_vector_types.h:1544
int16
Definition: hip_vector_types.h:1539
long4
Definition: hip_vector_types.h:1551