HIP: Heterogenous-computing Interface for Portability
hip_vector_types.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
30 
31 #if defined(__HCC__) && (__hcc_workweek__ < 16032)
32 #error("This version of HIP requires a newer version of HCC.");
33 #endif
34 
36 
37 #if !defined(_MSC_VER)
38  #if __has_attribute(ext_vector_type)
39  #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n)))
40  #else
41  #define __NATIVE_VECTOR__(n, ...) [n]
42 #endif
43 
44 #if defined(__cplusplus)
45  #include <array>
46  #include <iosfwd>
47  #include <type_traits>
48 
49  namespace hip_impl {
50  template<typename, typename, unsigned int> struct Scalar_accessor;
51  } // Namespace hip_impl.
52 
53  namespace std {
54  template<typename T, typename U, unsigned int n>
55  struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
56  : is_integral<T> {};
57  template<typename T, typename U, unsigned int n>
58  struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
59  : is_floating_point<T> {};
60  } // Namespace std.
61 
62  namespace hip_impl {
63  template<typename T, typename Vector, unsigned int idx>
64  struct Scalar_accessor {
65  struct Address {
66  const Scalar_accessor* p;
67 
68  __host__ __device__
69  operator const T*() const noexcept {
70  return &reinterpret_cast<const T*>(p)[idx];
71  }
72  __host__ __device__
73  operator const T*() const volatile noexcept {
74  return &reinterpret_cast<const T*>(p)[idx];
75  }
76  __host__ __device__
77  operator T*() noexcept {
78  return &reinterpret_cast<T*>(
79  const_cast<Scalar_accessor*>(p))[idx];
80  }
81  __host__ __device__
82  operator T*() volatile noexcept {
83  return &reinterpret_cast<T*>(
84  const_cast<Scalar_accessor*>(p))[idx];
85  }
86  };
87 
88  friend
89  inline
90  std::ostream& operator<<(std::ostream& os,
91  const Scalar_accessor& x) noexcept {
92  return os << x.data[idx];
93  }
94  friend
95  inline
96  std::istream& operator>>(std::istream& is,
97  Scalar_accessor& x) noexcept {
98  T tmp;
99  is >> tmp;
100  x.data[idx] = tmp;
101 
102  return is;
103  }
104 
105  // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor
106  Vector data;
107 
108  __host__ __device__
109  operator T() const noexcept { return data[idx]; }
110  __host__ __device__
111  operator T() const volatile noexcept { return data[idx]; }
112 
113  // The conversions to enum are fairly ghastly, but unfortunately used in
114  // some pre-existing, difficult to modify, code.
115  template<
116  typename U,
117  typename std::enable_if<
118  !std::is_same<U, T>{} &&
119  std::is_enum<U>{} &&
120  std::is_convertible<
121  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
122  __host__ __device__
123  operator U() const noexcept { return static_cast<U>(data[idx]); }
124  template<
125  typename U,
126  typename std::enable_if<
127  !std::is_same<U, T>{} &&
128  std::is_enum<U>{} &&
129  std::is_convertible<
130  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
131  __host__ __device__
132  operator U() const volatile noexcept { return static_cast<U>(data[idx]); }
133 
134  __host__ __device__
135  operator T&() noexcept {
136  return reinterpret_cast<
137  T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
138  }
139  __host__ __device__
140  operator volatile T&() volatile noexcept {
141  return reinterpret_cast<
142  volatile T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
143  }
144 
145  __host__ __device__
146  Address operator&() const noexcept { return Address{this}; }
147 
148  __host__ __device__
149  Scalar_accessor& operator=(const Scalar_accessor& x) noexcept {
150  data[idx] = x.data[idx];
151 
152  return *this;
153  }
154  __host__ __device__
155  Scalar_accessor& operator=(T x) noexcept {
156  data[idx] = x;
157 
158  return *this;
159  }
160  __host__ __device__
161  volatile Scalar_accessor& operator=(T x) volatile noexcept {
162  data[idx] = x;
163 
164  return *this;
165  }
166 
167  __host__ __device__
168  Scalar_accessor& operator++() noexcept {
169  ++data[idx];
170  return *this;
171  }
172  __host__ __device__
173  T operator++(int) noexcept {
174  auto r{data[idx]};
175  ++data[idx];
176  return *this;
177  }
178  __host__ __device__
179  Scalar_accessor& operator--() noexcept {
180  --data[idx];
181  return *this;
182  }
183  __host__ __device__
184  T operator--(int) noexcept {
185  auto r{data[idx]};
186  --data[idx];
187  return *this;
188  }
189 
190  // TODO: convertibility is too restrictive, constraint should be on
191  // the operator being invocable with a value of type U.
192  template<
193  typename U,
194  typename std::enable_if<
195  std::is_convertible<U, T>{}>::type* = nullptr>
196  __host__ __device__
197  Scalar_accessor& operator+=(U x) noexcept {
198  data[idx] += x;
199  return *this;
200  }
201  template<
202  typename U,
203  typename std::enable_if<
204  std::is_convertible<U, T>{}>::type* = nullptr>
205  __host__ __device__
206  Scalar_accessor& operator-=(U x) noexcept {
207  data[idx] -= x;
208  return *this;
209  }
210 
211  template<
212  typename U,
213  typename std::enable_if<
214  std::is_convertible<U, T>{}>::type* = nullptr>
215  __host__ __device__
216  Scalar_accessor& operator*=(U x) noexcept {
217  data[idx] *= x;
218  return *this;
219  }
220  template<
221  typename U,
222  typename std::enable_if<
223  std::is_convertible<U, T>{}>::type* = nullptr>
224  __host__ __device__
225  Scalar_accessor& operator/=(U x) noexcept {
226  data[idx] /= x;
227  return *this;
228  }
229  template<
230  typename U = T,
231  typename std::enable_if<std::is_convertible<U, T>{} &&
232  std::is_integral<U>{}>::type* = nullptr>
233  __host__ __device__
234  Scalar_accessor& operator%=(U x) noexcept {
235  data[idx] %= x;
236  return *this;
237  }
238 
239  template<
240  typename U = T,
241  typename std::enable_if<std::is_convertible<U, T>{} &&
242  std::is_integral<U>{}>::type* = nullptr>
243  __host__ __device__
244  Scalar_accessor& operator>>=(U x) noexcept {
245  data[idx] >>= x;
246  return *this;
247  }
248  template<
249  typename U = T,
250  typename std::enable_if<std::is_convertible<U, T>{} &&
251  std::is_integral<U>{}>::type* = nullptr>
252  __host__ __device__
253  Scalar_accessor& operator<<=(U x) noexcept {
254  data[idx] <<= x;
255  return *this;
256  }
257  template<
258  typename U = T,
259  typename std::enable_if<std::is_convertible<U, T>{} &&
260  std::is_integral<U>{}>::type* = nullptr>
261  __host__ __device__
262  Scalar_accessor& operator&=(U x) noexcept {
263  data[idx] &= x;
264  return *this;
265  }
266  template<
267  typename U = T,
268  typename std::enable_if<std::is_convertible<U, T>{} &&
269  std::is_integral<U>{}>::type* = nullptr>
270  __host__ __device__
271  Scalar_accessor& operator|=(U x) noexcept {
272  data[idx] |= x;
273  return *this;
274  }
275  template<
276  typename U = T,
277  typename std::enable_if<std::is_convertible<U, T>{} &&
278  std::is_integral<U>{}>::type* = nullptr>
279  __host__ __device__
280  Scalar_accessor& operator^=(U x) noexcept {
281  data[idx] ^= x;
282  return *this;
283  }
284  };
285  } // Namespace hip_impl.
286 
287  template<typename T, unsigned int n> struct HIP_vector_base;
288 
289  template<typename T>
290  struct HIP_vector_base<T, 1> {
291  using Native_vec_ = T __NATIVE_VECTOR__(1, T);
292 
293  union {
294  Native_vec_ data;
295  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
296  };
297 
298  using value_type = T;
299 
300  __host__ __device__
301  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
302  #if __has_attribute(ext_vector_type)
303  data = x.data;
304  #else
305  data[0] = x.data[0];
306  #endif
307 
308  return *this;
309  }
310  };
311 
312  template<typename T>
313  struct HIP_vector_base<T, 2> {
314  using Native_vec_ = T __NATIVE_VECTOR__(2, T);
315 
316  union {
317  Native_vec_ data;
318  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
319  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
320  };
321 
322  using value_type = T;
323 
324  __host__ __device__
325  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
326  #if __has_attribute(ext_vector_type)
327  data = x.data;
328  #else
329  data[0] = x.data[0];
330  data[1] = x.data[1];
331  #endif
332 
333  return *this;
334  }
335  };
336 
337  template<typename T>
338  struct HIP_vector_base<T, 3> {
339  struct Native_vec_ {
340  T d[3];
341 
342  __host__ __device__
343  constexpr
344  Native_vec_() = default;
345  __host__ __device__
346  explicit
347  constexpr
348  Native_vec_(T x) noexcept : d{x, x, x} {}
349  __host__ __device__
350  constexpr
351  Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
352  __host__ __device__
353  constexpr
354  Native_vec_(const Native_vec_&) = default;
355  __host__ __device__
356  constexpr
357  Native_vec_(Native_vec_&&) = default;
358  __host__ __device__
359  ~Native_vec_() = default;
360 
361  __host__ __device__
362  Native_vec_& operator=(const Native_vec_&) = default;
363  __host__ __device__
364  Native_vec_& operator=(Native_vec_&&) = default;
365 
366  __host__ __device__
367  T& operator[](unsigned int idx) noexcept { return d[idx]; }
368  __host__ __device__
369  T operator[](unsigned int idx) const noexcept { return d[idx]; }
370 
371  __host__ __device__
372  Native_vec_& operator+=(const Native_vec_& x) noexcept
373  {
374  for (auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
375  return *this;
376  }
377  __host__ __device__
378  Native_vec_& operator-=(const Native_vec_& x) noexcept
379  {
380  for (auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
381  return *this;
382  }
383 
384  __host__ __device__
385  Native_vec_& operator*=(const Native_vec_& x) noexcept
386  {
387  for (auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
388  return *this;
389  }
390  __host__ __device__
391  Native_vec_& operator/=(const Native_vec_& x) noexcept
392  {
393  for (auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
394  return *this;
395  }
396 
397  template<
398  typename U = T,
399  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
400  __host__ __device__
401  Native_vec_ operator-() const noexcept
402  {
403  auto r{*this};
404  for (auto&& x : r.d) x = -x;
405  return r;
406  }
407 
408  template<
409  typename U = T,
410  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
411  __host__ __device__
412  Native_vec_ operator~() const noexcept
413  {
414  auto r{*this};
415  for (auto&& x : r.d) x = ~x;
416  return r;
417  }
418  template<
419  typename U = T,
420  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
421  __host__ __device__
422  Native_vec_& operator%=(const Native_vec_& x) noexcept
423  {
424  for (auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
425  return *this;
426  }
427  template<
428  typename U = T,
429  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
430  __host__ __device__
431  Native_vec_& operator^=(const Native_vec_& x) noexcept
432  {
433  for (auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
434  return *this;
435  }
436  template<
437  typename U = T,
438  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
439  __host__ __device__
440  Native_vec_& operator|=(const Native_vec_& x) noexcept
441  {
442  for (auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
443  return *this;
444  }
445  template<
446  typename U = T,
447  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
448  __host__ __device__
449  Native_vec_& operator&=(const Native_vec_& x) noexcept
450  {
451  for (auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
452  return *this;
453  }
454  template<
455  typename U = T,
456  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
457  __host__ __device__
458  Native_vec_& operator>>=(const Native_vec_& x) noexcept
459  {
460  for (auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
461  return *this;
462  }
463  template<
464  typename U = T,
465  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
466  __host__ __device__
467  Native_vec_& operator<<=(const Native_vec_& x) noexcept
468  {
469  for (auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
470  return *this;
471  }
472 
473  using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
474  __host__ __device__
475  Vec3_cmp operator==(const Native_vec_& x) const noexcept
476  {
477  return Vec3_cmp{d[0] == x.d[0], d[1] == x.d[1], d[2] == x.d[2]};
478  }
479  };
480 
481  union {
482  Native_vec_ data;
483  struct {
484  T x;
485  T y;
486  T z;
487  };
488  };
489 
490  using value_type = T;
491  };
492 
493  template<typename T>
494  struct HIP_vector_base<T, 4> {
495  using Native_vec_ = T __NATIVE_VECTOR__(4, T);
496 
497  union {
498  Native_vec_ data;
499  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
500  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
501  hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
502  hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
503  };
504 
505  using value_type = T;
506 
507  __host__ __device__
508  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
509  #if __has_attribute(ext_vector_type)
510  data = x.data;
511  #else
512  data[0] = x.data[0];
513  data[1] = x.data[1];
514  data[2] = x.data[2];
515  data[3] = x.data[3];
516  #endif
517 
518  return *this;
519  }
520  };
521 
522  template<typename T, unsigned int rank>
523  struct HIP_vector_type : public HIP_vector_base<T, rank> {
524  using HIP_vector_base<T, rank>::data;
525  using typename HIP_vector_base<T, rank>::Native_vec_;
526 
527  inline __host__ __device__
528  HIP_vector_type() = default;
529  template<
530  typename U,
531  typename std::enable_if<
532  std::is_convertible<U, T>{}>::type* = nullptr>
533  explicit inline __host__ __device__
534  HIP_vector_type(U x) noexcept
535  {
536  for (auto i = 0u; i != rank; ++i) data[i] = x;
537  }
538  template< // TODO: constrain based on type as well.
539  typename... Us,
540  typename std::enable_if<
541  (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
542  inline __host__ __device__
543  HIP_vector_type(Us... xs) noexcept
544  {
545  #if __has_attribute(ext_vector_type)
546  new (&data) Native_vec_{static_cast<T>(xs)...};
547  #else
548  new (&data) std::array<T, rank>{static_cast<T>(xs)...};
549  #endif
550  }
551  inline __host__ __device__
552  HIP_vector_type(const HIP_vector_type&) = default;
553  inline __host__ __device__
554  HIP_vector_type(HIP_vector_type&&) = default;
555  inline __host__ __device__
556  ~HIP_vector_type() = default;
557 
558  inline __host__ __device__
559  HIP_vector_type& operator=(const HIP_vector_type&) = default;
560  inline __host__ __device__
561  HIP_vector_type& operator=(HIP_vector_type&&) = default;
562 
563  // Operators
564  inline __host__ __device__
565  HIP_vector_type& operator++() noexcept
566  {
567  return *this += HIP_vector_type{1};
568  }
569  inline __host__ __device__
570  HIP_vector_type operator++(int) noexcept
571  {
572  auto tmp(*this);
573  ++*this;
574  return tmp;
575  }
576 
577  inline __host__ __device__
578  HIP_vector_type& operator--() noexcept
579  {
580  return *this -= HIP_vector_type{1};
581  }
582  inline __host__ __device__
583  HIP_vector_type operator--(int) noexcept
584  {
585  auto tmp(*this);
586  --*this;
587  return tmp;
588  }
589 
590  inline __host__ __device__
591  HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
592  {
593  data += x.data;
594  return *this;
595  }
596  template<
597  typename U,
598  typename std::enable_if<
599  std::is_convertible<U, T>{}>::type* = nullptr>
600  inline __host__ __device__
601  HIP_vector_type& operator+=(U x) noexcept
602  {
603  return *this += HIP_vector_type{x};
604  }
605 
606  inline __host__ __device__
607  HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
608  {
609  data -= x.data;
610  return *this;
611  }
612  template<
613  typename U,
614  typename std::enable_if<
615  std::is_convertible<U, T>{}>::type* = nullptr>
616  inline __host__ __device__
617  HIP_vector_type& operator-=(U x) noexcept
618  {
619  return *this -= HIP_vector_type{x};
620  }
621 
622  inline __host__ __device__
623  HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
624  {
625  data *= x.data;
626  return *this;
627  }
628  template<
629  typename U,
630  typename std::enable_if<
631  std::is_convertible<U, T>{}>::type* = nullptr>
632  inline __host__ __device__
633  HIP_vector_type& operator*=(U x) noexcept
634  {
635  return *this *= HIP_vector_type{x};
636  }
637 
638  inline __host__ __device__
639  HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
640  {
641  data /= x.data;
642  return *this;
643  }
644  template<
645  typename U,
646  typename std::enable_if<
647  std::is_convertible<U, T>{}>::type* = nullptr>
648  inline __host__ __device__
649  HIP_vector_type& operator/=(U x) noexcept
650  {
651  return *this /= HIP_vector_type{x};
652  }
653 
654  template<
655  typename U = T,
656  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
657  inline __host__ __device__
658  HIP_vector_type operator-() noexcept
659  {
660  auto tmp(*this);
661  tmp.data = -tmp.data;
662  return tmp;
663  }
664 
665  template<
666  typename U = T,
667  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
668  inline __host__ __device__
669  HIP_vector_type operator~() noexcept
670  {
671  HIP_vector_type r{*this};
672  r.data = ~r.data;
673  return r;
674  }
675 
676  template<
677  typename U = T,
678  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
679  inline __host__ __device__
680  HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
681  {
682  data %= x.data;
683  return *this;
684  }
685 
686  template<
687  typename U = T,
688  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
689  inline __host__ __device__
690  HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
691  {
692  data ^= x.data;
693  return *this;
694  }
695 
696  template<
697  typename U = T,
698  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
699  inline __host__ __device__
700  HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
701  {
702  data |= x.data;
703  return *this;
704  }
705 
706  template<
707  typename U = T,
708  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
709  inline __host__ __device__
710  HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
711  {
712  data &= x.data;
713  return *this;
714  }
715 
716  template<
717  typename U = T,
718  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
719  inline __host__ __device__
720  HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
721  {
722  data >>= x.data;
723  return *this;
724  }
725 
726  template<
727  typename U = T,
728  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
729  inline __host__ __device__
730  HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
731  {
732  data <<= x.data;
733  return *this;
734  }
735  };
736 
737  template<typename T, unsigned int n>
738  inline __host__ __device__
739  HIP_vector_type<T, n> operator+(
740  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
741  {
742  return HIP_vector_type<T, n>{x} += y;
743  }
744  template<typename T, unsigned int n, typename U>
745  inline __host__ __device__
746  HIP_vector_type<T, n> operator+(
747  const HIP_vector_type<T, n>& x, U y) noexcept
748  {
749  return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
750  }
751  template<typename T, unsigned int n, typename U>
752  inline __host__ __device__
753  HIP_vector_type<T, n> operator+(
754  U x, const HIP_vector_type<T, n>& y) noexcept
755  {
756  return HIP_vector_type<T, n>{x} += y;
757  }
758 
759  template<typename T, unsigned int n>
760  inline __host__ __device__
761  HIP_vector_type<T, n> operator-(
762  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
763  {
764  return HIP_vector_type<T, n>{x} -= y;
765  }
766  template<typename T, unsigned int n, typename U>
767  inline __host__ __device__
768  HIP_vector_type<T, n> operator-(
769  const HIP_vector_type<T, n>& x, U y) noexcept
770  {
771  return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
772  }
773  template<typename T, unsigned int n, typename U>
774  inline __host__ __device__
775  HIP_vector_type<T, n> operator-(
776  U x, const HIP_vector_type<T, n>& y) noexcept
777  {
778  return HIP_vector_type<T, n>{x} -= y;
779  }
780 
781  template<typename T, unsigned int n>
782  inline __host__ __device__
783  HIP_vector_type<T, n> operator*(
784  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
785  {
786  return HIP_vector_type<T, n>{x} *= y;
787  }
788  template<typename T, unsigned int n, typename U>
789  inline __host__ __device__
790  HIP_vector_type<T, n> operator*(
791  const HIP_vector_type<T, n>& x, U y) noexcept
792  {
793  return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
794  }
795  template<typename T, unsigned int n, typename U>
796  inline __host__ __device__
797  HIP_vector_type<T, n> operator*(
798  U x, const HIP_vector_type<T, n>& y) noexcept
799  {
800  return HIP_vector_type<T, n>{x} *= y;
801  }
802 
803  template<typename T, unsigned int n>
804  inline __host__ __device__
805  HIP_vector_type<T, n> operator/(
806  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
807  {
808  return HIP_vector_type<T, n>{x} /= y;
809  }
810  template<typename T, unsigned int n, typename U>
811  inline __host__ __device__
812  HIP_vector_type<T, n> operator/(
813  const HIP_vector_type<T, n>& x, U y) noexcept
814  {
815  return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
816  }
817  template<typename T, unsigned int n, typename U>
818  inline __host__ __device__
819  HIP_vector_type<T, n> operator/(
820  U x, const HIP_vector_type<T, n>& y) noexcept
821  {
822  return HIP_vector_type<T, n>{x} /= y;
823  }
824 
825  template<typename T, unsigned int n>
826  inline __host__ __device__
827  bool operator==(
828  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
829  {
830  auto tmp = x.data == y.data;
831  for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
832  return true;
833  }
834  template<typename T, unsigned int n, typename U>
835  inline __host__ __device__
836  bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
837  {
838  return x == HIP_vector_type<T, n>{y};
839  }
840  template<typename T, unsigned int n, typename U>
841  inline __host__ __device__
842  bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
843  {
844  return HIP_vector_type<T, n>{x} == y;
845  }
846 
847  template<typename T, unsigned int n>
848  inline __host__ __device__
849  bool operator!=(
850  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
851  {
852  return !(x == y);
853  }
854  template<typename T, unsigned int n, typename U>
855  inline __host__ __device__
856  bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
857  {
858  return !(x == y);
859  }
860  template<typename T, unsigned int n, typename U>
861  inline __host__ __device__
862  bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
863  {
864  return !(x == y);
865  }
866 
867  template<
868  typename T,
869  unsigned int n,
870  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
871  inline __host__ __device__
872  HIP_vector_type<T, n> operator%(
873  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
874  {
875  return HIP_vector_type<T, n>{x} %= y;
876  }
877  template<
878  typename T,
879  unsigned int n,
880  typename U,
881  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
882  inline __host__ __device__
883  HIP_vector_type<T, n> operator%(
884  const HIP_vector_type<T, n>& x, U y) noexcept
885  {
886  return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
887  }
888  template<
889  typename T,
890  unsigned int n,
891  typename U,
892  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
893  inline __host__ __device__
894  HIP_vector_type<T, n> operator%(
895  U x, const HIP_vector_type<T, n>& y) noexcept
896  {
897  return HIP_vector_type<T, n>{x} %= y;
898  }
899 
900  template<
901  typename T,
902  unsigned int n,
903  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
904  inline __host__ __device__
905  HIP_vector_type<T, n> operator^(
906  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
907  {
908  return HIP_vector_type<T, n>{x} ^= y;
909  }
910  template<
911  typename T,
912  unsigned int n,
913  typename U,
914  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
915  inline __host__ __device__
916  HIP_vector_type<T, n> operator^(
917  const HIP_vector_type<T, n>& x, U y) noexcept
918  {
919  return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
920  }
921  template<
922  typename T,
923  unsigned int n,
924  typename U,
925  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
926  inline __host__ __device__
927  HIP_vector_type<T, n> operator^(
928  U x, const HIP_vector_type<T, n>& y) noexcept
929  {
930  return HIP_vector_type<T, n>{x} ^= y;
931  }
932 
933  template<
934  typename T,
935  unsigned int n,
936  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
937  inline __host__ __device__
938  HIP_vector_type<T, n> operator|(
939  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
940  {
941  return HIP_vector_type<T, n>{x} |= y;
942  }
943  template<
944  typename T,
945  unsigned int n,
946  typename U,
947  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
948  inline __host__ __device__
949  HIP_vector_type<T, n> operator|(
950  const HIP_vector_type<T, n>& x, U y) noexcept
951  {
952  return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
953  }
954  template<
955  typename T,
956  unsigned int n,
957  typename U,
958  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
959  inline __host__ __device__
960  HIP_vector_type<T, n> operator|(
961  U x, const HIP_vector_type<T, n>& y) noexcept
962  {
963  return HIP_vector_type<T, n>{x} |= y;
964  }
965 
966  template<
967  typename T,
968  unsigned int n,
969  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
970  inline __host__ __device__
971  HIP_vector_type<T, n> operator&(
972  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
973  {
974  return HIP_vector_type<T, n>{x} &= y;
975  }
976  template<
977  typename T,
978  unsigned int n,
979  typename U,
980  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
981  inline __host__ __device__
982  HIP_vector_type<T, n> operator&(
983  const HIP_vector_type<T, n>& x, U y) noexcept
984  {
985  return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
986  }
987  template<
988  typename T,
989  unsigned int n,
990  typename U,
991  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
992  inline __host__ __device__
993  HIP_vector_type<T, n> operator&(
994  U x, const HIP_vector_type<T, n>& y) noexcept
995  {
996  return HIP_vector_type<T, n>{x} &= y;
997  }
998 
999  template<
1000  typename T,
1001  unsigned int n,
1002  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1003  inline __host__ __device__
1004  HIP_vector_type<T, n> operator>>(
1005  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1006  {
1007  return HIP_vector_type<T, n>{x} >>= y;
1008  }
1009  template<
1010  typename T,
1011  unsigned int n,
1012  typename U,
1013  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1014  inline __host__ __device__
1015  HIP_vector_type<T, n> operator>>(
1016  const HIP_vector_type<T, n>& x, U y) noexcept
1017  {
1018  return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1019  }
1020  template<
1021  typename T,
1022  unsigned int n,
1023  typename U,
1024  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1025  inline __host__ __device__
1026  HIP_vector_type<T, n> operator>>(
1027  U x, const HIP_vector_type<T, n>& y) noexcept
1028  {
1029  return HIP_vector_type<T, n>{x} >>= y;
1030  }
1031 
1032  template<
1033  typename T,
1034  unsigned int n,
1035  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1036  inline __host__ __device__
1037  HIP_vector_type<T, n> operator<<(
1038  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1039  {
1040  return HIP_vector_type<T, n>{x} <<= y;
1041  }
1042  template<
1043  typename T,
1044  unsigned int n,
1045  typename U,
1046  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1047  inline __host__ __device__
1048  HIP_vector_type<T, n> operator<<(
1049  const HIP_vector_type<T, n>& x, U y) noexcept
1050  {
1051  return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1052  }
1053  template<
1054  typename T,
1055  unsigned int n,
1056  typename U,
1057  typename std::enable_if<std::is_arithmetic<U>::value>::type,
1058  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1059  inline __host__ __device__
1060  HIP_vector_type<T, n> operator<<(
1061  U x, const HIP_vector_type<T, n>& y) noexcept
1062  {
1063  return HIP_vector_type<T, n>{x} <<= y;
1064  }
1065 
1066  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1067  using CUDA_name##1 = HIP_vector_type<T, 1>;\
1068  using CUDA_name##2 = HIP_vector_type<T, 2>;\
1069  using CUDA_name##3 = HIP_vector_type<T, 3>;\
1070  using CUDA_name##4 = HIP_vector_type<T, 4>;
1071 #else
1072  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1073  typedef struct {\
1074  T x;\
1075  } CUDA_name##1;\
1076  typedef struct {\
1077  T x;\
1078  T y;\
1079  } CUDA_name##2;\
1080  typedef struct {\
1081  T x;\
1082  T y;\
1083  T z;\
1084  } CUDA_name##3;\
1085  typedef struct {\
1086  T x;\
1087  T y;\
1088  T z;\
1089  T w;\
1090  } CUDA_name##4;
1091 #endif
1092 
1093 __MAKE_VECTOR_TYPE__(uchar, unsigned char);
1094 __MAKE_VECTOR_TYPE__(char, char);
1095 __MAKE_VECTOR_TYPE__(ushort, unsigned short);
1096 __MAKE_VECTOR_TYPE__(short, short);
1097 __MAKE_VECTOR_TYPE__(uint, unsigned int);
1098 __MAKE_VECTOR_TYPE__(int, int);
1099 __MAKE_VECTOR_TYPE__(ulong, unsigned long);
1100 __MAKE_VECTOR_TYPE__(long, long);
1101 __MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1102 __MAKE_VECTOR_TYPE__(longlong, long long);
1103 __MAKE_VECTOR_TYPE__(float, float);
1104 __MAKE_VECTOR_TYPE__(double, double);
1105 
1106 #ifdef __cplusplus
1107 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1108  static inline __device__ __host__ \
1109  type make_##type(comp x) { type r{x}; return r; }
1110 
1111 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1112  static inline __device__ __host__ \
1113  type make_##type(comp x, comp y) { type r{x, y}; return r; }
1114 
1115 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1116  static inline __device__ __host__ \
1117  type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
1118 
1119 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1120  static inline __device__ __host__ \
1121  type make_##type(comp x, comp y, comp z, comp w) { \
1122  type r{x, y, z, w}; \
1123  return r; \
1124  }
1125 #else
1126  #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1127  static inline __device__ __host__ \
1128  type make_##type(comp x) { type r; r.x =x; return r; }
1129 
1130  #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1131  static inline __device__ __host__ \
1132  type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
1133 
1134  #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1135  static inline __device__ __host__ \
1136  type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
1137 
1138  #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1139  static inline __device__ __host__ \
1140  type make_##type(comp x, comp y, comp z, comp w) { \
1141  type r; r.x=x; r.y=y; r.z=z; r.w=w; \
1142  return r; \
1143  }
1144 #endif
1145 
1146 DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
1147 DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
1148 DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
1149 DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
1150 
1151 DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
1152 DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
1153 DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
1154 DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
1155 
1156 DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
1157 DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
1158 DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
1159 DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
1160 
1161 DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
1162 DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
1163 DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
1164 DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
1165 
1166 DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
1167 DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
1168 DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
1169 DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
1170 
1171 DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
1172 DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
1173 DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
1174 DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
1175 
1176 DECLOP_MAKE_ONE_COMPONENT(float, float1);
1177 DECLOP_MAKE_TWO_COMPONENT(float, float2);
1178 DECLOP_MAKE_THREE_COMPONENT(float, float3);
1179 DECLOP_MAKE_FOUR_COMPONENT(float, float4);
1180 
1181 DECLOP_MAKE_ONE_COMPONENT(double, double1);
1182 DECLOP_MAKE_TWO_COMPONENT(double, double2);
1183 DECLOP_MAKE_THREE_COMPONENT(double, double3);
1184 DECLOP_MAKE_FOUR_COMPONENT(double, double4);
1185 
1186 DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
1187 DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
1188 DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
1189 DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
1190 
1191 DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
1192 DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
1193 DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
1194 DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
1195 
1196 DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
1197 DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
1198 DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
1199 DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
1200 
1201 DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
1202 DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
1203 DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
1204 DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
1205 #else // defined(_MSC_VER)
1206 #include <mmintrin.h>
1207 #include <xmmintrin.h>
1208 #include <emmintrin.h>
1209 #include <immintrin.h>
1210 
1211 typedef union { char data; } char1;
1212 typedef union { char data[2]; } char2;
1213 typedef union { char data[4]; } char4;
1214 typedef union { char4 data; } char3;
1215 typedef union { __m64 data; } char8;
1216 typedef union { __m128i data; } char16;
1217 
1218 typedef union { unsigned char data; } uchar1;
1219 typedef union { unsigned char data[2]; } uchar2;
1220 typedef union { unsigned char data[4]; } uchar4;
1221 typedef union { uchar4 data; } uchar3;
1222 typedef union { __m64 data; } uchar8;
1223 typedef union { __m128i data; } uchar16;
1224 
1225 typedef union { short data; } short1;
1226 typedef union { short data[2]; } short2;
1227 typedef union { __m64 data; } short4;
1228 typedef union { short4 data; } short3;
1229 typedef union { __m128i data; } short8;
1230 typedef union { __m128i data[2]; } short16;
1231 
1232 typedef union { unsigned short data; } ushort1;
1233 typedef union { unsigned short data[2]; } ushort2;
1234 typedef union { __m64 data; } ushort4;
1235 typedef union { ushort4 data; } ushort3;
1236 typedef union { __m128i data; } ushort8;
1237 typedef union { __m128i data[2]; } ushort16;
1238 
1239 typedef union { int data; } int1;
1240 typedef union { __m64 data; } int2;
1241 typedef union { __m128i data; } int4;
1242 typedef union { int4 data; } int3;
1243 typedef union { __m128i data[2]; } int8;
1244 typedef union { __m128i data[4];} int16;
1245 
1246 typedef union { unsigned int data; } uint1;
1247 typedef union { __m64 data; } uint2;
1248 typedef union { __m128i data; } uint4;
1249 typedef union { uint4 data; } uint3;
1250 typedef union { __m128i data[2]; } uint8;
1251 typedef union { __m128i data[4]; } uint16;
1252 
1253 #if !defined(_WIN64)
1254 typedef union { int data; } long1;
1255 typedef union { __m64 data; } long2;
1256 typedef union { __m128i data; } long4;
1257 typedef union { long4 data; } long3;
1258 typedef union { __m128i data[2]; } long8;
1259 typedef union { __m128i data[4]; } long16;
1260 
1261 typedef union { unsigned int data; } ulong1;
1262 typedef union { __m64 data; } ulong2;
1263 typedef union { __m128i data; } ulong4;
1264 typedef union { ulong4 data; } ulong3;
1265 typedef union { __m128i data[2]; } ulong8;
1266 typedef union { __m128i data[4]; } ulong16;
1267 #else // defined(_WIN64)
1268 typedef union { __m64 data; } long1;
1269 typedef union { __m128i data; } long2;
1270 typedef union { __m128i data[2]; } long4;
1271 typedef union { long4 data; } long3;
1272 typedef union { __m128i data[4]; } long8;
1273 typedef union { __m128i data[8]; } long16;
1274 
1275 typedef union { __m64 data; } ulong1;
1276 typedef union { __m128i data; } ulong2;
1277 typedef union { __m128i data[2]; } ulong4;
1278 typedef union { ulong4 data; } ulong3;
1279 typedef union { __m128i data[4]; } ulong8;
1280 typedef union { __m128i data[8]; } ulong16;
1281 #endif // defined(_WIN64)
1282 
1283 typedef union { __m64 data; } longlong1;
1284 typedef union { __m128i data; } longlong2;
1285 typedef union { __m128i data[2]; } longlong4;
1286 typedef union { longlong4 data; } longlong3;
1287 typedef union { __m128i data[4]; } longlong8;
1288 typedef union { __m128i data[8]; } longlong16;
1289 
1290 typedef union { __m64 data; } ulonglong1;
1291 typedef union { __m128i data; } ulonglong2;
1292 typedef union { __m128i data[2]; } ulonglong4;
1293 typedef union { ulonglong4 data; } ulonglong3;
1294 typedef union { __m128i data[4]; } ulonglong8;
1295 typedef union { __m128i data[8]; } ulonglong16;
1296 
1297 typedef union { float data; } float1;
1298 typedef union { __m64 data; } float2;
1299 typedef union { __m128 data; } float4;
1300 typedef union { float4 data; } float3;
1301 typedef union { __m256 data; } float8;
1302 typedef union { __m256 data[2]; } float16;
1303 
1304 typedef union { double data; } double1;
1305 typedef union { __m128d data; } double2;
1306 typedef union { __m256d data; } double4;
1307 typedef union { double4 data; } double3;
1308 typedef union { __m256d data[2]; } double8;
1309 typedef union { __m256d data[4]; } double16;
1310 
1311 #endif // defined(_MSC_VER)
1312 #endif
TODO-doc.
#define __host__
Definition: host_defines.h:41
Definition: hip_runtime.h:202