HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_bf16.h
Go to the documentation of this file.
1
85#ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
86#define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
87
88#include "amd_hip_vector_types.h" // float2 etc
89#include "device_library_decls.h" // ocml conversion functions
90#include "math_fwd.h" // ocml device functions
91
92#if defined(__HIPCC_RTC__)
93#define __HOST_DEVICE__ __device__
94#else
95#include <climits>
96#define __HOST_DEVICE__ __host__ __device__
97#endif
98
99// Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
100// different machines. These naive checks should prevent some undefined behavior on systems which
101// have different sizes for basic types.
102#if !defined(__HIPCC_RTC__)
103static_assert(CHAR_BIT == 8, "byte size should be of 8 bits");
104#endif
105static_assert(sizeof(unsigned short) == 2, "size of unsigned short should be 2 bytes");
106
109 unsigned short data;
110};
111
116};
117
122__HOST_DEVICE__ inline float __bfloat162float(__hip_bfloat16 a) {
123 unsigned int uval = 0;
124 uval = a.data << 16;
125 union {
126 unsigned int u32;
127 float fp32;
128 } u = {uval};
129 return u.fp32;
130}
131
136__HOST_DEVICE__ __hip_bfloat16 __float2bfloat16(float f) {
137 __hip_bfloat16 ret;
138 union {
139 float fp32;
140 unsigned int u32;
141 } u = {f};
142 if (~u.u32 & 0x7f800000) {
143 // When the exponent bits are not all 1s, then the value is zero, normal,
144 // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
145 // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
146 // This causes the bfloat16's mantissa to be incremented by 1 if the 16
147 // least significant bits of the float mantissa are greater than 0x8000,
148 // or if they are equal to 0x8000 and the least significant bit of the
149 // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
150 // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
151 // has the value 0x7f, then incrementing it causes it to become 0x00 and
152 // the exponent is incremented by one, which is the next higher FP value
153 // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
154 // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
155 // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
156 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
157 // incrementing it causes it to become an exponent of 0xFF and a mantissa
158 // of 0x00, which is Inf, the next higher value to the unrounded value.
159 u.u32 += 0x7fff + ((u.u32 >> 16) & 1); // Round to nearest, round to even
160 } else if (u.u32 & 0xffff) {
161 // When all of the exponent bits are 1, the value is Inf or NaN.
162 // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
163 // mantissa bit. Quiet NaN is indicated by the most significant mantissa
164 // bit being 1. Signaling NaN is indicated by the most significant
165 // mantissa bit being 0 but some other bit(s) being 1. If any of the
166 // lower 16 bits of the mantissa are 1, we set the least significant bit
167 // of the bfloat16 mantissa, in order to preserve signaling NaN in case
168 // the bloat16's mantissa bits are all 0.
169 u.u32 |= 0x10000; // Preserve signaling NaN
170 }
171
172 ret.data = (u.u32 >> 16);
173 return ret;
174}
175
180__HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a) {
181 return float2{__bfloat162float(a.x), __bfloat162float(a.y)};
182}
183
189 return __hip_bfloat162{a, a};
190}
191
196__device__ short int __bfloat16_as_short(const __hip_bfloat16 h) { return (short)h.data; }
197
202__device__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) { return h.data; }
203
208__HOST_DEVICE__ __hip_bfloat16 __double2bfloat16(const double a) {
209 return __float2bfloat16((float)a);
210}
211
218}
219
225 return __hip_bfloat162{a, b};
226}
227
232__device__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) { return a.y; }
233
239 return __hip_bfloat162{a.y, a.y};
240}
241
246__HOST_DEVICE__ float __high2float(const __hip_bfloat162 a) { return __bfloat162float(a.y); }
247
253 return __hip_bfloat162{a.y, b.y};
254}
255
260__device__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) { return a.x; }
261
267 return __hip_bfloat162{a.x, a.x};
268}
269
274__HOST_DEVICE__ float __low2float(const __hip_bfloat162 a) { return __bfloat162float(a.x); }
275
281 return __hip_bfloat162{a.y, a.x};
282}
283
289 return __hip_bfloat162{a.x, b.x};
290}
291
296__device__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
297 return __hip_bfloat16{(unsigned short)a};
298}
299
304__device__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
305 return __hip_bfloat16{a};
306}
307
308
315}
316
323}
324
331}
332
338 const __hip_bfloat16 c) {
339 return __float2bfloat16(
340 __ocml_fma_f32(__bfloat162float(a), __bfloat162float(b), __bfloat162float(c)));
341}
342
349}
350
356 auto ret = a;
357 ret.data ^= 0x8000;
358 return ret;
359}
360
366 auto ret = a;
367 ret.data &= 0x7FFF;
368 return ret;
369}
370
378}
379
385 return __hip_bfloat162{__habs(a.x), __habs(a.y)};
386}
387
393 return __hip_bfloat162{__hadd(a.x, b.x), __hadd(a.y, b.y)};
394}
395
401 const __hip_bfloat162 c) {
402 return __hip_bfloat162{__hfma(a.x, b.x, c.x), __hfma(a.y, b.y, c.y)};
403}
404
410 return __hip_bfloat162{__hmul(a.x, b.x), __hmul(a.y, b.y)};
411}
412
418 return __hip_bfloat162{__hneg(a.x), __hneg(a.y)};
419}
420
426 return __hip_bfloat162{__hsub(a.x, b.x), __hsub(a.y, b.y)};
427}
428
433__device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
434 return __bfloat162float(a) == __bfloat162float(b);
435}
436
441__device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
442 return !(__bfloat162float(a) < __bfloat162float(b)) &&
444}
445
450__device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
451 return __bfloat162float(a) > __bfloat162float(b);
452}
453
458__device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
459 return !(__bfloat162float(a) <= __bfloat162float(b));
460}
461
466__device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
467 return __bfloat162float(a) >= __bfloat162float(b);
468}
469
474__device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
475 return !(__bfloat162float(a) < __bfloat162float(b));
476}
477
482__device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
483 return __bfloat162float(a) != __bfloat162float(b);
484}
485
490__device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
491 return !(__bfloat162float(a) == __bfloat162float(b));
492}
493
499 return __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a), __bfloat162float(b)));
500}
501
507 return __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a), __bfloat162float(b)));
508}
509
514__device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
515 return __bfloat162float(a) < __bfloat162float(b);
516}
517
522__device__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
523 return !(__bfloat162float(a) >= __bfloat162float(b));
524}
525
530__device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
531 return __bfloat162float(a) <= __bfloat162float(b);
532}
533
538__device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
539 return !(__bfloat162float(a) > __bfloat162float(b));
540}
541
546__device__ int __hisinf(const __hip_bfloat16 a) { return __ocml_isinf_f32(__bfloat162float(a)); }
547
552__device__ bool __hisnan(const __hip_bfloat16 a) { return __ocml_isnan_f32(__bfloat162float(a)); }
553
558__device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
559 return __heq(a.x, b.x) && __heq(a.y, b.y);
560}
561
566__device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
567 return __hequ(a.x, b.x) && __hequ(a.y, b.y);
568}
569
574__device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
575 return __hge(a.x, b.x) && __hge(a.y, b.y);
576}
577
582__device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
583 return __hgeu(a.x, b.x) && __hgeu(a.y, b.y);
584}
585
590__device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
591 return __hgt(a.x, b.x) && __hgt(a.y, b.y);
592}
593
598__device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
599 return __hgtu(a.x, b.x) && __hgtu(a.y, b.y);
600}
601
606__device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
607 return __hle(a.x, b.x) && __hle(a.y, b.y);
608}
609
614__device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
615 return __hleu(a.x, b.x) && __hleu(a.y, b.y);
616}
617
622__device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
623 return __hlt(a.x, b.x) && __hlt(a.y, b.y);
624}
625
630__device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
631 return __hltu(a.x, b.x) && __hltu(a.y, b.y);
632}
633
638__device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
639 return __hne(a.x, b.x) && __hne(a.y, b.y);
640}
641
646__device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
647 return __hneu(a.x, b.x) && __hneu(a.y, b.y);
648}
649
655 return __hip_bfloat162{{__heq(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
656 {__heq(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
657}
658
664 return __hip_bfloat162{{__hge(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
665 {__hge(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
666}
667
673 return __hip_bfloat162{{__hgt(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
674 {__hgt(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
675}
676
682 return __hip_bfloat162{
683 {__ocml_isnan_f32(__bfloat162float(a.x)) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
684 {__ocml_isnan_f32(__bfloat162float(a.y)) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
685}
686
692 return __hip_bfloat162{{__hle(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
693 {__hle(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
694}
695
701 return __hip_bfloat162{{__hlt(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
702 {__hlt(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
703}
704
710 return __hip_bfloat162{
711 __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a.x), __bfloat162float(b.x))),
712 __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a.y), __bfloat162float(b.y)))};
713}
714
720 return __hip_bfloat162{
721 __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a.x), __bfloat162float(b.x))),
722 __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a.y), __bfloat162float(b.y)))};
723}
724
730 return __hip_bfloat162{{__hne(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
731 {__hne(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
732}
733
739 return __float2bfloat16(__ocml_ceil_f32(__bfloat162float(h)));
740}
741
746__device__ __hip_bfloat16 hcos(const __hip_bfloat16 h) {
747 return __float2bfloat16(__ocml_cos_f32(__bfloat162float(h)));
748}
749
754__device__ __hip_bfloat16 hexp(const __hip_bfloat16 h) {
755 return __float2bfloat16(__ocml_exp_f32(__bfloat162float(h)));
756}
757
763 return __float2bfloat16(__ocml_exp10_f32(__bfloat162float(h)));
764}
765
771 return __float2bfloat16(__ocml_exp2_f32(__bfloat162float(h)));
772}
773
779 return __float2bfloat16(__ocml_floor_f32(__bfloat162float(h)));
780}
781
786__device__ __hip_bfloat16 hlog(const __hip_bfloat16 h) {
787 return __float2bfloat16(__ocml_log_f32(__bfloat162float(h)));
788}
789
795 return __float2bfloat16(__ocml_log10_f32(__bfloat162float(h)));
796}
797
803 return __float2bfloat16(__ocml_log2_f32(__bfloat162float(h)));
804}
805
810__device__ __hip_bfloat16 hrcp(const __hip_bfloat16 h) {
811 return __float2bfloat16(1.0f / (__bfloat162float(h)));
812}
813
819 return __float2bfloat16(__ocml_rint_f32(__bfloat162float(h)));
820}
821
827 return __float2bfloat16(__ocml_rsqrt_f32(__bfloat162float(h)));
828}
829
834__device__ __hip_bfloat16 hsin(const __hip_bfloat16 h) {
835 return __float2bfloat16(__ocml_sin_f32(__bfloat162float(h)));
836}
837
843 return __float2bfloat16(__ocml_sqrt_f32(__bfloat162float(h)));
844}
845
851 return __float2bfloat16(__ocml_trunc_f32(__bfloat162float(h)));
852}
853
859 return __hip_bfloat162{hceil(h.x), hceil(h.y)};
860}
861
867 return __hip_bfloat162{hcos(h.x), hcos(h.y)};
868}
869
875 return __hip_bfloat162{hexp(h.x), hexp(h.y)};
876}
877
883 return __hip_bfloat162{hexp10(h.x), hexp10(h.y)};
884}
885
891 return __hip_bfloat162{hexp2(h.x), hexp2(h.y)};
892}
893
899 return __hip_bfloat162{hfloor(h.x), hfloor(h.y)};
900}
901
907 return __hip_bfloat162{hlog(h.x), hlog(h.y)};
908}
909
915 return __hip_bfloat162{hlog10(h.x), hlog10(h.y)};
916}
917
923 return __hip_bfloat162{hlog2(h.x), hlog2(h.y)};
924}
925
931 return __hip_bfloat162{hrcp(h.x), hrcp(h.y)};
932}
933
939 return __hip_bfloat162{hrint(h.x), hrint(h.y)};
940}
941
947 return __hip_bfloat162{hrsqrt(h.x), hrsqrt(h.y)};
948}
949
955 return __hip_bfloat162{hsin(h.x), hsin(h.y)};
956}
957
963 return __hip_bfloat162{hsqrt(h.x), hsqrt(h.y)};
964}
965
971 return __hip_bfloat162{htrunc(h.x), htrunc(h.y)};
972}
973
974#endif
Contains declarations for types and functions in device library. Uses int64_t and uint64_t instead of...
__device__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition amd_hip_bf16.h:365
__device__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:321
__device__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:337
__device__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:329
__device__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:355
__device__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:347
__device__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:313
__device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than equal.
Definition amd_hip_bf16.h:466
__device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than operator.
Definition amd_hip_bf16.h:514
__device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered not equal.
Definition amd_hip_bf16.h:490
__device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than.
Definition amd_hip_bf16.h:450
__device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values.
Definition amd_hip_bf16.h:433
__device__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than.
Definition amd_hip_bf16.h:522
__device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than equal.
Definition amd_hip_bf16.h:538
__device__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return min.
Definition amd_hip_bf16.h:506
__device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - not equal.
Definition amd_hip_bf16.h:482
__device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered equal.
Definition amd_hip_bf16.h:441
__device__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return max.
Definition amd_hip_bf16.h:498
__device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than.
Definition amd_hip_bf16.h:458
__device__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:546
__device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than.
Definition amd_hip_bf16.h:530
__device__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:552
__device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than equal.
Definition amd_hip_bf16.h:474
__device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:582
__device__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:663
__device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:630
__device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:606
__device__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns min of two elements.
Definition amd_hip_bf16.h:719
__device__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:700
__device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:566
__device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:558
__device__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:691
__device__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns max of two elements.
Definition amd_hip_bf16.h:709
__device__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:654
__device__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:729
__device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:622
__device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:574
__device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:590
__device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:646
__device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:614
__device__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:681
__device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:638
__device__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:672
__device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:598
__device__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:400
__device__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:375
__device__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition amd_hip_bf16.h:384
__device__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:392
__device__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:425
__device__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Converts a bfloat162 into negative.
Definition amd_hip_bf16.h:417
__device__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:409
__HOST_DEVICE__ float __bfloat162float(__hip_bfloat16 a)
Converts bfloat16 to float.
Definition amd_hip_bf16.h:122
__HOST_DEVICE__ __hip_bfloat16 __float2bfloat16(float f)
Converts float to bfloat16.
Definition amd_hip_bf16.h:136
__device__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as an unsigned signed short integer.
Definition amd_hip_bf16.h:202
__HOST_DEVICE__ __hip_bfloat16 __double2bfloat16(const double a)
Convert double to __hip_bfloat16.
Definition amd_hip_bf16.h:208
__device__ __hip_bfloat16 __short_as_bfloat16(const short int a)
Reinterprets short int into a bfloat16.
Definition amd_hip_bf16.h:296
__HOST_DEVICE__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:246
__HOST_DEVICE__ __hip_bfloat162 __float22bfloat162_rn(const float2 a)
Convert float2 to __hip_bfloat162.
Definition amd_hip_bf16.h:216
__device__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:266
__device__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a)
Reinterprets unsigned short int into a bfloat16.
Definition amd_hip_bf16.h:304
__device__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts high 16 bits from each and combines them.
Definition amd_hip_bf16.h:252
__device__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a)
Moves bfloat16 value to bfloat162.
Definition amd_hip_bf16.h:188
__HOST_DEVICE__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:274
__device__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:232
__device__ short int __bfloat16_as_short(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as a signed short integer.
Definition amd_hip_bf16.h:196
__device__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:280
__device__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b)
Combine two __hip_bfloat16 to __hip_bfloat162.
Definition amd_hip_bf16.h:224
__device__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts low 16 bits from each and combines them.
Definition amd_hip_bf16.h:288
__device__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:260
__device__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:238
__HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a)
Converts and moves bfloat162 to float2.
Definition amd_hip_bf16.h:180
__device__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:754
__device__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:818
__device__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:826
__device__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:746
__device__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:850
__device__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:794
__device__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:762
__device__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:738
__device__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:810
__device__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:842
__device__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:778
__device__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:834
__device__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:786
__device__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:802
__device__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:770
__device__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:954
__device__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:906
__device__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:922
__device__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:962
__device__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:914
__device__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:858
__device__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:938
__device__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:930
__device__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:946
__device__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:866
__device__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:898
__device__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:882
__device__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:970
__device__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:890
__device__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:874
Struct to represent a 16 bit brain floating point number.
Definition amd_hip_bf16.h:108
Struct to represent two 16 bit brain floating point numbers.
Definition amd_hip_bf16.h:113
Definition amd_hip_vector_types.h:1986