25#include "amd_device_functions.h"
27#if __has_builtin(__hip_atomic_compare_exchange_strong)
// Minimal compile-time type selector (equivalent to std::conditional),
// kept local so device code does not need a <type_traits> include.
// Cond_t<true,  T, F>::type == T
// Cond_t<false, T, F>::type == F
template<bool B, typename T, typename F>
struct Cond_t;

template<typename T, typename F>
struct Cond_t<true, T, F> { using type = T; };

template<typename T, typename F>
struct Cond_t<false, T, F> { using type = F; };
34#if !__HIP_DEVICE_COMPILE__
36#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
37#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
38#define __HIP_MEMORY_SCOPE_WORKGROUP 3
39#define __HIP_MEMORY_SCOPE_AGENT 4
40#define __HIP_MEMORY_SCOPE_SYSTEM 5
43#if !defined(__HIPCC_RTC__)
44#include "amd_hip_unsafe_atomics.h"
49 int mem_order = __ATOMIC_SEQ_CST,
50 int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
55__attribute__((always_inline, device))
56T hip_cas_expander(T* p, T x, Op op, F f)
noexcept
58 using FP = __attribute__((address_space(0))) const
void*;
61 extern
bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
63 if (is_shared_workaround((FP)p))
66 using U = typename Cond_t<
67 sizeof(T) == sizeof(
unsigned int),
unsigned int,
unsigned long long>::type;
69 auto q = reinterpret_cast<U*>(p);
71 U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
76 op(
reinterpret_cast<T&
>(tmp1), x);
77 }
while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
78 mem_order, mem_scope));
80 return reinterpret_cast<const T&
>(tmp0);
84 int mem_order = __ATOMIC_SEQ_CST,
85 int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
90__attribute__((always_inline, device))
91T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f)
noexcept
93 using FP = __attribute__((address_space(0))) const
void*;
96 extern
bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
98 if (is_shared_workaround((FP)p))
101 using U = typename Cond_t<
102 sizeof(T) == sizeof(
unsigned int),
unsigned int,
unsigned long long>::type;
104 auto q = reinterpret_cast<U*>(p);
106 U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
107 while (cmp(x,
reinterpret_cast<const T&
>(tmp)) &&
108 !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
111 return reinterpret_cast<const T&
>(tmp);
116int atomicCAS(
int* address,
int compare,
int val) {
117 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
118 __HIP_MEMORY_SCOPE_AGENT);
124int atomicCAS_system(
int* address,
int compare,
int val) {
125 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
126 __HIP_MEMORY_SCOPE_SYSTEM);
132unsigned int atomicCAS(
unsigned int* address,
unsigned int compare,
unsigned int val) {
133 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
134 __HIP_MEMORY_SCOPE_AGENT);
140unsigned int atomicCAS_system(
unsigned int* address,
unsigned int compare,
unsigned int val) {
141 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
142 __HIP_MEMORY_SCOPE_SYSTEM);
148unsigned long atomicCAS(
unsigned long* address,
unsigned long compare,
unsigned long val) {
149 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
150 __HIP_MEMORY_SCOPE_AGENT);
156unsigned long atomicCAS_system(
unsigned long* address,
unsigned long compare,
unsigned long val) {
157 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
158 __HIP_MEMORY_SCOPE_SYSTEM);
164unsigned long long atomicCAS(
unsigned long long* address,
unsigned long long compare,
165 unsigned long long val) {
166 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
167 __HIP_MEMORY_SCOPE_AGENT);
173unsigned long long atomicCAS_system(
unsigned long long* address,
unsigned long long compare,
174 unsigned long long val) {
175 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
176 __HIP_MEMORY_SCOPE_SYSTEM);
182float atomicCAS(
float* address,
float compare,
float val) {
183 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
184 __HIP_MEMORY_SCOPE_AGENT);
190float atomicCAS_system(
float* address,
float compare,
float val) {
191 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
192 __HIP_MEMORY_SCOPE_SYSTEM);
198double atomicCAS(
double* address,
double compare,
double val) {
199 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
200 __HIP_MEMORY_SCOPE_AGENT);
206double atomicCAS_system(
double* address,
double compare,
double val) {
207 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
208 __HIP_MEMORY_SCOPE_SYSTEM);
214int atomicAdd(
int* address,
int val) {
215 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
220int atomicAdd_system(
int* address,
int val) {
221 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
226unsigned int atomicAdd(
unsigned int* address,
unsigned int val) {
227 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
232unsigned int atomicAdd_system(
unsigned int* address,
unsigned int val) {
233 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
238unsigned long atomicAdd(
unsigned long* address,
unsigned long val) {
239 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
244unsigned long atomicAdd_system(
unsigned long* address,
unsigned long val) {
245 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
250unsigned long long atomicAdd(
unsigned long long* address,
unsigned long long val) {
251 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
256unsigned long long atomicAdd_system(
unsigned long long* address,
unsigned long long val) {
257 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
262float atomicAdd(
float* address,
float val) {
263#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
264 return unsafeAtomicAdd(address, val);
266 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
272float atomicAdd_system(
float* address,
float val) {
273 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
276#if !defined(__HIPCC_RTC__)
277DEPRECATED(
"use atomicAdd instead")
281void atomicAddNoRet(
float* address,
float val)
283 __ockl_atomic_add_noret_f32(address, val);
288double atomicAdd(
double* address,
double val) {
289#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
290 return unsafeAtomicAdd(address, val);
292 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
298double atomicAdd_system(
double* address,
double val) {
299 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
304int atomicSub(
int* address,
int val) {
305 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
310int atomicSub_system(
int* address,
int val) {
311 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
316unsigned int atomicSub(
unsigned int* address,
unsigned int val) {
317 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
322unsigned int atomicSub_system(
unsigned int* address,
unsigned int val) {
323 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
328unsigned long atomicSub(
unsigned long* address,
unsigned long val) {
329 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
334unsigned long atomicSub_system(
unsigned long* address,
unsigned long val) {
335 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
340unsigned long long atomicSub(
unsigned long long* address,
unsigned long long val) {
341 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
346unsigned long long atomicSub_system(
unsigned long long* address,
unsigned long long val) {
347 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
352float atomicSub(
float* address,
float val) {
353#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
354 return unsafeAtomicAdd(address, -val);
356 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
362float atomicSub_system(
float* address,
float val) {
363 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
368double atomicSub(
double* address,
double val) {
369#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
370 return unsafeAtomicAdd(address, -val);
372 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
378double atomicSub_system(
double* address,
double val) {
379 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
384int atomicExch(
int* address,
int val) {
385 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
390int atomicExch_system(
int* address,
int val) {
391 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
396unsigned int atomicExch(
unsigned int* address,
unsigned int val) {
397 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
402unsigned int atomicExch_system(
unsigned int* address,
unsigned int val) {
403 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
408unsigned long atomicExch(
unsigned long* address,
unsigned long val) {
409 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
414unsigned long atomicExch_system(
unsigned long* address,
unsigned long val) {
415 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
420unsigned long long atomicExch(
unsigned long long* address,
unsigned long long val) {
421 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
426unsigned long long atomicExch_system(
unsigned long long* address,
unsigned long long val) {
427 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
432float atomicExch(
float* address,
float val) {
433 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
438float atomicExch_system(
float* address,
float val) {
439 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
444double atomicExch(
double* address,
double val) {
445 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
450double atomicExch_system(
double* address,
double val) {
451 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
456int atomicMin(
int* address,
int val) {
457#if defined(__gfx941__)
458 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
459 address, val, [](
int x,
int y) {
return x < y; }, [=]() {
460 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
461 __HIP_MEMORY_SCOPE_AGENT);
464 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
470int atomicMin_system(
int* address,
int val) {
471#if defined(__gfx941__)
472 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
473 address, val, [](
int x,
int y) {
return x < y; }, [=]() {
474 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
475 __HIP_MEMORY_SCOPE_SYSTEM);
478 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
484unsigned int atomicMin(
unsigned int* address,
unsigned int val) {
485#if defined(__gfx941__)
486 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
487 address, val, [](
unsigned int x,
unsigned int y) {
return x < y; }, [=]() {
488 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
489 __HIP_MEMORY_SCOPE_AGENT);
492 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
499unsigned int atomicMin_system(
unsigned int* address,
unsigned int val) {
500#if defined(__gfx941__)
501 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
502 address, val, [](
unsigned int x,
unsigned int y) {
return x < y; }, [=]() {
503 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
504 __HIP_MEMORY_SCOPE_SYSTEM);
507 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
513unsigned long long atomicMin(
unsigned long* address,
unsigned long val) {
514#if defined(__gfx941__)
515 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
518 [](
unsigned long x,
unsigned long y) {
return x < y; },
520 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
521 __HIP_MEMORY_SCOPE_AGENT);
524 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
530unsigned long atomicMin_system(
unsigned long* address,
unsigned long val) {
531#if defined(__gfx941__)
532 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
535 [](
unsigned long x,
unsigned long y) {
return x < y; },
537 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
538 __HIP_MEMORY_SCOPE_SYSTEM);
541 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
547unsigned long long atomicMin(
unsigned long long* address,
unsigned long long val) {
548#if defined(__gfx941__)
549 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
552 [](
unsigned long long x,
unsigned long long y) {
return x < y; },
554 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
555 __HIP_MEMORY_SCOPE_AGENT);
558 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
564unsigned long long atomicMin_system(
unsigned long long* address,
unsigned long long val) {
565#if defined(__gfx941__)
566 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
569 [](
unsigned long long x,
unsigned long long y) {
return x < y; },
571 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
572 __HIP_MEMORY_SCOPE_SYSTEM);
575 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
581long long atomicMin(
long long* address,
long long val) {
582#if defined(__gfx941__)
583 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
584 address, val, [](
long long x,
long long y) {
return x < y; },
586 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
589 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
595long long atomicMin_system(
long long* address,
long long val) {
596#if defined(__gfx941__)
597 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
598 address, val, [](
long long x,
long long y) {
return x < y; },
600 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
603 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
609float atomicMin(
float* addr,
float val) {
610#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
611 return unsafeAtomicMin(addr, val);
613 #if __has_builtin(__hip_atomic_load) && \
614 __has_builtin(__hip_atomic_compare_exchange_strong)
615 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
617 while (!done && value > val) {
618 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
619 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
623 unsigned int *uaddr = (
unsigned int *)addr;
624 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
626 while (!done && __uint_as_float(value) > val) {
627 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
false,
628 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
630 return __uint_as_float(value);
637float atomicMin_system(
float* address,
float val) {
638 unsigned int* uaddr {
reinterpret_cast<unsigned int*
>(address) };
639 #if __has_builtin(__hip_atomic_load)
640 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
642 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
644 float value = __uint_as_float(tmp);
646 while (val < value) {
647 value = atomicCAS_system(address, value, val);
655double atomicMin(
double* addr,
double val) {
656#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
657 return unsafeAtomicMin(addr, val);
659 #if __has_builtin(__hip_atomic_load) && \
660 __has_builtin(__hip_atomic_compare_exchange_strong)
661 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
663 while (!done && value > val) {
664 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
665 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
669 unsigned long long *uaddr = (
unsigned long long *)addr;
670 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
672 while (!done && __longlong_as_double(value) > val) {
673 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
false,
674 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
676 return __longlong_as_double(value);
683double atomicMin_system(
double* address,
double val) {
684 unsigned long long* uaddr {
reinterpret_cast<unsigned long long*
>(address) };
685 #if __has_builtin(__hip_atomic_load)
686 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
688 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
690 double value = __longlong_as_double(tmp);
692 while (val < value) {
693 value = atomicCAS_system(address, value, val);
701int atomicMax(
int* address,
int val) {
702#if defined(__gfx941__)
703 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
704 address, val, [](
int x,
int y) {
return y < x; }, [=]() {
705 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
706 __HIP_MEMORY_SCOPE_AGENT);
709 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
715int atomicMax_system(
int* address,
int val) {
716#if defined(__gfx941__)
717 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
718 address, val, [](
int x,
int y) {
return y < x; }, [=]() {
719 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
720 __HIP_MEMORY_SCOPE_SYSTEM);
723 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
729unsigned int atomicMax(
unsigned int* address,
unsigned int val) {
730#if defined(__gfx941__)
731 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
732 address, val, [](
unsigned int x,
unsigned int y) {
return y < x; }, [=]() {
733 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
734 __HIP_MEMORY_SCOPE_AGENT);
737 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
743unsigned int atomicMax_system(
unsigned int* address,
unsigned int val) {
744#if defined(__gfx941__)
745 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
746 address, val, [](
unsigned int x,
unsigned int y) {
return y < x; }, [=]() {
747 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
748 __HIP_MEMORY_SCOPE_SYSTEM);
751 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
757unsigned long atomicMax(
unsigned long* address,
unsigned long val) {
758#if defined(__gfx941__)
759 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
762 [](
unsigned long x,
unsigned long y) {
return y < x; },
764 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
765 __HIP_MEMORY_SCOPE_AGENT);
768 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
774unsigned long atomicMax_system(
unsigned long* address,
unsigned long val) {
775#if defined(__gfx941__)
776 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
779 [](
unsigned long x,
unsigned long y) {
return y < x; },
781 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
782 __HIP_MEMORY_SCOPE_SYSTEM);
785 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
791unsigned long long atomicMax(
unsigned long long* address,
unsigned long long val) {
792#if defined(__gfx941__)
793 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
796 [](
unsigned long long x,
unsigned long long y) {
return y < x; },
798 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
799 __HIP_MEMORY_SCOPE_AGENT);
802 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
808unsigned long long atomicMax_system(
unsigned long long* address,
unsigned long long val) {
809#if defined(__gfx941__)
810 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
813 [](
unsigned long long x,
unsigned long long y) {
return y < x; },
815 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
816 __HIP_MEMORY_SCOPE_SYSTEM);
819 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
825long long atomicMax(
long long* address,
long long val) {
826 #if defined(__gfx941__)
827 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
828 address, val, [](
long long x,
long long y) {
return y < x; },
830 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
833 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
839long long atomicMax_system(
long long* address,
long long val) {
840#if defined(__gfx941__)
841 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
842 address, val, [](
long long x,
long long y) {
return y < x; },
844 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
847 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
853float atomicMax(
float* addr,
float val) {
854#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
855 return unsafeAtomicMax(addr, val);
857 #if __has_builtin(__hip_atomic_load) && \
858 __has_builtin(__hip_atomic_compare_exchange_strong)
859 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
861 while (!done && value < val) {
862 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
863 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
867 unsigned int *uaddr = (
unsigned int *)addr;
868 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
870 while (!done && __uint_as_float(value) < val) {
871 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
false,
872 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
874 return __uint_as_float(value);
881float atomicMax_system(
float* address,
float val) {
882 unsigned int* uaddr {
reinterpret_cast<unsigned int*
>(address) };
883 #if __has_builtin(__hip_atomic_load)
884 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
886 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
888 float value = __uint_as_float(tmp);
890 while (value < val) {
891 value = atomicCAS_system(address, value, val);
899double atomicMax(
double* addr,
double val) {
900#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
901 return unsafeAtomicMax(addr, val);
903 #if __has_builtin(__hip_atomic_load) && \
904 __has_builtin(__hip_atomic_compare_exchange_strong)
905 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
907 while (!done && value < val) {
908 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
909 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
913 unsigned long long *uaddr = (
unsigned long long *)addr;
914 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
916 while (!done && __longlong_as_double(value) < val) {
917 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
false,
918 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
920 return __longlong_as_double(value);
927double atomicMax_system(
double* address,
double val) {
928 unsigned long long* uaddr {
reinterpret_cast<unsigned long long*
>(address) };
929 #if __has_builtin(__hip_atomic_load)
930 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
932 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
934 double value = __longlong_as_double(tmp);
936 while (value < val) {
937 value = atomicCAS_system(address, value, val);
945unsigned int atomicInc(
unsigned int* address,
unsigned int val)
947#if defined(__gfx941__)
950 unsigned int __builtin_amdgcn_atomic_inc(
955 bool) __asm(
"llvm.amdgcn.atomic.inc.i32.p0i32");
957 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
960 [](
unsigned int& x,
unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
963 __builtin_amdgcn_atomic_inc(address, val, __ATOMIC_RELAXED, 1,
false);
966 return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
"agent");
973unsigned int atomicDec(
unsigned int* address,
unsigned int val)
975#if defined(__gfx941__)
978 unsigned int __builtin_amdgcn_atomic_dec(
983 bool) __asm(
"llvm.amdgcn.atomic.dec.i32.p0i32");
985 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
988 [](
unsigned int& x,
unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
991 __builtin_amdgcn_atomic_dec(address, val, __ATOMIC_RELAXED, 1,
false);
994 return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
"agent");
1001int atomicAnd(
int* address,
int val) {
1002#if defined(__gfx941__)
1003 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1004 address, val, [](
int& x,
int y) { x &= y; }, [=]() {
1005 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1006 __HIP_MEMORY_SCOPE_AGENT);
1009 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1015int atomicAnd_system(
int* address,
int val) {
1016#if defined(__gfx941__)
1017 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1018 address, val, [](
int& x,
int y) { x &= y; }, [=]() {
1019 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1020 __HIP_MEMORY_SCOPE_SYSTEM);
1023 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1029unsigned int atomicAnd(
unsigned int* address,
unsigned int val) {
1030#if defined(__gfx941__)
1031 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1032 address, val, [](
unsigned int& x,
unsigned int y) { x &= y; }, [=]() {
1033 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1034 __HIP_MEMORY_SCOPE_AGENT);
1037 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1043unsigned int atomicAnd_system(
unsigned int* address,
unsigned int val) {
1044#if defined(__gfx941__)
1045 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1046 address, val, [](
unsigned int& x,
unsigned int y) { x &= y; }, [=]() {
1047 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1048 __HIP_MEMORY_SCOPE_SYSTEM);
1051 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1057unsigned long atomicAnd(
unsigned long* address,
unsigned long val) {
1058#if defined(__gfx941__)
1059 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1060 address, val, [](
unsigned long& x,
unsigned long y) { x &= y; }, [=]() {
1061 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1062 __HIP_MEMORY_SCOPE_AGENT);
1065 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1071unsigned long atomicAnd_system(
unsigned long* address,
unsigned long val) {
1072#if defined(__gfx941__)
1073 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1074 address, val, [](
unsigned long& x,
unsigned long y) { x &= y; }, [=]() {
1075 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1076 __HIP_MEMORY_SCOPE_SYSTEM);
1079 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1085unsigned long long atomicAnd(
unsigned long long* address,
unsigned long long val) {
1086#if defined(__gfx941__)
1087 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1090 [](
unsigned long long& x,
unsigned long long y) { x &= y; },
1092 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1093 __HIP_MEMORY_SCOPE_AGENT);
1096 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1102unsigned long long atomicAnd_system(
unsigned long long* address,
unsigned long long val) {
1103#if defined(__gfx941__)
1104 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1107 [](
unsigned long long& x,
unsigned long long y) { x &= y; },
1109 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1110 __HIP_MEMORY_SCOPE_SYSTEM);
1113 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1119int atomicOr(
int* address,
int val) {
1120#if defined(__gfx941__)
1121 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1122 address, val, [](
int& x,
int y) { x |= y; }, [=]() {
1123 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1124 __HIP_MEMORY_SCOPE_AGENT);
1127 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1133int atomicOr_system(
int* address,
int val) {
1134#if defined(__gfx941__)
1135 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1136 address, val, [](
int& x,
int y) { x |= y; }, [=]() {
1137 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1138 __HIP_MEMORY_SCOPE_SYSTEM);
1141 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
// Bitwise-OR read-modify-write atomics, __hip_atomic builtin path.
// Agent-scope (`atomicOr`) and system-scope (`atomicOr_system`) overloads
// for unsigned int / unsigned long / unsigned long long.
//
// On gfx941 the operation is routed through hip_cas_expander (defined
// above), which performs the update with a compare-exchange loop unless
// the pointer refers to shared memory, in which case the trailing [=]
// lambda — the plain __hip_atomic_fetch_or — is invoked instead.
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

// Same operation at __HIP_MEMORY_SCOPE_SYSTEM.
__device__
inline
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long atomicOr(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Bitwise-XOR read-modify-write atomics, __hip_atomic builtin path.
// Agent-scope (`atomicXor`) and system-scope (`atomicXor_system`) overloads.
// The gfx941 branch expands the operation through hip_cas_expander; see the
// atomicOr family above for the rationale.
__device__
inline
int atomicXor(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
int atomicXor_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long atomicXor(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
  // NOTE(review): unlike every other overload in this family, this one has
  // no __gfx941__ CAS-expander branch in the original source — confirm
  // whether that omission is intentional.
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
// Compare-and-swap overloads (fallback path using the GNU __atomic
// builtins). Returns the value read from *address before the operation:
// __atomic_compare_exchange_n stores the observed value back into
// `compare` on failure (and leaves it equal to the expected value on
// success), so returning `compare` yields the pre-operation contents in
// both cases — matching CUDA atomicCAS semantics.
__device__
inline
int atomicCAS(int* address, int compare, int val) {
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}

__device__
inline
unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}

__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address, unsigned long long compare, unsigned long long val) {
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
// Relaxed fetch-and-add overloads (fallback __atomic builtin path).
// Each returns the value held at *address before the addition.
__device__
inline
int atomicAdd(int* address, int val) {
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val) {
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}

// float: when __AMDGCN_UNSAFE_FP_ATOMICS__ is defined (unsafe FP atomics
// opt-in), defer to unsafeAtomicAdd from amd_hip_unsafe_atomics.h;
// otherwise fall back to the generic relaxed fetch-add.
__device__
inline
float atomicAdd(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
    return unsafeAtomicAdd(address, val);
#else
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
// Deprecated no-return float add: performs the atomic addition via the
// OCKL device-library helper but discards the old value. Excluded under
// hipRTC (DEPRECATED/__ockl_* are unavailable there); kept for source
// compatibility with older HIP code.
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}
#endif
// double fetch-and-add: honors the unsafe-FP-atomics opt-in, mirroring the
// float overload above; otherwise uses the generic relaxed fetch-add.
__device__
inline
double atomicAdd(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
    return unsafeAtomicAdd(address, val);
#else
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
// Relaxed fetch-and-subtract overloads; return the pre-operation value.
__device__
inline
int atomicSub(int* address, int val) {
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val) {
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Relaxed atomic exchange overloads; return the value previously stored.
__device__
inline
int atomicExch(int* address, int val) {
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val) {
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}

// float exchange is done as a 32-bit integer exchange with bit-casts in
// and out, since __atomic_exchange_n operates on integral types.
__device__
inline
float atomicExch(float* address, float val) {
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}
// Atomic minimum overloads. 32-bit types map directly onto the
// __atomic_fetch_min builtin; the 64-bit variants are emulated with a
// CAS loop. All return the pre-operation value.
__device__
inline
int atomicMin(int* address, int val) {
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val) {
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}

// CAS-loop emulation: keep trying while our candidate is still smaller
// than the latest observed value. The extra re-load lets a concurrent
// update restart the iteration without paying for a failed CAS.
__device__
inline
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}

__device__
inline
long long atomicMin(long long* address, long long val) {
    long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
// Atomic maximum overloads, mirroring the atomicMin family: 32-bit types
// use __atomic_fetch_max; 64-bit types are emulated with a CAS loop.
// All return the pre-operation value.
__device__
inline
int atomicMax(int* address, int val) {
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}

// CAS-loop emulation: keep trying while the latest observed value is
// still smaller than our candidate; a concurrent update restarts the
// iteration via the cheap re-load instead of a failed CAS.
__device__
inline
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}

__device__
inline
long long atomicMax(long long* address, long long val) {
    long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
// CUDA-style wrapping increment/decrement, implemented with the dedicated
// AMDGCN builtins at agent ("agent") scope with relaxed ordering.
// Per the builtin semantics, atomicInc stores ((old >= val) ? 0 : old+1)
// and atomicDec stores ((old == 0 || old > val) ? val : old-1); both
// return the pre-operation value.
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
}

__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
}
// Relaxed fetch-and-AND overloads; return the pre-operation value.
__device__
inline
int atomicAnd(int* address, int val) {
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Relaxed fetch-and-OR overloads; return the pre-operation value.
__device__
inline
int atomicOr(int* address, int val) {
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Relaxed fetch-and-XOR overloads; return the pre-operation value.
__device__
inline
int atomicXor(int* address, int val) {
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

__device__
inline
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}