// SST_Atomic_ia64.asm
// Author: Patrick Baggett
// Created: 4/16/2012
//
// Purpose:
//
// Assembly for Itanium processors (aka ia64)
// Assembles with the Intel ias assembler or GNU as.
//
// License:
//
// This program is free software. It comes without any warranty, to
// the extent permitted by applicable law. You can redistribute it
// and/or modify it under the terms of the Do What The Fuck You Want
// To Public License, Version 2, as published by Sam Hocevar. See
// http://sam.zoy.org/wtfpl/COPYING for more details.
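//
// Implementation notes:
//
// The read-modify-write routines below all follow the same compare-and-swap
// retry loop, since ia64 has no general atomic arithmetic instruction.
// Roughly equivalent C, as a sketch only (here cmpxchg stands for the
// cmpxchg4/cmpxchg8 instruction, which returns the value it observed at
// the address):
//
//   old = *x;
//   for (;;) {
//       desired = op(old, value);
//       observed = cmpxchg(x, old /* via ar.ccv */, desired);
//       if (observed == old) break;  //the swap happened
//       old = observed;              //lost a race; retry with the fresh value
//   }
//
// ar.ccv is the application register that supplies the comparison value for
// cmpxchg; it must be reloaded before every attempt.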
.global SST_Atomic_Add#
.global SST_Atomic_AddReturn#
.global SST_Atomic_AddPtr#
.global SST_Atomic_AddPtrReturn#
.global SST_Atomic_And#
.global SST_Atomic_AndReturn#
.global SST_Atomic_Or#
.global SST_Atomic_OrReturn#
.global SST_Atomic_Xor#
.global SST_Atomic_XorReturn#
.global SST_Atomic_Not#
.global SST_Atomic_NotReturn#
.global SST_Atomic_Exchange#
.global SST_Atomic_ExchangePtr#
.global SST_Atomic_ExchangeAdd#
.global SST_Atomic_ExchangeAddPtr#
.global SST_Atomic_CAS#
.global SST_Atomic_CASPtr#
.global SST_Atomic_LoadAcquire#
.global SST_Atomic_LoadAcquirePtr#
.global SST_Atomic_StoreRelease#
.global SST_Atomic_StoreReleasePtr#
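// Note: the trailing '#' marks each name as a symbol rather than a register
// mnemonic, per ia64 assembly syntax; both ias and GNU as accept it.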
.section .text
.align 32
// void SST_Atomic_Add(volatile int* x, int value);
// int SST_Atomic_AddReturn(volatile int* x, int value); //returns the updated value
// x = r32, value = r33
SST_Atomic_Add:
SST_Atomic_AddReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
add r8 = r31, r33;; //compute the sum r8 = (*x + value)
try_again1:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if equal. r30 <- old *x
mov ar.ccv = r30 //CCV <- r30 (in case compare failed)
cmp4.ne.unc p15, p0 = r30, r31 //did the CAS fail? (r30 != expected value r31)
add r8 = r33, r30 //set up return value r8 = value + *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again1;; //retry if the CAS failed
br.ret.sptk.many b0;;
// void SST_Atomic_AddPtr(volatile void** x, int value);
// void* SST_Atomic_AddPtrReturn(volatile void** x, int value); //returns the updated pointer
// x = r32, value = r33
SST_Atomic_AddPtr:
SST_Atomic_AddPtrReturn:
sxt4 r29 = r33;; //sign extend value to 64-bit quantity
mf
ld8.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
add r8 = r31, r29;; //compute the sum r8 = (*x + value)
try_again2:
cmpxchg8.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if equal. r30 <- old *x
mov ar.ccv = r30 //CCV <- r30 (in case compare failed)
cmp.ne.unc p15, p0 = r30, r31 //did the CAS fail? (r30 != expected value r31)
add r8 = r29, r30 //set up return value r8 = value + *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again2;; //retry if the CAS failed
br.ret.sptk.many b0;;
// void SST_Atomic_And(volatile int* x, int value);
// int SST_Atomic_AndReturn(volatile int* x, int value); //returns the updated value
// x = r32, value = r33
SST_Atomic_And:
SST_Atomic_AndReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
and r8 = r31, r33;; //compute r8 = (*x & value)
try_again3:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if equal. r30 <- old *x
mov ar.ccv = r30 //CCV <- r30 (in case compare failed)
cmp4.ne.unc p15, p0 = r30, r31 //did the CAS fail? (r30 != expected value r31)
and r8 = r33, r30 //set up return value r8 = value & *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again3;; //retry if the CAS failed
br.ret.sptk.many b0;;
// void SST_Atomic_Or(volatile int* x, int value);
// int SST_Atomic_OrReturn(volatile int* x, int value); //returns the updated value
// x = r32, value = r33
SST_Atomic_Or:
SST_Atomic_OrReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
or r8 = r31, r33;; //compute r8 = (*x | value)
try_again4:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if equal. r30 <- old *x
mov ar.ccv = r30 //CCV <- r30 (in case compare failed)
cmp4.ne.unc p15, p0 = r30, r31 //did the CAS fail? (r30 != expected value r31)
or r8 = r33, r30 //set up return value r8 = value | *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again4;; //retry if the CAS failed
br.ret.sptk.many b0;;
// void SST_Atomic_Xor(volatile int* x, int value);
// int SST_Atomic_XorReturn(volatile int* x, int value); //returns the updated value
// x = r32, value = r33
SST_Atomic_Xor:
SST_Atomic_XorReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
xor r8 = r31, r33;; //compute r8 = (*x ^ value)
try_again5:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if equal. r30 <- old *x
mov ar.ccv = r30 //CCV <- r30 (in case compare failed)
cmp4.ne.unc p15, p0 = r30, r31 //did the CAS fail? (r30 != expected value r31)
xor r8 = r33, r30 //set up return value r8 = value ^ *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again5;; //retry if the CAS failed
br.ret.sptk.many b0;;
// void SST_Atomic_Not(volatile int* x);
// int SST_Atomic_NotReturn(volatile int* x); //returns the updated value
// x = r32
SST_Atomic_Not:
SST_Atomic_NotReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
andcm r8 = -1, r31;; //r8 = (-1) & ~(*x) = ~(*x); andcm substitutes for the missing not instruction
try_again6:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if equal. r30 <- old *x
mov ar.ccv = r30 //CCV <- r30 (in case compare failed)
cmp4.ne.unc p15, p0 = r30, r31 //did the CAS fail? (r30 != expected value r31)
andcm r8 = -1, r30 //set up return value r8 = ~*x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again6;; //retry if the CAS failed
br.ret.sptk.many b0;;
// int SST_Atomic_Exchange(volatile int* x, int value); //returns the previous value
// x = r32, value = r33
SST_Atomic_Exchange:
mf //full fence before the swap
xchg4 r8 = [r32], r33 //atomic swap: r8 <- old *x, *x <- value (xchg carries acquire semantics)
br.ret.sptk.many b0;;
// void* SST_Atomic_ExchangePtr(volatile void** x, void* value); //returns the previous pointer
// x = r32, value = r33
SST_Atomic_ExchangePtr:
mf //full fence before the swap
xchg8 r8 = [r32], r33 //atomic swap: r8 <- old *x, *x <- value
br.ret.sptk.many b0;;
// int SST_Atomic_ExchangeAdd(volatile int* x, int value); //returns the previous value
// x = r32, value = r33
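// ia64 does provide fetchadd4.acq, but only for immediate increments of
// +/-1, 4, 8, or 16, so an arbitrary addend needs the cmpxchg loop below.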
SST_Atomic_ExchangeAdd:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
add r30 = r31, r33;; //r30 = *x + value
try_again9:
cmpxchg4.acq r8 = [r32], r30, ar.ccv;; //compare [r32] to CCV, exchange with r30 if successful. r8 <- *x
mov ar.ccv = r8 //CCV <- the value just observed, in case we retry
cmp4.ne.unc p15, p0 = r8, r31 //did the CAS fail? (r8 != expected value r31)
mov r31 = r8 //the observed value becomes the expected value for the next attempt
add r30 = r33, r8 //recompute the sum to store on retry
(p15) br.cond.dptk.few try_again9;; //retry if the CAS failed
br.ret.sptk.many b0;;
// void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value); //returns the previous pointer
// x = r32, value = r33
SST_Atomic_ExchangeAddPtr:
sxt4 r29 = r33;; //sign extend value to 64 bits
mf
ld8.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
add r30 = r31, r29;; //r30 = *x + value
try_again10:
cmpxchg8.acq r8 = [r32], r30, ar.ccv;; //compare [r32] to CCV, exchange with r30 if successful. r8 <- *x
mov ar.ccv = r8 //CCV <- the value just observed, in case we retry
cmp.ne.unc p15, p0 = r8, r31 //did the CAS fail? (r8 != expected value r31)
mov r31 = r8 //the observed value becomes the expected value for the next attempt
add r30 = r8, r29 //recompute the sum to store on retry
(p15) br.cond.dptk.few try_again10;; //retry if the CAS failed
br.ret.sptk.many b0;;
// int SST_Atomic_CAS(volatile int* x, int compare, int newValue); //returns the observed value
// x = r32, compare = r33, newValue = r34
SST_Atomic_CAS:
mf
ld4.bias r31 = [r32];; //biased load: hint exclusive ownership of the cache line before the cmpxchg
mov ar.ccv = r33;; //set CCV to the compare value
cmpxchg4.acq r8 = [r32], r34, ar.ccv;; //if *x == CCV, store newValue; r8 <- old *x either way
br.ret.sptk.many b0;;
// void* SST_Atomic_CASPtr(volatile void** x, void* compare, void* newValue); //returns the observed pointer
// x = r32, compare = r33, newValue = r34
SST_Atomic_CASPtr:
mf
ld8.bias r31 = [r32];; //biased load: hint exclusive ownership of the cache line before the cmpxchg
mov ar.ccv = r33;; //set CCV to the compare value
cmpxchg8.acq r8 = [r32], r34, ar.ccv;; //if *x == CCV, store newValue; r8 <- old *x either way
br.ret.sptk.many b0;;
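// Usage sketch (hypothetical C caller): the CAS routines return the value
// observed at *x, so success is detected by comparing against 'compare'.
// 'lock' below is an illustrative variable, not part of this library:
//
//   int observed = SST_Atomic_CAS(&lock, 0, 1);
//   if (observed == 0) { /* we won: lock was 0 and is now 1 */ }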
// int SST_Atomic_LoadAcquire(const volatile int* src);
SST_Atomic_LoadAcquire:
ld4.acq r8 = [r32];; //Load-with-acquire into return value register r8
br.ret.sptk.many b0;;
// void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
SST_Atomic_LoadAcquirePtr:
ld8.acq r8 = [r32];; //Load-with-acquire into return value register r8
br.ret.sptk.many b0;;
// void SST_Atomic_StoreRelease(volatile int* dest, int value);
SST_Atomic_StoreRelease:
st4.rel [r32] = r33;; //Store with release
br.ret.sptk.many b0;;
// void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
SST_Atomic_StoreReleasePtr:
st8.rel [r32] = r33;; //Store with release
br.ret.sptk.many b0;;
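// Usage sketch (hypothetical C caller) for the acquire/release pair:
// a producer publishes data, then store-releases a flag; a consumer
// load-acquires the flag before touching the data. 'data', 'ready',
// and 'use' are illustrative names, not part of this library.
//
//   /* producer */
//   data = 42;
//   SST_Atomic_StoreRelease(&ready, 1);
//
//   /* consumer */
//   while (!SST_Atomic_LoadAcquire(&ready)) { /* spin */ }
//   use(data); //release/acquire ordering guarantees data == 42 here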