Initial commit
This commit is contained in:
48
libsst-atomic/Makefile
Normal file
48
libsst-atomic/Makefile
Normal file
@@ -0,0 +1,48 @@
|
||||
# libsst-atomic/Makefile
|
||||
# Author: Patrick Baggett <ptbaggett@762studios.com>
|
||||
# Created: 12/23/2011
|
||||
#
|
||||
# Purpose:
|
||||
#
|
||||
# Makefile for libsst-atomic, requires GNU "make"
|
||||
#
|
||||
# License:
|
||||
#
|
||||
# This program is free software. It comes without any warranty, to
|
||||
# the extent permitted by applicable law. You can redistribute it
|
||||
# and/or modify it under the terms of the Do What The Fuck You Want
|
||||
# To Public License, Version 2, as published by Sam Hocevar. See
|
||||
# http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||
|
||||
# Should be invoked with:
|
||||
# TARGET := { release, debug }
|
||||
# ARCH := { x86, x86-64, sparc, sparc64, powerpc, powerpc64 }
|
||||
# ASM := assembler tool
|
||||
|
||||
|
||||
BINNAME := $(DIST)/libsst-atomic.a
|
||||
ifeq ($(TARGET),debug)
|
||||
BINNAME := $(subst libsst-atomic,libsst-atomic_d, $(BINNAME))
|
||||
endif
|
||||
|
||||
# Architecture -> source file conversion
|
||||
ASM_SRC := SST_Atomic_$(ARCH).asm
|
||||
|
||||
|
||||
OBJ := $(addprefix obj/$(ARCH)/$(TARGET)/,$(subst .asm,.o,$(ASM_SRC)) )
|
||||
|
||||
$(shell mkdir -p obj/$(ARCH)/$(TARGET))
|
||||
|
||||
$(BINNAME): $(OBJ)
|
||||
$(AR) cru $@ $+
|
||||
$(RANLIB) $@
|
||||
|
||||
# CLEAN
|
||||
clean:
|
||||
@-rm -r -f obj $(DIST)/libsst-atomic*.a
|
||||
|
||||
|
||||
# *.asm files to *.o files
|
||||
obj/$(ARCH)/$(TARGET)/%.o: %.asm
|
||||
@echo ASM $@
|
||||
@$(ASM) -o obj/$(ARCH)/$(TARGET)/$*.o $*.asm
|
||||
233
libsst-atomic/SST_Atomic_arm.asm
Normal file
233
libsst-atomic/SST_Atomic_arm.asm
Normal file
@@ -0,0 +1,233 @@
|
||||
@ SST_Atomic_arm.asm
|
||||
@ Author: Patrick Baggett <ptbaggett@762studios.com>
|
||||
@ Created: 6/21/2012
|
||||
@
|
||||
@ Purpose:
|
||||
@
|
||||
@ 32-bit assembly for atomic operations for ARMv6+ CPUs
|
||||
@ Assembles with GNU as
|
||||
@
|
||||
@ License:
|
||||
@
|
||||
@ This program is free software. It comes without any warranty, to
|
||||
@ the extent permitted by applicable law. You can redistribute it
|
||||
@ and/or modify it under the terms of the Do What The Fuck You Want
|
||||
@ To Public License, Version 2, as published by Sam Hocevar. See
|
||||
@ http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||
|
||||
.text
|
||||
|
||||
@Don't use thumb mode, use full ARM mode
|
||||
.arm
|
||||
|
||||
@ ELF symbol names
|
||||
.global SST_Atomic_Add
|
||||
.global SST_Atomic_AddPtr
|
||||
.global SST_Atomic_And
|
||||
.global SST_Atomic_Or
|
||||
.global SST_Atomic_Xor
|
||||
.global SST_Atomic_Not
|
||||
.global SST_Atomic_AddReturn
|
||||
.global SST_Atomic_AddPtrReturn
|
||||
.global SST_Atomic_AndReturn
|
||||
.global SST_Atomic_OrReturn
|
||||
.global SST_Atomic_XorReturn
|
||||
.global SST_Atomic_NotReturn
|
||||
.global SST_Atomic_ExchangeAdd
|
||||
.global SST_Atomic_ExchangeAddPtr
|
||||
.global SST_Atomic_Exchange
|
||||
.global SST_Atomic_ExchangePtr
|
||||
.global SST_Atomic_CAS
|
||||
.global SST_Atomic_CASPtr
|
||||
.global SST_Atomic_LoadAcquire
|
||||
.global SST_Atomic_LoadAcquirePtr
|
||||
.global SST_Atomic_StoreRelease
|
||||
.global SST_Atomic_StoreReleasePtr
|
||||
|
||||
|
||||
.type SST_Atomic_Add, %function
|
||||
.type SST_Atomic_AddPtr, %function
|
||||
.type SST_Atomic_And, %function
|
||||
.type SST_Atomic_Or, %function
|
||||
.type SST_Atomic_Xor, %function
|
||||
.type SST_Atomic_Not, %function
|
||||
.type SST_Atomic_AddReturn, %function
|
||||
.type SST_Atomic_AddPtrReturn, %function
|
||||
.type SST_Atomic_AndReturn, %function
|
||||
.type SST_Atomic_OrReturn, %function
|
||||
.type SST_Atomic_XorReturn, %function
|
||||
.type SST_Atomic_NotReturn, %function
|
||||
.type SST_Atomic_ExchangeAdd, %function
|
||||
.type SST_Atomic_ExchangeAddPtr, %function
|
||||
.type SST_Atomic_Exchange, %function
|
||||
.type SST_Atomic_ExchangePtr, %function
|
||||
.type SST_Atomic_CAS, %function
|
||||
.type SST_Atomic_CASPtr, %function
|
||||
.type SST_Atomic_LoadAcquire, %function
|
||||
.type SST_Atomic_LoadAcquirePtr, %function
|
||||
.type SST_Atomic_StoreRelease, %function
|
||||
.type SST_Atomic_StoreReleasePtr, %function
|
||||
|
||||
@=======================================================================
|
||||
@ Since pointers and integers are the same size on GCC, many of the
|
||||
@ integer/pointer code paths are the same. This of course only holds true
|
||||
@ for 32-bit compiles on ARM, which is exactly this file.
|
||||
@=======================================================================
|
||||
|
||||
@ I've merged the Atomic_XXX and Atomic_XXXReturn functions. The difference
|
||||
@ is 1 mov instruction -- not really worth making separate functions.
|
||||
|
||||
@ void SST_Atomic_Add(volatile int* x, int value)
|
||||
@ void SST_Atomic_AddPtr(volatile void* x, uintptr_t value)
|
||||
SST_Atomic_Add:
|
||||
SST_Atomic_AddPtr:
|
||||
SST_Atomic_AddReturn:
|
||||
SST_Atomic_AddPtrReturn:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
|
||||
_again8:
|
||||
ldrex r2, [r0] @ r2 <- *x
|
||||
add r2, r2, r1 @ r2 += value
|
||||
strex r3, r2, [r0] @ try { *x <- r2 }
|
||||
teq r3, #0 @ success?
|
||||
bne _again8 @ No, reload and retry
|
||||
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
mov r0, r2 @ Setup return value
|
||||
bx lr @ Return
|
||||
|
||||
@ void SST_Atomic_And(volatile int* x, int value)
|
||||
SST_Atomic_And:
|
||||
SST_Atomic_AndReturn:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
|
||||
_again7:
|
||||
ldrex r2, [r0] @ r2 <- *x
|
||||
and r2, r2, r1 @ r2 &= value
|
||||
strex r3, r2, [r0] @ try { *x <- r2 }
|
||||
teq r3, #0 @ success?
|
||||
bne _again7 @ No, reload and retry
|
||||
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
mov r0, r2 @ Setup return value
|
||||
bx lr @ Return
|
||||
|
||||
@ void SST_Atomic_Or(volatile int* x, int value)
|
||||
SST_Atomic_Or:
|
||||
SST_Atomic_OrReturn:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
|
||||
_again6:
|
||||
ldrex r2, [r0] @ r2 <- *x
|
||||
orr r2, r2, r1 @ r2 |= value
|
||||
strex r3, r2, [r0] @ try { *x <- r2 }
|
||||
teq r3, #0 @ success?
|
||||
bne _again6 @ No, reload and retry
|
||||
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
mov r0, r2 @ Setup return value
|
||||
bx lr @ Return
|
||||
|
||||
@ void SST_Atomic_Xor(volatile int* x, int value)
|
||||
SST_Atomic_Xor:
|
||||
SST_Atomic_XorReturn:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
|
||||
_again5:
|
||||
ldrex r2, [r0] @ r2 <- *x
|
||||
eor r2, r2, r1 @ r2 ^= value
|
||||
strex r3, r2, [r0] @ try { *x <- r2 }
|
||||
teq r3, #0 @ success?
|
||||
bne _again5 @ No, reload and retry
|
||||
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
mov r0, r2 @ Setup return value
|
||||
bx lr @ Return
|
||||
|
||||
@ void SST_Atomic_Not(volatile int* x)
|
||||
SST_Atomic_Not:
|
||||
SST_Atomic_NotReturn:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
|
||||
_again4:
|
||||
ldrex r2, [r0] @ r2 <- *x
|
||||
mvn r2, r2 @ r2 = ~value
|
||||
strex r3, r2, [r0] @ try { *x <- r2 }
|
||||
teq r3, #0 @ success?
|
||||
bne _again4 @ No, reload and retry
|
||||
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
mov r0, r2 @ Setup return value
|
||||
bx lr @ Return
|
||||
|
||||
|
||||
@ int SST_Atomic_Exchange(volatile int* x, int value)
|
||||
@ void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
|
||||
SST_Atomic_Exchange:
|
||||
SST_Atomic_ExchangePtr:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
|
||||
_again3:
|
||||
ldrex r2, [r0] @ r2 <- *x
|
||||
strex r3, r1, [r0] @ try { *x <- r1 }
|
||||
teq r3, #0 @ success?
|
||||
bne _again3 @ No, reload and retry
|
||||
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
mov r0, r2 @ Setup return value
|
||||
bx lr @ Return
|
||||
|
||||
@ int SST_Atomic_ExchangeAdd(volatile int* x, int value);
|
||||
@ void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
|
||||
SST_Atomic_ExchangeAdd:
|
||||
SST_Atomic_ExchangeAddPtr:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
push {r4}
|
||||
|
||||
_again2:
|
||||
ldrex r2, [r0] @ r2 <- *x
|
||||
mov r4, r2 @ r4 <- *x
|
||||
add r2, r2, r1 @ r2 += value
|
||||
strex r3, r2, [r0] @ try { *x <- r2 }
|
||||
teq r3, #0 @ success?
|
||||
bne _again2 @ No, reload and retry
|
||||
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
mov r0, r4 @ Setup return value as the old value of *x
|
||||
pop {r4}
|
||||
bx lr @ Return
|
||||
|
||||
|
||||
@ int SST_Atomic_CAS(int* dest, int compare, int newValue);
|
||||
@ void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
|
||||
SST_Atomic_CAS:
|
||||
SST_Atomic_CASPtr:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
push {r4}
|
||||
mov r4, #0
|
||||
|
||||
_again1:
|
||||
ldrex r3, [r0] @ r3 <- *dest
|
||||
teq r3, r1 @ if(r3 == compare) {
|
||||
strexeq r4, r2, [r0] @ try { *dest <- newValue } }
|
||||
teq r4, #0 @ success?
|
||||
bne _again1 @ No, reload and retry
|
||||
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
mov r0, r3 @ Setup return value as the value of *dest
|
||||
pop {r4}
|
||||
bx lr @ Return
|
||||
|
||||
|
||||
|
||||
SST_Atomic_LoadAcquire:
|
||||
SST_Atomic_LoadAcquirePtr:
|
||||
ldr r0, [r0]
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
bx lr
|
||||
|
||||
SST_Atomic_StoreRelease:
|
||||
SST_Atomic_StoreReleasePtr:
|
||||
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
|
||||
str r1, [r0]
|
||||
bx lr
|
||||
265
libsst-atomic/SST_Atomic_ia64.asm
Normal file
265
libsst-atomic/SST_Atomic_ia64.asm
Normal file
@@ -0,0 +1,265 @@
|
||||
// SST_Atomic_ia64.asm
|
||||
// Author: Patrick Baggett
|
||||
// Created: 4/16/2012
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
// Assembly for Itanium processors (aka ia64)
|
||||
// Assembles with Intel ias assembler or GNU as.
|
||||
//
|
||||
// License:
|
||||
//
|
||||
// This program is free software. It comes without any warranty, to
|
||||
// the extent permitted by applicable law. You can redistribute it
|
||||
// and/or modify it under the terms of the Do What The Fuck You Want
|
||||
// To Public License, Version 2, as published by Sam Hocevar. See
|
||||
// http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||
|
||||
|
||||
.global SST_Atomic_Add#
|
||||
.global SST_Atomic_AddReturn#
|
||||
.global SST_Atomic_AddPtr#
|
||||
.global SST_Atomic_AddPtrReturn#
|
||||
.global SST_Atomic_And#
|
||||
.global SST_Atomic_AndReturn#
|
||||
.global SST_Atomic_Or#
|
||||
.global SST_Atomic_OrReturn#
|
||||
.global SST_Atomic_Xor#
|
||||
.global SST_Atomic_XorReturn#
|
||||
.global SST_Atomic_Not#
|
||||
.global SST_Atomic_NotReturn#
|
||||
.global SST_Atomic_Exchange#
|
||||
.global SST_Atomic_ExchangePtr#
|
||||
.global SST_Atomic_ExchangeAdd#
|
||||
.global SST_Atomic_ExchangeAddPtr#
|
||||
.global SST_Atomic_CAS#
|
||||
.global SST_Atomic_CASPtr#
|
||||
.global SST_Atomic_LoadAcquire#
|
||||
.global SST_Atomic_LoadAcquirePtr#
|
||||
.global SST_Atomic_StoreRelease#
|
||||
.global SST_Atomic_StoreReleasePtr#
|
||||
.section .text
|
||||
|
||||
.align 32
|
||||
|
||||
// SST_Atomic_Add(volatile int* x, int value);
|
||||
// x = r32, value = r33
|
||||
SST_Atomic_Add:
|
||||
SST_Atomic_AddReturn:
|
||||
mf
|
||||
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
|
||||
mov ar.ccv = r31 //Set CCV to be *x
|
||||
add r8 = r31, r33;; //generated summed r8 = (*x + value)
|
||||
|
||||
try_again1:
|
||||
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 is successful. r30 <- *x
|
||||
|
||||
mov ar.ccv = r30 //CCV <- r30
|
||||
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
|
||||
add r8 = r33, r30 //set up return value r8 = value + *x (new)
|
||||
mov r31 = r30 //set up old *x to be the new value if we need to loop again
|
||||
(p15) br.cond.dptk.few try_again1;; //branch if old *x != new *x
|
||||
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
|
||||
// SST_Atomic_AddPtr(volatile void** x, int value);
|
||||
// x = r32, value = r33
|
||||
SST_Atomic_AddPtr:
|
||||
SST_Atomic_AddPtrReturn:
|
||||
sxt4 r29 = r33;; //sign extend value to 64-bit quantity
|
||||
mf
|
||||
ld8.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
|
||||
mov ar.ccv = r31 //Set CCV to be *x
|
||||
add r8 = r31, r29;; //generated summed r8 = (*x + value)
|
||||
|
||||
try_again2:
|
||||
cmpxchg8.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 is successful. r30 <- *x
|
||||
|
||||
mov ar.ccv = r30 //CCV <- r30
|
||||
cmp.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
|
||||
add r8 = r29, r30 //set up return value r8 = value + *x (new)
|
||||
mov r31 = r30 //set up old *x to be the new value if we need to loop again
|
||||
(p15) br.cond.dptk.few try_again2;; //branch if old *x != new *x
|
||||
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
|
||||
|
||||
// SST_Atomic_And(volatile int* x, int value);
|
||||
// x = r32, value = r33
|
||||
SST_Atomic_And:
|
||||
SST_Atomic_AndReturn:
|
||||
mf
|
||||
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
|
||||
mov ar.ccv = r31 //Set CCV to be *x
|
||||
and r8 = r31, r33;; //generated r8 = (*x & value)
|
||||
|
||||
try_again3:
|
||||
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 is successful. r30 <- *x
|
||||
|
||||
mov ar.ccv = r30 //CCV <- r30
|
||||
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
|
||||
and r8 = r33, r30 //set up return value r8 = value & *x (new)
|
||||
mov r31 = r30 //set up old *x to be the new value if we need to loop again
|
||||
(p15) br.cond.dptk.few try_again3;; //branch if old *x != new *x
|
||||
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
|
||||
// SST_Atomic_Or(volatile int* x, int value);
|
||||
// x = r32, value = r33
|
||||
SST_Atomic_Or:
|
||||
SST_Atomic_OrReturn:
|
||||
mf
|
||||
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
|
||||
mov ar.ccv = r31 //Set CCV to be *x
|
||||
or r8 = r31, r33;; //generated r8 = (*x | value)
|
||||
|
||||
try_again4:
|
||||
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 is successful. r30 <- *x
|
||||
|
||||
mov ar.ccv = r30 //CCV <- r30
|
||||
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
|
||||
or r8 = r33, r30 //set up return value r8 = value | *x (new)
|
||||
mov r31 = r30 //set up old *x to be the new value if we need to loop again
|
||||
(p15) br.cond.dptk.few try_again4;; //branch if old *x != new *x
|
||||
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
|
||||
// SST_Atomic_Xor(volatile int* x, int value);
|
||||
// x = r32, value = r33
|
||||
SST_Atomic_Xor:
|
||||
SST_Atomic_XorReturn:
|
||||
mf
|
||||
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
|
||||
mov ar.ccv = r31 //Set CCV to be *x
|
||||
xor r8 = r31, r33;; //generated r8 = (*x | value)
|
||||
|
||||
try_again5:
|
||||
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 is successful. r30 <- *x
|
||||
|
||||
mov ar.ccv = r30 //CCV <- r30
|
||||
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
|
||||
xor r8 = r33, r30 //set up return value r8 = value & *x (new)
|
||||
mov r31 = r30 //set up old *x to be the new value if we need to loop again
|
||||
(p15) br.cond.dptk.few try_again5;; //branch if old *x != new *x
|
||||
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
|
||||
// SST_Atomic_Not(volatile int* x);
|
||||
// x = r32
|
||||
SST_Atomic_Not:
|
||||
SST_Atomic_NotReturn:
|
||||
mf
|
||||
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
|
||||
mov ar.ccv = r31 //Set CCV to be *x
|
||||
andcm r8 = -1, r31;; //generated r8 = ~(*x)
|
||||
|
||||
try_again6:
|
||||
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 is successful. r30 <- *x
|
||||
|
||||
mov ar.ccv = r30 //CCV <- r30 (in case compare failed)
|
||||
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
|
||||
andcm r8 = -1, r30 //set up return value r8 = ~*x (new)
|
||||
(p15) br.cond.dptk.few try_again6;; //branch if old *x != new *x
|
||||
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
|
||||
// SST_Atomic_Exchange(volatile int* x, int value);
|
||||
// x = r32, value = r33
|
||||
SST_Atomic_Exchange:
|
||||
mf
|
||||
xchg4 r8 = [r32], r33
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
|
||||
// SST_Atomic_ExchangePtr(volatile int* x, int value);
|
||||
// x = r32, value = r33
|
||||
SST_Atomic_ExchangePtr:
|
||||
mf
|
||||
xchg8 r8 = [r32], r33
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
// int SST_Atomic_ExchangeAdd(volatile int* x, int value)
|
||||
SST_Atomic_ExchangeAdd:
|
||||
mf
|
||||
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
|
||||
mov ar.ccv = r31 //Set CCV to be *x
|
||||
add r30 = r31, r33;; //r30 = *x + value
|
||||
|
||||
try_again9:
|
||||
cmpxchg4.acq r8 = [r32], r30, ar.ccv;; //compare [r32] to CCV, exchange with r30 if successful. r8 <- *x
|
||||
mov ar.ccv = r8
|
||||
cmp4.ne.unc p15, p0 = r8, r31
|
||||
mov r31 = r8
|
||||
add r30 = r33, r8
|
||||
(p15) br.cond.dptk.few try_again9;;
|
||||
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
// int SST_Atomic_ExchangeAddPtr(volatile void** x, int value)
|
||||
SST_Atomic_ExchangeAddPtr:
|
||||
sxt4 r29 = r33;; //sign extend value to 64 bits
|
||||
mf
|
||||
ld8.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
|
||||
mov ar.ccv = r31 //Set CCV to be *x
|
||||
add r30 = r31, r29;; //r30 = *x + value
|
||||
|
||||
try_again10:
|
||||
cmpxchg8.acq r8 = [r32], r30, ar.ccv;; //compare [r32] to CCV, exchange with r30 if successful. r8 <- *x
|
||||
mov ar.ccv = r8
|
||||
cmp.ne.unc p15, p0 = r8, r31
|
||||
mov r31 = r8
|
||||
add r30 = r8, r29
|
||||
(p15) br.cond.dptk.few try_again10;;
|
||||
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
// x = r32, compare = r33, newValue = r34
|
||||
SST_Atomic_CAS:
|
||||
mf
|
||||
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
mov ar.ccv = r33;; //Set CCV to be *x
|
||||
cmpxchg4.acq r8 = [r32], r34, ar.ccv;; //compare [r32] to CCV, exchange with r34 is successful. r8 <- *x
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
// x = r32, compare = r33, newValue = r34
|
||||
SST_Atomic_CASPtr:
|
||||
mf
|
||||
ld8.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
|
||||
mov ar.ccv = r33;; //Set CCV to be *x
|
||||
cmpxchg8.acq r8 = [r32], r34, ar.ccv;; //compare [r32] to CCV, exchange with r34 is successful. r8 <- *x
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
// int SST_Atomic_LoadAcquire(const volatile int* src);
|
||||
SST_Atomic_LoadAcquire:
|
||||
ld4.acq r8 = [r32];; //Load-with-acquire into return value register r8
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
// void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
|
||||
SST_Atomic_LoadAcquirePtr:
|
||||
ld8.acq r8 = [r32];; //Load-with-acquire into return value register r8
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
// void SST_Atomic_StoreRelease(volatile int* dest, int value);
|
||||
SST_Atomic_StoreRelease:
|
||||
st4.rel [r32] = r33;; //Store with release
|
||||
br.ret.sptk.many b0;;
|
||||
|
||||
|
||||
// void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
|
||||
SST_Atomic_StoreReleasePtr:
|
||||
st8.rel [r32] = r33;; //Store with release
|
||||
br.ret.sptk.many b0;;
|
||||
219
libsst-atomic/SST_Atomic_mips.asm
Normal file
219
libsst-atomic/SST_Atomic_mips.asm
Normal file
@@ -0,0 +1,219 @@
|
||||
# SST_Atomic_mips.asm
|
||||
# Author: Patrick Baggett <ptbaggett@762studios.com>
|
||||
# Created: 1/15/2013
|
||||
#
|
||||
# Purpose:
|
||||
#
|
||||
# 32-bit assembly for atomic operations for MIPS CPUs. Targets MIPS32R2 ISA, but should run on
|
||||
# older MIPS machines (e.g. MIPS-III / Loongson).
|
||||
# Assembles with GNU as (doubtfully IRIX)
|
||||
#
|
||||
# License:
|
||||
#
|
||||
# This program is free software. It comes without any warranty, to
|
||||
# the extent permitted by applicable law. You can redistribute it
|
||||
# and/or modify it under the terms of the Do What The Fuck You Want
|
||||
# To Public License, Version 2, as published by Sam Hocevar. See
|
||||
# http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||
|
||||
.text
|
||||
|
||||
#Don't reorder instructions
|
||||
.set noreorder
|
||||
|
||||
# ELF symbol names
|
||||
.global SST_Atomic_Add
|
||||
.global SST_Atomic_AddPtr
|
||||
.global SST_Atomic_And
|
||||
.global SST_Atomic_Or
|
||||
.global SST_Atomic_Xor
|
||||
.global SST_Atomic_Not
|
||||
.global SST_Atomic_AddReturn
|
||||
.global SST_Atomic_AddPtrReturn
|
||||
.global SST_Atomic_AndReturn
|
||||
.global SST_Atomic_OrReturn
|
||||
.global SST_Atomic_XorReturn
|
||||
.global SST_Atomic_NotReturn
|
||||
.global SST_Atomic_ExchangeAdd
|
||||
.global SST_Atomic_ExchangeAddPtr
|
||||
.global SST_Atomic_Exchange
|
||||
.global SST_Atomic_ExchangePtr
|
||||
.global SST_Atomic_CAS
|
||||
.global SST_Atomic_CASPtr
|
||||
.global SST_Atomic_LoadAcquire
|
||||
.global SST_Atomic_LoadAcquirePtr
|
||||
.global SST_Atomic_StoreRelease
|
||||
.global SST_Atomic_StoreReleasePtr
|
||||
|
||||
#===================================================================================
|
||||
# Since pointers and integers are the same size on 32-bit code (GCC), many of the
|
||||
# integer/pointer code paths are the same. This of course only holds true
|
||||
# for 32-bit compiles on MIPS, which is exactly this file.
|
||||
#===================================================================================
|
||||
|
||||
# MIPS has a "sync" instruction with a bitfield describing "what" to sync, sort of like SPARC's "membar" instruction.
|
||||
# By default, "sync" means "sync 0" -- a full memory barrier including I/O devices/instructions. This is a very heavy-weight operation and used for things like ordering MMIO accesses
|
||||
# MIPS32R2 defines "sync 0x10" as a full memory barrier that doesn't synchronize I/O, and "sync 0x04" as a "write/write" memory barrier. However,
|
||||
# we can't use MIPS32R2 code only. But as it would turn out, it is OK because "sync <k>" is treated like "sync 0" in older MIPS CPUs, meaning full barrier,
|
||||
# but it will automatically be faster on MIPS32R2 CPUs. Neat.
|
||||
|
||||
|
||||
# TODO: not sure if this is a CPU hazard on any remotely recent MIPS CPU (i.e. not MIPS-I or MIPS-II)
|
||||
# : beq $rs, $rt, someLabel //compare rs to rt
|
||||
# : addiu $rs, $rs, 1 //increment rs
|
||||
# I can't find any literature that says this is not allowed on anything other than MIPS-I, which has no interlocks.
|
||||
|
||||
# void SST_Atomic_Add(volatile int* x, int value)
|
||||
# void SST_Atomic_AddPtr(volatile void** x, int value)
|
||||
# int SST_Atomic_AddReturn(volatile int* x, int value)
|
||||
# void* SST_Atomic_AddPtrReturn(volatile void** x, int value)
|
||||
SST_Atomic_Add:
|
||||
SST_Atomic_AddPtr:
|
||||
SST_Atomic_AddReturn:
|
||||
SST_Atomic_AddPtrReturn:
|
||||
sync 0x10 # sync_mb
|
||||
|
||||
1:
|
||||
ll $t4, 0($a0) # t4 <- *x
|
||||
addu $t5, $t4, $a1 # t5 <- t4 + value
|
||||
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
|
||||
beqz $t5,1b # if failure, retry
|
||||
addu $v0, $t4, $a1 # (delay slot) $v0 <- *x + value (return value)
|
||||
|
||||
jr $ra # return
|
||||
sync 0x04 # (delay slot) sync_wmb
|
||||
|
||||
# void SST_Atomic_And(volatile int* x, int value)
|
||||
# int SST_Atomic_AndReturn(volatile int* x, int value)
|
||||
SST_Atomic_And:
|
||||
SST_Atomic_AndReturn:
|
||||
sync 0x10 # sync_mb
|
||||
|
||||
1:
|
||||
ll $t4, 0($a0) # t4 <- *x
|
||||
and $t5, $t4, $a1 # t5 <- t4 & value
|
||||
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
|
||||
beqz $t5,1b # if failure, retry
|
||||
and $v0, $t4, $a1 # (delay slot) compute return value
|
||||
|
||||
jr $ra # return
|
||||
sync 0x04 # (delay slot) sync_wmb
|
||||
|
||||
# void SST_Atomic_Or(volatile int* x, int value)
|
||||
# int SST_Atomic_OrReturn(volatile int* x, int value)
|
||||
SST_Atomic_Or:
|
||||
SST_Atomic_OrReturn:
|
||||
sync 0x10 # sync_mb
|
||||
|
||||
1:
|
||||
ll $t4, 0($a0) # t4 <- *x
|
||||
or $t5, $t4, $a1 # t5 <- t4 | value
|
||||
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
|
||||
beqz $t5,1b # if failure, retry
|
||||
or $v0, $t4, $a1 # (delay slot) compute return value
|
||||
|
||||
jr $ra # return
|
||||
sync 0x04 # (delay slot) sync_wmb
|
||||
|
||||
# void SST_Atomic_Xor(volatile int* x, int value)
|
||||
# int SST_Atomic_XorReturn(volatile int* x, int value)
|
||||
SST_Atomic_XorReturn:
|
||||
SST_Atomic_Xor:
|
||||
sync 0x10 # sync_mb
|
||||
|
||||
1:
|
||||
ll $t4, 0($a0) # t4 <- *x
|
||||
xor $t5, $t4, $a1 # t5 <- t4 ^ value
|
||||
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
|
||||
beqz $t5,1b # if failure, retry
|
||||
xor $v0, $t4, $a1 # (delay slot) Nothing to do
|
||||
|
||||
jr $ra # return
|
||||
sync 0x04 # (delay slot) sync_wmb
|
||||
|
||||
# void SST_Atomic_Not(volatile int* x)
|
||||
SST_Atomic_Not:
|
||||
SST_Atomic_NotReturn:
|
||||
sync 0x10 # sync_mb
|
||||
|
||||
1:
|
||||
ll $t4, 0($a0) # t4 <- *x
|
||||
nor $t5, $t4, $0 # t5 <- ~t4
|
||||
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
|
||||
beqz $t5,1b # if failure, retry
|
||||
nor $v0, $t4, $0 # (delay slot) compute return value
|
||||
|
||||
jr $ra # return
|
||||
sync 0x04 # (delay slot) sync_wmb
|
||||
|
||||
|
||||
# int SST_Atomic_Exchange(volatile int* x, int value)
|
||||
# void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
|
||||
SST_Atomic_Exchange:
|
||||
SST_Atomic_ExchangePtr:
|
||||
sync 0x10 # sync_mb
|
||||
|
||||
move $t4, $a1 # t4 <- value
|
||||
|
||||
1:
|
||||
ll $v0, 0($a0) # v0 <- *x
|
||||
sc $t4, 0($a0) # try { *x <- t4 }, set t4 = 1/0 depending
|
||||
beqz $t4,1b # if failure, retry
|
||||
move $t4, $a1 # (delay slot) reload t4 <- value for retry
|
||||
|
||||
jr $ra # return
|
||||
sync 0x04 # (delay slot) sync_wmb
|
||||
|
||||
|
||||
# int SST_Atomic_ExchangeAdd(volatile int* x, int value);
|
||||
# void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
|
||||
SST_Atomic_ExchangeAdd:
|
||||
SST_Atomic_ExchangeAddPtr:
|
||||
sync 0x10 # sync_mb
|
||||
|
||||
1:
|
||||
ll $v0, 0($a0) # v0 <- *x
|
||||
addu $t4, $v0, $a1 # t4 <- v0 + value
|
||||
sc $t4, 0($a0) # try { *x <- t4 }, set t4 = 1/0 depending
|
||||
beqz $t4,1b # if failure, retry
|
||||
nop # (delay slot) do nothing
|
||||
|
||||
jr $ra # return
|
||||
sync 0x04 # (delay slot) sync_wmb
|
||||
|
||||
|
||||
# int SST_Atomic_CAS(int* dest, int compare, int newValue);
|
||||
# void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
|
||||
# a0 = dest, a1 = compare, a2 = newValue
|
||||
SST_Atomic_CAS:
|
||||
SST_Atomic_CASPtr:
|
||||
sync 0x10 # sync_mb
|
||||
1:
|
||||
move $t4, $a2 # t4 <- newValue
|
||||
ll $v0, 0($a0) # v0 <- *dest (captures "old" value of *dest, also is the return value)
|
||||
bne $v0, $a1, 3f # compare v0 with a1 ("compare"), and if not equal, exit loop
|
||||
nop # (delay slot) do nothing
|
||||
sc $t4, 0($a0) # try { *x <- t4 }, set t4 = 1/0 depending
|
||||
beqz $t4,1b # if failure, retry
|
||||
nop # (delay slot) do nothing
|
||||
|
||||
sync 0x04 # sync_wmb
|
||||
|
||||
# Return
|
||||
3:
|
||||
jr $ra # return
|
||||
nop # (delay slot) do nothing
|
||||
|
||||
SST_Atomic_LoadAcquire:
|
||||
SST_Atomic_LoadAcquirePtr:
|
||||
lw $v0, 0($a0)
|
||||
sync 0x11 # sync_acquire: LoadLoad | LoadStore
|
||||
jr $ra
|
||||
nop
|
||||
|
||||
SST_Atomic_StoreRelease:
|
||||
SST_Atomic_StoreReleasePtr:
|
||||
sync 0x12 # sync_release: LoadStore | StoreStore
|
||||
jr $ra
|
||||
sw $a1, 0($a0)
|
||||
|
||||
277
libsst-atomic/SST_Atomic_mips64.asm
Normal file
277
libsst-atomic/SST_Atomic_mips64.asm
Normal file
@@ -0,0 +1,277 @@
|
||||
# SST_Atomic_mips64.asm
|
||||
# Author: Patrick Baggett <ptbaggett@762studios.com>
|
||||
# Created: 7/1/2013
|
||||
#
|
||||
# Purpose:
|
||||
#
|
||||
# 64-bit assembly for atomic operations for MIPS CPUs. Targets MIPS64R2 ISA, but should run on
|
||||
# older MIPS machines (e.g. MIPS-III / Loongson).
|
||||
# Assembles with GNU as (doubtfully IRIX)
|
||||
#
|
||||
# License:
|
||||
#
|
||||
# This program is free software. It comes without any warranty, to
|
||||
# the extent permitted by applicable law. You can redistribute it
|
||||
# and/or modify it under the terms of the Do What The Fuck You Want
|
||||
# To Public License, Version 2, as published by Sam Hocevar. See
|
||||
# http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||
|
||||
.text
|
||||
|
||||
#Don't reorder instructions
|
||||
.set noreorder
|
||||
|
||||
# ELF symbol names
|
||||
.global SST_Atomic_Add
|
||||
.global SST_Atomic_AddPtr
|
||||
.global SST_Atomic_And
|
||||
.global SST_Atomic_Or
|
||||
.global SST_Atomic_Xor
|
||||
.global SST_Atomic_Not
|
||||
.global SST_Atomic_AddReturn
|
||||
.global SST_Atomic_AddPtrReturn
|
||||
.global SST_Atomic_AndReturn
|
||||
.global SST_Atomic_OrReturn
|
||||
.global SST_Atomic_XorReturn
|
||||
.global SST_Atomic_NotReturn
|
||||
.global SST_Atomic_ExchangeAdd
|
||||
.global SST_Atomic_ExchangeAddPtr
|
||||
.global SST_Atomic_Exchange
|
||||
.global SST_Atomic_ExchangePtr
|
||||
.global SST_Atomic_CAS
|
||||
.global SST_Atomic_CASPtr
|
||||
.global SST_Atomic_LoadAcquire
|
||||
.global SST_Atomic_LoadAcquirePtr
|
||||
.global SST_Atomic_StoreRelease
|
||||
.global SST_Atomic_StoreReleasePtr
|
||||
|
||||
# Unbelievable. binutils-2.22 doesn't support register names in n64 (-mabi=64) mode, but it does in o32 (-mabi=32)
|
||||
# Here are the symbolic names for registers used
|
||||
# $v0-$v1 = $2-$3
|
||||
# $a0-$a4 = $4-$7
|
||||
# $t4-$t5 = $8-$9
|
||||
# $ra = $31
|
||||
|
||||
|
||||
# MIPS has a "sync" instruction with a bitfield describing "what" to sync, sort of like SPARC's "membar" instruction.
|
||||
# By default, "sync" means "sync 0" -- a full memory barrier including I/O devices/instructions. This is a very heavy-weight operation and used for things like ordering MMIO accesses
|
||||
# MIPS64R2 defines "sync 0x10" as a full memory barrier that doesn't synchronize I/O, and "sync 0x04" as a "write/write" memory barrier. However,
|
||||
# we can't use MIPS64R2 code only. But as it would turn out, it is OK because "sync <k>" is treated like "sync 0" in older MIPS CPUs, meaning full barrier,
|
||||
# but it will automatically be faster on MIPS32R2 CPUs. Neat.
|
||||
|
||||
# void SST_Atomic_Add(volatile int* x, int value)
# int  SST_Atomic_AddReturn(volatile int* x, int value)
# n64 ABI, numeric registers: $2=v0, $4=a0, $5=a1, $12=t4, $13=t5, $31=ra
SST_Atomic_Add:
SST_Atomic_AddReturn:
	sync 0x10		# full barrier before the RMW (sync_mb)

1:
	ll $12, 0($4)		# t4 <- *x
	addu $13, $12, $5	# t5 <- t4 + value
	sc $13, 0($4)		# try *x <- t5; t5 = 1 on success, 0 on failure
	beqz $13,1b		# reservation lost -> retry
	addu $2, $12, $5	# (delay slot) v0 <- old + value (return value)

	jr $31			# return
	sync 0x04		# (delay slot) write barrier (sync_wmb)

# void  SST_Atomic_AddPtr(volatile void** x, int value)
# void* SST_Atomic_AddPtrReturn(volatile void** x, int value)
SST_Atomic_AddPtr:
SST_Atomic_AddPtrReturn:
	sync 0x10		# sync_mb
1:
	lld $12, 0($4)		# t4 <- *x (64-bit)
	daddu $13, $12, $5	# t5 <- t4 + value
	scd $13, 0($4)		# try *x <- t5; t5 = 1/0
	beqz $13,1b		# retry on failure
	daddu $2, $12, $5	# (delay slot) v0 <- old + value (return value)

	jr $31			# return
	sync 0x04		# (delay slot) sync_wmb

# void SST_Atomic_And(volatile int* x, int value)
# int  SST_Atomic_AndReturn(volatile int* x, int value)
SST_Atomic_And:
SST_Atomic_AndReturn:
	sync 0x10		# sync_mb

1:
	ll $12, 0($4)		# t4 <- *x
	and $13, $12, $5	# t5 <- t4 & value
	sc $13, 0($4)		# try *x <- t5; t5 = 1/0
	beqz $13,1b		# retry on failure
	and $2, $12, $5		# (delay slot) v0 <- old & value (return value)

	jr $31			# return
	sync 0x04		# (delay slot) sync_wmb

# void SST_Atomic_Or(volatile int* x, int value)
# int  SST_Atomic_OrReturn(volatile int* x, int value)
SST_Atomic_Or:
SST_Atomic_OrReturn:
	sync 0x10		# sync_mb

1:
	ll $12, 0($4)		# t4 <- *x
	or $13, $12, $5		# t5 <- t4 | value
	sc $13, 0($4)		# try *x <- t5; t5 = 1/0
	beqz $13,1b		# retry on failure
	or $2, $12, $5		# (delay slot) v0 <- old | value (return value)

	jr $31			# return
	sync 0x04		# (delay slot) sync_wmb

# void SST_Atomic_Xor(volatile int* x, int value)
# int  SST_Atomic_XorReturn(volatile int* x, int value)
SST_Atomic_XorReturn:
SST_Atomic_Xor:
	sync 0x10		# sync_mb

1:
	ll $12, 0($4)		# t4 <- *x
	xor $13, $12, $5	# t5 <- t4 ^ value
	sc $13, 0($4)		# try *x <- t5; t5 = 1/0
	beqz $13,1b		# retry on failure
	xor $2, $12, $5		# (delay slot) v0 <- old ^ value (return value)

	jr $31			# return
	sync 0x04		# (delay slot) sync_wmb

# void SST_Atomic_Not(volatile int* x)
# int  SST_Atomic_NotReturn(volatile int* x)
SST_Atomic_Not:
SST_Atomic_NotReturn:
	sync 0x10		# sync_mb

1:
	ll $12, 0($4)		# t4 <- *x
	nor $13, $12, $0	# t5 <- ~t4 (nor with $zero)
	sc $13, 0($4)		# try *x <- t5; t5 = 1/0
	beqz $13,1b		# retry on failure
	nor $2, $12, $0		# (delay slot) v0 <- ~old (return value)

	jr $31			# return
	sync 0x04		# (delay slot) sync_wmb
|
||||
|
||||
|
||||
# int SST_Atomic_Exchange(volatile int* x, int value)
# Returns the previous value of *x after atomically storing "value".
SST_Atomic_Exchange:
	sync 0x10		# full barrier before the RMW (sync_mb)

	move $12, $5		# t4 <- value (sc consumes its source register)

1:
	ll $2, 0($4)		# v0 <- *x (old value, returned to the caller)
	sc $12, 0($4)		# try *x <- t4; t4 = 1 on success, 0 on failure
	beqz $12,1b		# reservation lost -> retry
	move $12, $5		# (delay slot) reload t4 <- value for the retry

	jr $31			# return
	sync 0x04		# (delay slot) write barrier (sync_wmb)

# void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
SST_Atomic_ExchangePtr:
	sync 0x10		# sync_mb
	move $12, $5		# t4 <- value
1:
	lld $2, 0($4)		# v0 <- *x (64-bit old value)
	scd $12, 0($4)		# try *x <- t4; t4 = 1/0
	beqz $12,1b		# retry on failure
	move $12, $5		# (delay slot) reload t4 <- value for the retry

	jr $31			# return
	sync 0x04		# (delay slot) sync_wmb

# int SST_Atomic_ExchangeAdd(volatile int* x, int value);
# Returns the value of *x before the add.
SST_Atomic_ExchangeAdd:
	sync 0x10		# sync_mb

1:
	ll $2, 0($4)		# v0 <- *x (pre-add value, returned)
	addu $12, $2, $5	# t4 <- v0 + value
	sc $12, 0($4)		# try *x <- t4; t4 = 1/0
	beqz $12,1b		# retry on failure
	nop			# (delay slot)

	jr $31			# return
	sync 0x04		# (delay slot) sync_wmb

# void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
SST_Atomic_ExchangeAddPtr:
	sync 0x10		# sync_mb
1:
	lld $2, 0($4)		# v0 <- *x (64-bit pre-add value, returned)
	daddu $12, $2, $5	# t4 <- v0 + value
	scd $12, 0($4)		# try *x <- t4; t4 = 1/0
	beqz $12,1b		# retry on failure
	nop			# (delay slot)

	jr $31			# return
	sync 0x04		# (delay slot) sync_wmb
|
||||
|
||||
|
||||
|
||||
# int SST_Atomic_CAS(int* dest, int compare, int newValue);
# a0($4) = dest, a1($5) = compare, a2($6) = newValue
# Returns the old value of *dest; the swap happened iff it equals "compare".
SST_Atomic_CAS:
	sync 0x10		# full barrier before the RMW (sync_mb)
1:
	move $12, $6		# t4 <- newValue; sc consumes its source, reload per attempt
	ll $2, 0($4)		# v0 <- *dest (old value; also the return value)
	bne $2, $5, 3f		# *dest != compare -> give up, return old value
	nop			# (delay slot)
	sc $12, 0($4)		# try *dest <- newValue; t4 = 1 on success, 0 on failure
	beqz $12,1b		# reservation lost -> retry
	nop			# (delay slot)

	sync 0x04		# write barrier after a successful swap (sync_wmb)

# Return (old value already in $2)
3:
	jr $31
	nop			# (delay slot)

# void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
# 64-bit variant of the above.
SST_Atomic_CASPtr:
	sync 0x10		# sync_mb
1:
	move $12, $6		# t4 <- newValue (reloaded each attempt)
	lld $2, 0($4)		# v0 <- *dest (64-bit old value; return value)
	bne $2, $5, 3f		# mismatch -> return old value
	nop			# (delay slot)
	scd $12, 0($4)		# try *dest <- newValue; t4 = 1/0
	beqz $12,1b		# retry on failure
	nop			# (delay slot)

	sync 0x04		# sync_wmb after a successful swap

# Return
3:
	jr $31
	nop			# (delay slot)
|
||||
|
||||
# int SST_Atomic_LoadAcquire(const volatile int* src);
SST_Atomic_LoadAcquire:
	lw $2, 0($4)		# v0 <- *src
	sync 0x11		# sync_acquire: LoadLoad | LoadStore
	jr $31
	nop			# (delay slot)

# void SST_Atomic_StoreRelease(volatile int* dest, int value);
SST_Atomic_StoreRelease:
	sync 0x12		# sync_release: LoadStore | StoreStore
	jr $31
	sw $5, 0($4)		# (delay slot) *dest <- value

# void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
SST_Atomic_LoadAcquirePtr:
	ld $2, 0($4)		# v0 <- *src (64-bit)
	sync 0x11		# sync_acquire: LoadLoad | LoadStore
	jr $31
	nop			# (delay slot)

# void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
SST_Atomic_StoreReleasePtr:
	sync 0x12		# sync_release: LoadStore | StoreStore
	jr $31
	sd $5, 0($4)		# (delay slot) *dest <- value (64-bit)
|
||||
|
||||
201
libsst-atomic/SST_Atomic_ppc.asm
Normal file
201
libsst-atomic/SST_Atomic_ppc.asm
Normal file
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
SST_Atomic_ppc.asm
|
||||
Author: Patrick Baggett
|
||||
Created: 7/20/2012
|
||||
|
||||
Purpose:
|
||||
|
||||
32-bit assembly for Power Architecture processors.
|
||||
Assembles with GNU as.
|
||||
|
||||
License:
|
||||
|
||||
This program is free software. It comes without any warranty, to
|
||||
the extent permitted by applicable law. You can redistribute it
|
||||
and/or modify it under the terms of the Do What The Fuck You Want
|
||||
To Public License, Version 2, as published by Sam Hocevar. See
|
||||
http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||
*/
|
||||
|
||||
.global SST_Atomic_Add
|
||||
.global SST_Atomic_AddPtr
|
||||
.global SST_Atomic_And
|
||||
.global SST_Atomic_Or
|
||||
.global SST_Atomic_Xor
|
||||
.global SST_Atomic_Not
|
||||
.global SST_Atomic_AddReturn
|
||||
.global SST_Atomic_AddPtrReturn
|
||||
.global SST_Atomic_AndReturn
|
||||
.global SST_Atomic_OrReturn
|
||||
.global SST_Atomic_XorReturn
|
||||
.global SST_Atomic_NotReturn
|
||||
.global SST_Atomic_ExchangeAdd
|
||||
.global SST_Atomic_ExchangeAddPtr
|
||||
.global SST_Atomic_Exchange
|
||||
.global SST_Atomic_ExchangePtr
|
||||
.global SST_Atomic_CAS
|
||||
.global SST_Atomic_CASPtr
|
||||
.global SST_Atomic_LoadAcquire
|
||||
.global SST_Atomic_LoadAcquirePtr
|
||||
.global SST_Atomic_StoreRelease
|
||||
.global SST_Atomic_StoreReleasePtr
|
||||
|
||||
/*
|
||||
This is for 32-bit Power arch, so the pointer versions are the same as
|
||||
the non-pointer versions.
|
||||
These are all leaf functions, so r3-r11 are legal to use.
|
||||
*/
|
||||
|
||||
/* void  SST_Atomic_Add(volatile int* x, int value)
   void  SST_Atomic_AddPtr(volatile void* x, int value)
   void* SST_Atomic_AddPtrReturn(volatile void* x, int value)
   int   SST_Atomic_AddReturn(volatile int* x, int value)
   32-bit Power: int and pointer are the same width, one body serves all. */
SST_Atomic_Add:
SST_Atomic_AddPtr:
SST_Atomic_AddReturn:
SST_Atomic_AddPtrReturn:
	mr r5, r3		/* r5 = x (free r3 for the return value) */
	sync			/* full barrier before the RMW */

1:
	lwarx r6, 0, r5		/* r6 = *x, take a reservation */
	add r3, r6, r4		/* return value = old + value */
	add r6, r6, r4		/* r6 += value */
	stwcx. r6, 0, r5	/* conditional store of the sum */
	bne- 1b			/* reservation lost -> retry */

	isync			/* order all future loads/stores */
	blr


/* void SST_Atomic_And(volatile int* x, int value) */
/* int  SST_Atomic_AndReturn(volatile int* x, int value) */
SST_Atomic_And:
SST_Atomic_AndReturn:
	mr r5, r3		/* r5 = x */
	sync			/* full barrier */

1:
	lwarx r6, 0, r5		/* r6 = *x */
	and r3, r6, r4		/* return value = old & value */
	and r6, r6, r4		/* r6 &= value */
	stwcx. r6, 0, r5	/* conditional store of the result */
	bne- 1b			/* retry? */

	isync			/* order all future loads/stores */
	blr


/* void SST_Atomic_Or(volatile int* x, int value) */
/* int  SST_Atomic_OrReturn(volatile int* x, int value) */
SST_Atomic_Or:
SST_Atomic_OrReturn:
	mr r5, r3		/* r5 = x */
	sync			/* full barrier */

1:
	lwarx r6, 0, r5		/* r6 = *x */
	or r3, r6, r4		/* return value = old | value */
	or r6, r6, r4		/* r6 |= value */
	stwcx. r6, 0, r5	/* conditional store of the result */
	bne- 1b			/* retry? */

	isync			/* order all future loads/stores */
	blr

/* void SST_Atomic_Xor(volatile int* x, int value) */
/* int  SST_Atomic_XorReturn(volatile int* x, int value) */
SST_Atomic_Xor:
SST_Atomic_XorReturn:
	mr r5, r3		/* r5 = x */
	sync			/* full barrier */

1:
	lwarx r6, 0, r5		/* r6 = *x */
	xor r3, r6, r4		/* return value = old ^ value */
	xor r6, r6, r4		/* r6 ^= value */
	stwcx. r6, 0, r5	/* conditional store of the result */
	bne- 1b			/* retry? */

	isync			/* order all future loads/stores */
	blr

/* void SST_Atomic_Not(volatile int* x) */
/* int  SST_Atomic_NotReturn(volatile int* x) */
SST_Atomic_Not:
SST_Atomic_NotReturn:
	mr r4, r3		/* r4 = x */
	sync			/* full barrier */

1:
	lwarx r3, 0, r4		/* r3 = *x */
	nor r3, r3, r3		/* r3 = ~r3 (also the return value) */
	stwcx. r3, 0, r4	/* conditional store of the complement */
	bne- 1b			/* retry? */

	isync			/* order all future loads/stores */
	blr
|
||||
|
||||
/* int SST_Atomic_Exchange(volatile int* x, int value)
   int SST_Atomic_ExchangePtr(volatile void** x, void* value)
   Returns the previous value of *x after atomically storing "value". */
SST_Atomic_Exchange:
SST_Atomic_ExchangePtr:
	mr r5, r3		/* r5 = x (free r3 for the return value) */
	sync			/* full barrier before the RMW */

1:
	lwarx r3, 0, r5		/* r3 = old *x (return value), take reservation */
	stwcx. r4, 0, r5	/* try *x = value */
	bne- 1b			/* reservation lost -> retry */

	isync			/* order all future loads/stores */
	blr

/* int SST_Atomic_CAS(volatile int* dest, int compare, int newValue)
   Returns the old *dest; the swap happened iff it equals "compare". */
SST_Atomic_CAS:
SST_Atomic_CASPtr:
	mr r6, r3		/* r6 = dest */
	sync			/* full barrier */

1:
	lwarx r3, 0, r6		/* r3 = old *dest (return value) */
	cmpw r4, r3		/* old == compare? */
	bne- 2f			/* no -> return old value without storing */
	stwcx. r5, 0, r6	/* try *dest = newValue */
	bne- 1b			/* reservation lost -> retry */
	isync			/* order future loads/stores after a successful swap */
2:
	blr

/* int   SST_Atomic_ExchangeAdd(volatile int* x, int value)
   void* SST_Atomic_ExchangeAddPtr(volatile void* x, int value)
   Returns the value of *x before the add. */
SST_Atomic_ExchangeAdd:
SST_Atomic_ExchangeAddPtr:
	mr r5, r3		/* r5 = x */
	sync			/* full barrier */

1:
	lwarx r3, 0, r5		/* r3 = old *x (return value) */
	add r6, r3, r4		/* r6 = old + value */
	stwcx. r6, 0, r5	/* try *x = sum */
	bne- 1b			/* retry? */

	isync			/* order all future loads/stores */
	blr
|
||||
|
||||
/* int   SST_Atomic_LoadAcquire(const volatile int* src); */
/* void* SST_Atomic_LoadAcquirePtr(const volatile void** src); */
SST_Atomic_LoadAcquire:
SST_Atomic_LoadAcquirePtr:
	lwz r3, 0(r3)		/* r3 = *src.
				   BUGFIX: was "lw r3, 0, r3" — "lw" is a MIPS
				   mnemonic, not PowerPC, and PPC D-form loads
				   take disp(reg) operands; the original line
				   cannot assemble. */
	isync			/* acquire: order all future loads/stores */
	blr
|
||||
|
||||
/* void SST_Atomic_StoreRelease(volatile int* dest, int value); */
/* void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value); */
SST_Atomic_StoreRelease:
SST_Atomic_StoreReleasePtr:
	lwsync			/* release: complete prior loads/stores first */
	stw r4, 0(r3)		/* *dest = value.
				   BUGFIX: was "stw r4, 0, r3" — PPC D-form
				   stores take disp(reg) operands; the original
				   line cannot assemble. */
	blr
|
||||
214
libsst-atomic/SST_Atomic_sparc.asm
Normal file
214
libsst-atomic/SST_Atomic_sparc.asm
Normal file
@@ -0,0 +1,214 @@
|
||||
/*
|
||||
SST_Atomic_sparc.asm
|
||||
Author: Patrick Baggett
|
||||
Created: 12/28/2011
|
||||
|
||||
Purpose:
|
||||
|
||||
32-bit assembly for SPARCv9 processors (aka v8+)
|
||||
Older SPARC CPUs (e.g. sun4m) are not supported.
|
||||
Assembles with Solaris assembler or GNU as.
|
||||
|
||||
License:
|
||||
|
||||
This program is free software. It comes without any warranty, to
|
||||
the extent permitted by applicable law. You can redistribute it
|
||||
and/or modify it under the terms of the Do What The Fuck You Want
|
||||
To Public License, Version 2, as published by Sam Hocevar. See
|
||||
http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||
*/
|
||||
|
||||
.global SST_Atomic_Add
|
||||
.global SST_Atomic_AddPtr
|
||||
.global SST_Atomic_And
|
||||
.global SST_Atomic_Or
|
||||
.global SST_Atomic_Xor
|
||||
.global SST_Atomic_Not
|
||||
.global SST_Atomic_AddReturn
|
||||
.global SST_Atomic_AddPtrReturn
|
||||
.global SST_Atomic_AndReturn
|
||||
.global SST_Atomic_OrReturn
|
||||
.global SST_Atomic_XorReturn
|
||||
.global SST_Atomic_NotReturn
|
||||
.global SST_Atomic_ExchangeAdd
|
||||
.global SST_Atomic_ExchangeAddPtr
|
||||
.global SST_Atomic_Exchange
|
||||
.global SST_Atomic_ExchangePtr
|
||||
.global SST_Atomic_CAS
|
||||
.global SST_Atomic_CASPtr
|
||||
.global SST_Atomic_LoadAcquire
|
||||
.global SST_Atomic_LoadAcquirePtr
|
||||
.global SST_Atomic_StoreRelease
|
||||
.global SST_Atomic_StoreReleasePtr
|
||||
|
||||
/*
|
||||
This is for 32-bit SPARC code, so the pointer versions are the same as
|
||||
the non-pointer versions.
|
||||
These are all leaf functions, so %o0-%o5 and %g1 are legal to use.
|
||||
*/
|
||||
|
||||
/* void  SST_Atomic_Add(volatile int* x, int value)
   void  SST_Atomic_AddPtr(volatile void* x, int value)
   void* SST_Atomic_AddPtrReturn(volatile void* x, int value)
   int   SST_Atomic_AddReturn(volatile int* x, int value)
   32-bit SPARC: int and pointer are the same width, one body serves all. */
SST_Atomic_Add:
SST_Atomic_AddPtr:
SST_Atomic_AddReturn:
SST_Atomic_AddPtrReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	add %o1, %o4, %o5		/* o5 = *x + value */
	cas [%o0], %o4, %o5		/* compare and swap; o5 = old *x */

	cmp %o4, %o5			/* was the expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) o4 = latest *x */

	/* membar #StoreStore | #StoreLoad   would force stores to complete before loads */

	retl				/* success */
	add %o1, %o5, %o0		/* (delay slot) return the sum; free here,
					   so the "Return" variants share this path */


/* void SST_Atomic_And(volatile int* x, int value) */
/* int  SST_Atomic_AndReturn(volatile int* x, int value) */
SST_Atomic_And:
SST_Atomic_AndReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	and %o1, %o4, %o5		/* o5 = *x & value */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	/* membar #StoreStore | #StoreLoad   would force stores to complete before loads */

	retl				/* success */
	and %o1, %o5, %o0		/* (delay slot) return the result */


/* void SST_Atomic_Or(volatile int* x, int value) */
/* int  SST_Atomic_OrReturn(volatile int* x, int value) */
SST_Atomic_Or:
SST_Atomic_OrReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	or %o1, %o4, %o5		/* o5 = *x | value */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	/* membar #StoreStore | #StoreLoad   would force stores to complete before loads */

	retl				/* success */
	or %o1, %o5, %o0		/* (delay slot) return the result */

/* void SST_Atomic_Xor(volatile int* x, int value) */
/* int  SST_Atomic_XorReturn(volatile int* x, int value) */
SST_Atomic_Xor:
SST_Atomic_XorReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	xor %o1, %o4, %o5		/* o5 = *x ^ value */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	/* membar #StoreStore | #StoreLoad   would force stores to complete before loads */

	retl				/* success */
	xor %o1, %o5, %o0		/* (delay slot) return the result */

/* void SST_Atomic_Not(volatile int* x) */
/* int  SST_Atomic_NotReturn(volatile int* x) */
SST_Atomic_Not:
SST_Atomic_NotReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	not %o4, %o5			/* o5 = ~*x */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	/* membar #StoreStore | #StoreLoad   would force stores to complete before loads */

	retl				/* success */
	not %o4, %o0			/* (delay slot) return the complement */
|
||||
|
||||
/* int SST_Atomic_Exchange(volatile int* x, int value) */
/* int SST_Atomic_ExchangePtr(volatile void** x, void* value) */
SST_Atomic_Exchange:
SST_Atomic_ExchangePtr:
	swap [%o0], %o1			/* atomically swap(*x, value) */
	retl
	mov %o1, %o0			/* (delay slot) return the old value */
	/* NOTE: SPARCv9 deprecates "swap", but it fits here: we do not care
	   whether *x changed, only that each of N racing threads receives a
	   distinct previous value — which swap guarantees, i.e. it is atomic. */

/* int SST_Atomic_CAS(volatile int* dest, int compare, int newValue) */
SST_Atomic_CAS:
SST_Atomic_CASPtr:
	cas [%o0], %o1, %o2		/* if *dest == compare, *dest = newValue; o2 = old */
	retl
	mov %o2, %o0			/* (delay slot) return the old value */

/* int SST_Atomic_ExchangeAdd(volatile int* x, int value)
   void* SST_Atomic_ExchangeAddPtr(volatile void* x, int value)
   Returns the value of *x before the add. */
SST_Atomic_ExchangeAdd:
SST_Atomic_ExchangeAddPtr:
	ld [%o0], %o4			/* o4 = *x */
1:
	add %o1, %o4, %o5		/* o5 = *x + value */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	/* membar #StoreStore | #StoreLoad   would force stores to complete before loads */

	retl				/* success */
	mov %o4, %o0			/* (delay slot) return pre-add value */
|
||||
|
||||
/* int   SST_Atomic_LoadAcquire(const volatile int* src); */
/* void* SST_Atomic_LoadAcquirePtr(const volatile void** src); */
SST_Atomic_LoadAcquire:
SST_Atomic_LoadAcquirePtr:
	ld [%o0], %o0			/* o0 = *src */
	membar #LoadStore | #LoadLoad	/* acquire ordering */
	retl
	nop

/* void SST_Atomic_StoreRelease(volatile int* dest, int value); */
/* void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value); */
SST_Atomic_StoreRelease:
SST_Atomic_StoreReleasePtr:
	membar #LoadStore | #StoreStore	/* release ordering */
	retl
	st %o1, [%o0]			/* (delay slot) *dest = value */
|
||||
254
libsst-atomic/SST_Atomic_sparc64.asm
Normal file
254
libsst-atomic/SST_Atomic_sparc64.asm
Normal file
@@ -0,0 +1,254 @@
|
||||
/*
|
||||
SST_Atomic_sparc64.asm
|
||||
Author: Patrick Baggett
|
||||
Created: 4/3/2012
|
||||
|
||||
Purpose:
|
||||
|
||||
64-bit assembly for SPARCv9 processors
|
||||
Assembles with Solaris assembler or GNU as.
|
||||
|
||||
License:
|
||||
|
||||
This program is free software. It comes without any warranty, to
|
||||
the extent permitted by applicable law. You can redistribute it
|
||||
and/or modify it under the terms of the Do What The Fuck You Want
|
||||
To Public License, Version 2, as published by Sam Hocevar. See
|
||||
http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||
*/
|
||||
|
||||
.global SST_Atomic_Add
|
||||
.global SST_Atomic_AddPtr
|
||||
.global SST_Atomic_And
|
||||
.global SST_Atomic_Or
|
||||
.global SST_Atomic_Xor
|
||||
.global SST_Atomic_Not
|
||||
.global SST_Atomic_AddReturn
|
||||
.global SST_Atomic_AddPtrReturn
|
||||
.global SST_Atomic_AndReturn
|
||||
.global SST_Atomic_OrReturn
|
||||
.global SST_Atomic_XorReturn
|
||||
.global SST_Atomic_NotReturn
|
||||
.global SST_Atomic_ExchangeAdd
|
||||
.global SST_Atomic_ExchangeAddPtr
|
||||
.global SST_Atomic_Exchange
|
||||
.global SST_Atomic_ExchangePtr
|
||||
.global SST_Atomic_CAS
|
||||
.global SST_Atomic_CASPtr
|
||||
.global SST_Atomic_LoadAcquire
|
||||
.global SST_Atomic_LoadAcquirePtr
|
||||
.global SST_Atomic_StoreRelease
|
||||
.global SST_Atomic_StoreReleasePtr
|
||||
|
||||
/*
|
||||
This is for 64-bit SPARC code, so the pointer versions are different
|
||||
from the non-pointer versions.
|
||||
These are all leaf functions, so %o0-%o5 and %g1 are legal to use.
|
||||
|
||||
On TSO model, only "membar #StoreLoad" is a true barrier, the
|
||||
rest are no-ops. This code assumes RMO.
|
||||
*/
|
||||
|
||||
/* void SST_Atomic_Add(volatile int* x, int value) */
/* int  SST_Atomic_AddReturn(volatile int* x, int value) */
SST_Atomic_Add:
SST_Atomic_AddReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	add %o1, %o4, %o5		/* o5 = *x + value */
	cas [%o0], %o4, %o5		/* compare and swap; o5 = old *x */

	cmp %o4, %o5			/* was the expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	retl				/* success */
	add %o1, %o5, %o0		/* (delay slot) return the sum */

/******************************************************************************/

/* void  SST_Atomic_AddPtr(volatile void* x, int value) */
/* void* SST_Atomic_AddPtrReturn(volatile void* x, int value) */
SST_Atomic_AddPtr:
SST_Atomic_AddPtrReturn:
	ldx [%o0], %o4			/* o4 = *x (64-bit) */
1:
	add %o1, %o4, %o5		/* o5 = *x + value */
	casx [%o0], %o4, %o5		/* 64-bit compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %xcc, 1b		/* no -> try again (64-bit condition codes) */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	retl				/* success */
	add %o1, %o5, %o0		/* (delay slot) return the sum */

/* void SST_Atomic_And(volatile int* x, int value) */
/* int  SST_Atomic_AndReturn(volatile int* x, int value) */
SST_Atomic_And:
SST_Atomic_AndReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	and %o1, %o4, %o5		/* o5 = *x & value */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	retl				/* success */
	and %o1, %o5, %o0		/* (delay slot) return the result */

/* void SST_Atomic_Or(volatile int* x, int value) */
/* int  SST_Atomic_OrReturn(volatile int* x, int value) */
SST_Atomic_Or:
SST_Atomic_OrReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	or %o1, %o4, %o5		/* o5 = *x | value */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	retl				/* success */
	or %o1, %o5, %o0		/* (delay slot) return the result */


/* void SST_Atomic_Xor(volatile int* x, int value) */
/* int  SST_Atomic_XorReturn(volatile int* x, int value) */
SST_Atomic_Xor:
SST_Atomic_XorReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	xor %o1, %o4, %o5		/* o5 = *x ^ value */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	retl				/* success */
	xor %o1, %o5, %o0		/* (delay slot) return the result */

/* void SST_Atomic_Not(volatile int* x) */
/* int  SST_Atomic_NotReturn(volatile int* x) */
SST_Atomic_Not:
SST_Atomic_NotReturn:
	ld [%o0], %o4			/* o4 = *x */
1:
	not %o4, %o5			/* o5 = ~*x */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	retl				/* success */
	not %o4, %o0			/* (delay slot) return the complement */
|
||||
|
||||
/* int SST_Atomic_Exchange(volatile int* x, int value)
   Returns the previous value of *x after atomically storing "value". */
SST_Atomic_Exchange:
	ld [%o0], %o4			/* o4 = *x */
1:
	mov %o1, %o5			/* o5 = value (cas clobbers its swap operand) */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* was the expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	membar #StoreStore | #LoadStore	/* ensure ordering */

	retl				/* success */
	mov %o4, %o0			/* (delay slot) return the old value */

/* int SST_Atomic_ExchangePtr(volatile void** x, void* value) */
SST_Atomic_ExchangePtr:
	ldx [%o0], %o4			/* o4 = *x (64-bit) */
1:
	mov %o1, %o5			/* o5 = value */
	casx [%o0], %o4, %o5		/* 64-bit compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %xcc, 1b		/* no -> try again (64-bit condition codes) */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	membar #StoreStore | #LoadStore	/* ensure ordering */

	retl				/* success */
	mov %o4, %o0			/* (delay slot) return the old value */

/* int SST_Atomic_CAS(volatile int* dest, int compare, int newValue) */
SST_Atomic_CAS:
	cas [%o0], %o1, %o2		/* if *dest == compare, *dest = newValue; o2 = old */
	retl
	mov %o2, %o0			/* (delay slot) return the old value */

/* void* SST_Atomic_CASPtr(volatile void** dest, void* compare, void* newValue) */
SST_Atomic_CASPtr:
	casx [%o0], %o1, %o2		/* 64-bit variant */
	retl
	mov %o2, %o0			/* (delay slot) return the old value */

/* int SST_Atomic_ExchangeAdd(volatile int* x, int value)
   Returns the value of *x before the add. */
SST_Atomic_ExchangeAdd:
	ld [%o0], %o4			/* o4 = *x */
1:
	add %o1, %o4, %o5		/* o5 = *x + value */
	cas [%o0], %o4, %o5		/* compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %icc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	retl				/* success */
	mov %o4, %o0			/* (delay slot) return pre-add value */


/* void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value) */
SST_Atomic_ExchangeAddPtr:
	ldx [%o0], %o4			/* o4 = *x (64-bit) */
1:
	add %o1, %o4, %o5		/* o5 = *x + value */
	casx [%o0], %o4, %o5		/* 64-bit compare and swap */

	cmp %o4, %o5			/* expected value still in memory? */
	bne,a,pn %xcc, 1b		/* no -> try again */
	mov %o5, %o4			/* (annulled delay slot) refresh o4 */

	retl				/* success */
	mov %o4, %o0			/* (delay slot) return pre-add value */
|
||||
|
||||
/* int SST_Atomic_LoadAcquire(const volatile int* src); */
SST_Atomic_LoadAcquire:
	ld [%o0], %o0			/* o0 = *src */
	membar #LoadStore | #LoadLoad	/* acquire ordering */
	retl
	nop

/* void* SST_Atomic_LoadAcquirePtr(const volatile void** src); */
SST_Atomic_LoadAcquirePtr:
	ldx [%o0], %o0			/* o0 = *src (64-bit) */
	membar #LoadStore | #LoadLoad	/* acquire ordering */
	retl
	nop

/* void SST_Atomic_StoreRelease(volatile int* dest, int value); */
SST_Atomic_StoreRelease:
	membar #LoadStore | #StoreStore	/* release ordering */
	retl
	st %o1, [%o0]			/* (delay slot) *dest = value */

/* void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value); */
SST_Atomic_StoreReleasePtr:
	membar #LoadStore | #StoreStore	/* release ordering */
	retl
	stx %o1, [%o0]			/* (delay slot) *dest = value (64-bit) */
|
||||
237
libsst-atomic/SST_Atomic_x86-64-win64.asm
Normal file
237
libsst-atomic/SST_Atomic_x86-64-win64.asm
Normal file
@@ -0,0 +1,237 @@
|
||||
; SST_Atomic_x86-64-win64.asm
; Author: Patrick Baggett <ptbaggett@762studios.com>
; Created: 12/27/2011
;
; Purpose:
;
;	64-bit assembly for atomic operations using the Microsoft x64 ABI
;	Assembles with YASM 1.1/NASM 2.0+
;
; License:
;
; This program is free software. It comes without any warranty, to
; the extent permitted by applicable law. You can redistribute it
; and/or modify it under the terms of the Do What The Fuck You Want
; To Public License, Version 2, as published by Sam Hocevar. See
; http://sam.zoy.org/wtfpl/COPYING for more details.


; Win64 and UNIX calling conventions differ for x86-64. I know, it's absurd.
; Because of this, there are separate implementations for Win64 and x86-64 UNIX (Linux, *BSD, Solaris, MacOS X, etc.)
; Here is the break down:
;
; REGISTER  | Win64     | UNIX
;-----------+-----------+----------
;rax        | Retval    | Retval
;rdi        | Not used  | 1st arg
;rsi        | Not used  | 2nd arg
;rcx        | 1st arg   | 3rd arg
;rdx        | 2nd arg   | 4th arg
;r8         | 3rd arg   | 5th arg
;r9         | 4th arg   | 6th arg

[bits 64]
[segment .text]

; Win64 symbol names
[global SST_Atomic_Add]
[global SST_Atomic_AddPtr]
[global SST_Atomic_And]
[global SST_Atomic_Or]
[global SST_Atomic_Xor]
[global SST_Atomic_Not]
[global SST_Atomic_AddReturn]
[global SST_Atomic_AddPtrReturn]
[global SST_Atomic_AndReturn]
[global SST_Atomic_OrReturn]
[global SST_Atomic_XorReturn]
[global SST_Atomic_NotReturn]
[global SST_Atomic_ExchangeAdd]
[global SST_Atomic_ExchangeAddPtr]
[global SST_Atomic_Exchange]
[global SST_Atomic_ExchangePtr]
[global SST_Atomic_CAS]
[global SST_Atomic_CASPtr]
[global SST_Atomic_LoadAcquire]
[global SST_Atomic_LoadAcquirePtr]
[global SST_Atomic_StoreRelease]
[global SST_Atomic_StoreReleasePtr]

;=======================================================================
; Pointers and integers are different sized (64 bits vs 32 bits) so
; each SST_Atomic_XxxxPtr() function uses the full 64-bit registers.
;=======================================================================


;====================================================
; MICROSOFT WIN64 ABI
;====================================================

;void SST_Atomic_Add(volatile int* x, int value)
SST_Atomic_Add:
	lock add [rcx], edx
	ret

;void SST_Atomic_AddPtr(volatile void** x, int value)
SST_Atomic_AddPtr:
	lock add [rcx], rdx
	ret

; void SST_Atomic_And(volatile int* x, int value)
SST_Atomic_And:
	lock and [rcx], edx
	ret

; void SST_Atomic_Or(volatile int* x, int value)
SST_Atomic_Or:
	lock or [rcx], edx
	ret

; void SST_Atomic_Xor(volatile int* x, int value)
SST_Atomic_Xor:
	lock xor [rcx], edx
	ret

; void SST_Atomic_Not(volatile int* x)
SST_Atomic_Not:
	lock not DWORD [rcx]
	ret

; int SST_Atomic_AddReturn(volatile int* x, int value)
; Returns the NEW value of *x (i.e. original + value).
SST_Atomic_AddReturn:
	mov eax, edx
	lock xadd [rcx], eax	; eax contains orig value before xadd
	add eax, edx		; now add 'value' (in edx) to get the sum that was stored in *x
	ret

; void* SST_Atomic_AddPtrReturn(volatile void** x, int value)
SST_Atomic_AddPtrReturn:
	mov rax, rdx
	lock xadd [rcx], rax	; rax contains orig value before xadd
	add rax, rdx		; now add 'value' (in rdx) to get the sum that was stored in *x
	ret

; int SST_Atomic_AndReturn(volatile int* x, int value)
; CAS loop: recompute *x & value until cmpxchg observes no racing writer.
SST_Atomic_AndReturn:
	mov eax, [rcx]		;eax = *x
	mov r8d, eax		;r8d = *x
try_again_5:
	and r8d, edx		;r8d <- *x & value

	lock cmpxchg [rcx], r8d	;if(eax == *x) *x = r8d, else eax = *x
	jz done_5		;success -> *x was successfully updated. Its new value is in r8d.
	mov r8d, eax		;failure -> *x was changed previously, new value is in eax. Copy that new value to r8d, let's try again
	jmp try_again_5
done_5:
	mov eax,r8d		;r8d has the value (*x & value), so mov to eax for return value
	ret

; int SST_Atomic_OrReturn(volatile int* x, int value)
SST_Atomic_OrReturn:
	mov eax, [rcx]		;eax = *x
	mov r8d, eax		;r8d = *x
try_again_6:
	or r8d, edx		;r8d <- *x | value

	lock cmpxchg [rcx], r8d	;if(eax == *x) *x = r8d, else eax = *x
	jz done_6		;success -> *x was successfully updated. Its new value is in r8d.
	mov r8d, eax		;failure -> *x was changed previously, new value is in eax. Copy that new value to r8d, let's try again
	jmp try_again_6
done_6:
	mov eax,r8d		;r8d has the value (*x | value), so mov to eax for return value
	ret

; int SST_Atomic_XorReturn(volatile int* x, int value)
SST_Atomic_XorReturn:
	mov eax, [rcx]		;eax = *x
	mov r8d, eax		;r8d = *x
try_again_7:
	xor r8d, edx		;r8d <- *x ^ value

	lock cmpxchg [rcx], r8d	;if(eax == *x) *x = r8d, else eax = *x
	jz done_7		;success -> *x was successfully updated. Its new value is in r8d.
	mov r8d, eax		;failure -> *x was changed previously, new value is in eax. Copy that new value to r8d, let's try again
	jmp try_again_7
done_7:
	mov eax,r8d		;r8d has the value (*x ^ value), so mov to eax for return value
	ret

; int SST_Atomic_NotReturn(volatile int* x)
SST_Atomic_NotReturn:
	mov eax, [rcx]		;eax = *x
	mov r8d, eax		;r8d = *x
try_again_8:
	not r8d			;r8d <- ~*x

	lock cmpxchg [rcx], r8d	;if(eax == *x) *x = r8d, else eax = *x
	jz done_8		;success -> *x was successfully updated. Its new value is in r8d.
	mov r8d, eax		;failure -> *x was changed previously, new value is in eax. Copy that new value to r8d, let's try again
	jmp try_again_8
done_8:
	mov eax,r8d		;r8d has the value (~*x), so mov to eax for return value
	ret

;int SST_Atomic_Exchange(volatile int* x, int value)
; xchg with a memory operand is implicitly locked; prefix kept for clarity.
SST_Atomic_Exchange:
	lock xchg [rcx], edx	;edx now holds the old *x
	mov eax, edx
	ret

;void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
SST_Atomic_ExchangePtr:
	lock xchg [rcx], rdx	;rdx now holds the old *x
	mov rax, rdx
	ret

;int SST_Atomic_ExchangeAdd(volatile int* x, int value);
; Returns the value *x held BEFORE the add.
SST_Atomic_ExchangeAdd:
	lock xadd [rcx], edx	;edx receives the original *x
	mov eax, edx
	ret

;void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
SST_Atomic_ExchangeAddPtr:
	lock xadd [rcx], rdx	;rdx receives the original *x
	mov rax, rdx
	ret

;int SST_Atomic_CAS(int* dest, int compare, int newValue);
; Returns the prior value of *dest (== compare on success).
SST_Atomic_CAS:
	;rcx <- dest
	;edx <- compare
	;r8d <- newValue
	mov eax, edx
	lock cmpxchg [rcx], r8d	;compare [rcx] with eax | swap with r8d if equal, else store [rcx] into eax
	ret

;void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
SST_Atomic_CASPtr:
	;rcx <- dest
	;rdx <- compare
	;r8  <- newValue
	mov rax, rdx
	lock cmpxchg [rcx], r8	;compare [rcx] with rax | swap with r8 if equal, else store [rcx] into rax
	ret

; int SST_Atomic_LoadAcquire(const volatile int* src);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
SST_Atomic_LoadAcquire:
	mov eax, [rcx]
	ret

; void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
SST_Atomic_LoadAcquirePtr:
	mov rax, [rcx]
	ret

; void SST_Atomic_StoreRelease(volatile int* dest, int value);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
SST_Atomic_StoreRelease:
	mov [rcx], edx
	ret

; void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
SST_Atomic_StoreReleasePtr:
	mov [rcx], rdx
	ret
|
||||
|
||||
285
libsst-atomic/SST_Atomic_x86-64.asm
Normal file
285
libsst-atomic/SST_Atomic_x86-64.asm
Normal file
@@ -0,0 +1,285 @@
|
||||
; SST_Atomic_x86-64.asm
; Author: Patrick Baggett <ptb1@762studios.com>
; Created: 12/20/2011
;
; Purpose:
;
;	64-bit assembly for atomic operations using the SysV x86-64 ABI
;	Assembles with YASM 1.1/NASM 2.0+
;
; License:
;
; This program is free software. It comes without any warranty, to
; the extent permitted by applicable law. You can redistribute it
; and/or modify it under the terms of the Do What The Fuck You Want
; To Public License, Version 2, as published by Sam Hocevar. See
; http://sam.zoy.org/wtfpl/COPYING for more details.

; Win64 and UNIX calling conventions differ for x86-64. I know, it's absurd.
; Because of this, there are separate implementations for Win64 and x86-64 UNIX (Linux, *BSD, Solaris, MacOS X, etc.)
; Here is the break down:
;
; REGISTER  | Win64     | UNIX
;-----------+-----------+----------
;rax        | Retval    | Retval
;rdi        | Not used  | 1st arg
;rsi        | Not used  | 2nd arg
;rcx        | 1st arg   | 3rd arg
;rdx        | 2nd arg   | 4th arg
;r8         | 3rd arg   | 5th arg
;r9         | 4th arg   | 6th arg

[bits 64]
[segment .text]

; ELF symbol names (UNIX)
[global SST_Atomic_Add]
[global SST_Atomic_AddPtr]
[global SST_Atomic_And]
[global SST_Atomic_Or]
[global SST_Atomic_Xor]
[global SST_Atomic_Not]
[global SST_Atomic_AddReturn]
[global SST_Atomic_AddPtrReturn]
[global SST_Atomic_AndReturn]
[global SST_Atomic_OrReturn]
[global SST_Atomic_XorReturn]
[global SST_Atomic_NotReturn]
[global SST_Atomic_ExchangeAdd]
[global SST_Atomic_ExchangeAddPtr]
[global SST_Atomic_Exchange]
[global SST_Atomic_ExchangePtr]
[global SST_Atomic_CAS]
[global SST_Atomic_CASPtr]
[global SST_Atomic_LoadAcquire]
[global SST_Atomic_LoadAcquirePtr]
[global SST_Atomic_StoreRelease]
[global SST_Atomic_StoreReleasePtr]


; Mach-O symbol names (MacOS X)
[global _SST_Atomic_Add]
[global _SST_Atomic_AddPtr]
[global _SST_Atomic_And]
[global _SST_Atomic_Or]
[global _SST_Atomic_Xor]
[global _SST_Atomic_Not]
[global _SST_Atomic_AddReturn]
[global _SST_Atomic_AddPtrReturn]
[global _SST_Atomic_AndReturn]
[global _SST_Atomic_OrReturn]
[global _SST_Atomic_XorReturn]
[global _SST_Atomic_NotReturn]
[global _SST_Atomic_ExchangeAdd]
[global _SST_Atomic_ExchangeAddPtr]
[global _SST_Atomic_Exchange]
[global _SST_Atomic_ExchangePtr]
[global _SST_Atomic_CAS]
[global _SST_Atomic_CASPtr]
[global _SST_Atomic_LoadAcquire]
[global _SST_Atomic_LoadAcquirePtr]
[global _SST_Atomic_StoreRelease]
[global _SST_Atomic_StoreReleasePtr]


;=======================================================================
; Pointers and integers are different sized (64 bits vs 32 bits) so
; each SST_Atomic_XxxxPtr() function uses the full 64-bit registers.
;=======================================================================

;====================================================
; UNIX/ELF ABI
;====================================================

;void SST_Atomic_Add(volatile int* x, int value)
SST_Atomic_Add:
_SST_Atomic_Add:
	lock add [rdi], esi
	ret

;void SST_Atomic_AddPtr(volatile void** x, int value)
SST_Atomic_AddPtr:
_SST_Atomic_AddPtr:
	lock add [rdi], rsi
	ret

; void SST_Atomic_And(volatile int* x, int value)
SST_Atomic_And:
_SST_Atomic_And:
	lock and [rdi], esi
	ret

; void SST_Atomic_Or(volatile int* x, int value)
SST_Atomic_Or:
_SST_Atomic_Or:
	lock or [rdi], esi
	ret

; void SST_Atomic_Xor(volatile int* x, int value)
SST_Atomic_Xor:
_SST_Atomic_Xor:
	lock xor [rdi], esi
	ret

; void SST_Atomic_Not(volatile int* x)
SST_Atomic_Not:
_SST_Atomic_Not:
	lock not DWORD [rdi]
	ret

; int SST_Atomic_AddReturn(volatile int* x, int value)
; Returns the NEW value of *x (i.e. original + value).
SST_Atomic_AddReturn:
_SST_Atomic_AddReturn:
	mov eax, esi
	lock xadd [rdi], eax	; eax contains orig value before xadd
	add eax, esi		; now add 'value' (in esi) to get the sum that was stored in *x
	ret

; void* SST_Atomic_AddPtrReturn(volatile void** x, int value)
SST_Atomic_AddPtrReturn:
_SST_Atomic_AddPtrReturn:
	mov rax, rsi
	lock xadd [rdi], rax	; rax contains orig value before xadd
	add rax, rsi		; now add 'value' (in rsi) to get the sum that was stored in *x
	ret

; int SST_Atomic_AndReturn(volatile int* x, int value)
; CAS loop: recompute *x & value until cmpxchg observes no racing writer.
SST_Atomic_AndReturn:
_SST_Atomic_AndReturn:
	mov eax, [rdi]		;eax = *x
	mov edx, eax		;edx = *x
try_again_1:
	and edx, esi		;edx <- *x & value

	lock cmpxchg [rdi], edx	;if(eax == *x) *x = edx, else eax = *x
	jz done_1		;success -> *x was successfully updated. Its new value is in edx
	mov edx, eax		;failure -> *x was changed previously, new value is in eax. Copy that new value to edx, let's try again
	jmp try_again_1
done_1:
	mov eax,edx		;edx has the value (*x & value), so mov to eax for return value
	ret

; int SST_Atomic_OrReturn(volatile int* x, int value)
SST_Atomic_OrReturn:
_SST_Atomic_OrReturn:
	mov eax, [rdi]		;eax = *x
	mov edx, eax		;edx = *x
try_again_2:
	or edx, esi		;edx <- *x | value

	lock cmpxchg [rdi], edx	;if(eax == *x) *x = edx, else eax = *x
	jz done_2		;success -> *x was successfully updated. Its new value is in edx
	mov edx, eax		;failure -> *x was changed previously, new value is in eax. Copy that new value to edx, let's try again
	jmp try_again_2
done_2:
	mov eax,edx		;edx has the value (*x | value), so mov to eax for return value
	ret

; int SST_Atomic_XorReturn(volatile int* x, int value)
SST_Atomic_XorReturn:
_SST_Atomic_XorReturn:
	mov eax, [rdi]		;eax = *x
	mov edx, eax		;edx = *x
try_again_3:
	xor edx, esi		;edx <- *x ^ value

	lock cmpxchg [rdi], edx	;if(eax == *x) *x = edx, else eax = *x
	jz done_3		;success -> *x was successfully updated. Its new value is in edx
	mov edx, eax		;failure -> *x was changed previously, new value is in eax. Copy that new value to edx, let's try again
	jmp try_again_3
done_3:
	mov eax,edx		;edx has the value (*x ^ value), so mov to eax for return value
	ret


; int SST_Atomic_NotReturn(volatile int* x)
SST_Atomic_NotReturn:
_SST_Atomic_NotReturn:
	mov eax, [rdi]		;eax = *x
	mov edx, eax		;edx = *x
try_again_4:
	not edx			;edx <- ~*x

	lock cmpxchg [rdi], edx	;if(eax == *x) *x = edx, else eax = *x
	jz done_4		;success -> *x was successfully updated. Its new value is in edx
	mov edx, eax		;failure -> *x was changed previously, new value is in eax. Copy that new value to edx, let's try again
	jmp try_again_4
done_4:
	mov eax,edx		;edx has the value (~*x), so mov to eax for return value
	ret

;int SST_Atomic_Exchange(volatile int* x, int value)
; xchg with a memory operand is implicitly locked; prefix kept for clarity.
SST_Atomic_Exchange:
_SST_Atomic_Exchange:
	lock xchg [rdi], esi	;esi now holds the old *x
	mov eax, esi
	ret

;void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
SST_Atomic_ExchangePtr:
_SST_Atomic_ExchangePtr:
	lock xchg [rdi], rsi	;rsi now holds the old *x
	mov rax, rsi
	ret

;int SST_Atomic_ExchangeAdd(volatile int* x, int value);
; Returns the value *x held BEFORE the add.
SST_Atomic_ExchangeAdd:
_SST_Atomic_ExchangeAdd:
	lock xadd [rdi],esi	;esi receives the original *x
	mov eax, esi
	ret

;void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
SST_Atomic_ExchangeAddPtr:
_SST_Atomic_ExchangeAddPtr:
	lock xadd [rdi],rsi	;rsi receives the original *x
	mov rax, rsi
	ret

;int SST_Atomic_CAS(int* dest, int compare, int newValue);
; Returns the prior value of *dest (== compare on success).
SST_Atomic_CAS:
_SST_Atomic_CAS:
	;rdi <- dest
	;esi <- compare
	;edx <- newValue
	mov eax, esi
	lock cmpxchg [rdi], edx	;compare [rdi] with eax | swap with edx if equal, else store [rdi] into eax
	ret

;void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
SST_Atomic_CASPtr:
_SST_Atomic_CASPtr:
	;rdi <- dest
	;rsi <- compare
	;rdx <- newValue
	mov rax, rsi
	lock cmpxchg [rdi], rdx	;compare [rdi] with rax | swap with rdx if equal, else store [rdi] into rax
	ret


; int SST_Atomic_LoadAcquire(const volatile int* src);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
SST_Atomic_LoadAcquire:
_SST_Atomic_LoadAcquire:
	mov eax, [rdi]
	ret

; void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
SST_Atomic_LoadAcquirePtr:
_SST_Atomic_LoadAcquirePtr:
	mov rax, [rdi]
	ret

; void SST_Atomic_StoreRelease(volatile int* dest, int value);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
SST_Atomic_StoreRelease:
_SST_Atomic_StoreRelease:
	mov [rdi], esi
	ret

; void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
SST_Atomic_StoreReleasePtr:
_SST_Atomic_StoreReleasePtr:
	mov [rdi], rsi
	ret
|
||||
271
libsst-atomic/SST_Atomic_x86.asm
Normal file
271
libsst-atomic/SST_Atomic_x86.asm
Normal file
@@ -0,0 +1,271 @@
|
||||
; SST_Atomic_x86.asm
; Author: Patrick Baggett <ptbaggett@762studios.com>
; Created: 11/15/2011
;
; Purpose:
;
;	32-bit assembly for atomic operations for x86 CPUs
;	Assembles with YASM 1.1/NASM 2.0+
;
; License:
;
; This program is free software. It comes without any warranty, to
; the extent permitted by applicable law. You can redistribute it
; and/or modify it under the terms of the Do What The Fuck You Want
; To Public License, Version 2, as published by Sam Hocevar. See
; http://sam.zoy.org/wtfpl/COPYING for more details.

[bits 32]
[segment .text]

; Win32 symbol names
[global _SST_Atomic_Add]
[global _SST_Atomic_AddPtr]
[global _SST_Atomic_And]
[global _SST_Atomic_Or]
[global _SST_Atomic_Xor]
[global _SST_Atomic_Not]
[global _SST_Atomic_AddReturn]
[global _SST_Atomic_AddPtrReturn]
[global _SST_Atomic_AndReturn]
[global _SST_Atomic_OrReturn]
[global _SST_Atomic_XorReturn]
[global _SST_Atomic_NotReturn]
[global _SST_Atomic_ExchangeAdd]
[global _SST_Atomic_ExchangeAddPtr]
[global _SST_Atomic_Exchange]
[global _SST_Atomic_ExchangePtr]
[global _SST_Atomic_CAS]
[global _SST_Atomic_CASPtr]
[global _SST_Atomic_LoadAcquire]
[global _SST_Atomic_LoadAcquirePtr]
[global _SST_Atomic_StoreRelease]
[global _SST_Atomic_StoreReleasePtr]

; ELF symbol names
[global SST_Atomic_Add]
[global SST_Atomic_AddPtr]
[global SST_Atomic_And]
[global SST_Atomic_Or]
[global SST_Atomic_Xor]
[global SST_Atomic_Not]
[global SST_Atomic_AddReturn]
[global SST_Atomic_AddPtrReturn]
[global SST_Atomic_AndReturn]
[global SST_Atomic_OrReturn]
[global SST_Atomic_XorReturn]
[global SST_Atomic_NotReturn]
[global SST_Atomic_ExchangeAdd]
[global SST_Atomic_ExchangeAddPtr]
[global SST_Atomic_Exchange]
[global SST_Atomic_ExchangePtr]
[global SST_Atomic_CAS]
[global SST_Atomic_CASPtr]
[global SST_Atomic_LoadAcquire]
[global SST_Atomic_LoadAcquirePtr]
[global SST_Atomic_StoreRelease]
[global SST_Atomic_StoreReleasePtr]

;=======================================================================
; Since pointers and integers are the same on GCC/MSVC, many of the
; integer/pointer code paths are the same. This of course only holds true
; for 32-bit compiles on x86, which is exactly this file.
;=======================================================================


;void SST_Atomic_Add(volatile int* x, int value)
;void SST_Atomic_AddPtr(volatile void* x, uintptr_t value)
_SST_Atomic_AddPtr:
_SST_Atomic_Add:
SST_Atomic_AddPtr:
SST_Atomic_Add:
	mov edx, [esp+4]
	mov eax, [esp+8]
	lock add [edx], eax
	ret


; void SST_Atomic_And(volatile int* x, int value)
_SST_Atomic_And:
SST_Atomic_And:
	mov edx, [esp+4]
	mov eax, [esp+8]
	lock and [edx], eax
	ret

; void SST_Atomic_Or(volatile int* x, int value)
_SST_Atomic_Or:
SST_Atomic_Or:
	mov edx, [esp+4]
	mov eax, [esp+8]
	lock or [edx], eax
	ret

; void SST_Atomic_Xor(volatile int* x, int value)
_SST_Atomic_Xor:
SST_Atomic_Xor:
	mov edx, [esp+4]
	mov eax, [esp+8]
	lock xor [edx], eax
	ret

; void SST_Atomic_Not(volatile int* x)
_SST_Atomic_Not:
SST_Atomic_Not:
	mov edx, [esp+4]
	lock not DWORD [edx]
	ret

; int SST_Atomic_AddReturn(volatile int* x, int value)
; Returns the NEW value of *x (i.e. original + value).
_SST_Atomic_AddReturn:
_SST_Atomic_AddPtrReturn:
SST_Atomic_AddReturn:
SST_Atomic_AddPtrReturn:
	mov edx, [esp+4]
	mov eax, [esp+8]
	lock xadd [edx], eax	; eax = original *x
	add eax, [esp+8]	; eax = original + value
	ret

; int SST_Atomic_AndReturn(volatile int* x, int value)
; CAS loop: recompute *x & value until cmpxchg observes no racing writer.
_SST_Atomic_AndReturn:
SST_Atomic_AndReturn:
	push ebx		;save ebx (callee-saved; arg offsets below account for the push)

	mov ecx, [esp+8]	;ecx = x
	mov ebx, [esp+12]	;ebx = value

	mov edx, [ecx]		;edx = *x
	mov eax, edx		;eax = edx = *x
try_again_1:
	and edx, ebx		;edx = *x & value

	lock cmpxchg [ecx], edx	;if(eax == *x) *x = edx, else eax = *x
	jz done_1		;*x was modified. Its new value is in edx
	mov edx, eax		;copy that new value to edx, let's try again
	jmp try_again_1
done_1:
	xchg edx, eax		;edx has the value (*x & value), so swap edx and eax
	pop ebx			;restore ebx
	ret

; int SST_Atomic_OrReturn(volatile int* x, int value)
_SST_Atomic_OrReturn:
SST_Atomic_OrReturn:
	push ebx		;save ebx

	mov ecx, [esp+8]	;ecx = x
	mov ebx, [esp+12]	;ebx = value

	mov edx, [ecx]		;edx = *x
	mov eax, edx		;eax = edx = *x
try_again_2:
	or edx, ebx		;edx = *x | value

	lock cmpxchg [ecx], edx	;if(eax == *x) *x = edx, else eax = *x
	jz done_2		;*x was modified. Its new value is in edx
	mov edx, eax		;copy that new value to edx, let's try again
	jmp try_again_2
done_2:
	xchg edx, eax		;edx has the value (*x | value), so swap edx and eax
	pop ebx			;restore ebx
	ret

; int SST_Atomic_XorReturn(volatile int* x, int value)
_SST_Atomic_XorReturn:
SST_Atomic_XorReturn:
	push ebx		;save ebx

	mov ecx, [esp+8]	;ecx = x
	mov ebx, [esp+12]	;ebx = value

	mov edx, [ecx]		;edx = *x
	mov eax, edx		;eax = edx = *x
try_again_3:
	xor edx, ebx		;edx = *x ^ value

	lock cmpxchg [ecx], edx	;if(eax == *x) *x = edx, else eax = *x
	jz done_3		;*x was modified. Its new value is in edx
	mov edx, eax		;copy that new value to edx, let's try again
	jmp try_again_3
done_3:
	xchg edx, eax		;edx has the value (*x ^ value), so swap edx and eax
	pop ebx			;restore ebx
	ret

; int SST_Atomic_NotReturn(volatile int* x)
; =============================================
_SST_Atomic_NotReturn:
SST_Atomic_NotReturn:
	mov ecx, [esp+4]	;ecx = x

	mov edx, [ecx]		;edx = *x
	mov eax, edx		;eax = edx = *x
try_again_4:
	not edx			;edx = ~(*x)

	lock cmpxchg [ecx], edx	;if(eax == *x) *x = edx, else eax = *x
	jz done_4		;*x was modified. Its new value is in edx
	mov edx, eax		;copy that new value to edx, let's try again
	jmp try_again_4
done_4:
	xchg edx, eax		;edx has the value (~*x), so swap edx and eax
	ret

;int SST_Atomic_Exchange(volatile int* x, int value)
;void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
; xchg with a memory operand is implicitly locked; prefix kept for clarity.
_SST_Atomic_Exchange:
SST_Atomic_Exchange:
_SST_Atomic_ExchangePtr:
SST_Atomic_ExchangePtr:
	mov edx, [esp+4]
	mov eax, [esp+8]

	lock xchg [edx], eax	;eax receives the old *x (the return value)
	ret

;int SST_Atomic_ExchangeAdd(volatile int* x, int value);
;void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
; Returns the value *x held BEFORE the add.
_SST_Atomic_ExchangeAdd:
SST_Atomic_ExchangeAdd:
_SST_Atomic_ExchangeAddPtr:
SST_Atomic_ExchangeAddPtr:
	mov edx, [esp+4]
	mov eax, [esp+8]

	lock xadd [edx],eax	;Save [edx] to temp, store [edx]+eax into [edx], temp -> eax
	ret

;int SST_Atomic_CAS(int* dest, int compare, int newValue);
;void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
; Returns the prior value of *dest (== compare on success).
_SST_Atomic_CAS:
SST_Atomic_CAS:
_SST_Atomic_CASPtr:
SST_Atomic_CASPtr:
	mov edx, [esp+4]	;edx <- dest
	mov eax, [esp+8]	;eax <- compare
	mov ecx, [esp+12]	;ecx <- newValue

	lock cmpxchg [edx], ecx	;compare [edx] with eax | swap with ecx if equal, else store [edx] into eax
	ret

; int SST_Atomic_LoadAcquire(const volatile int* src);
; void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
_SST_Atomic_LoadAcquire:
SST_Atomic_LoadAcquire:
_SST_Atomic_LoadAcquirePtr:
SST_Atomic_LoadAcquirePtr:
	mov eax, [esp+4]	;eax = ptr
	mov eax, [eax]		;eax = *ptr
	ret

; void SST_Atomic_StoreRelease(volatile int* dest, int value);
; void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
; NOTE: x86 has strong memory ordering, thus no barrier is necessary, except for compiler barrier to reordering instructions
_SST_Atomic_StoreRelease:
SST_Atomic_StoreRelease:
_SST_Atomic_StoreReleasePtr:
SST_Atomic_StoreReleasePtr:
	mov edx, [esp+4]	;edx = dest
	mov eax, [esp+8]	;eax = value
	mov [edx], eax
	ret
|
||||
151
libsst-atomic/libsst-atomic.vcxproj
Normal file
151
libsst-atomic/libsst-atomic.vcxproj
Normal file
@@ -0,0 +1,151 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{43ED481F-C4A3-40ED-81A3-46C43DC4EB1E}</ProjectGuid>
|
||||
<RootNamespace>libsstatomic</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<WholeProgramOptimization>false</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\vsyasm.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>11.0.50727.1</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<OutDir>$(SolutionDir)\Lib\x86\</OutDir>
|
||||
<IntDir>$(SolutionDir)Intermediate\$(Configuration)\$(Platform)\$(ProjectName)\</IntDir>
|
||||
<TargetName>$(ProjectName)-debug</TargetName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<OutDir>$(SolutionDir)\Lib\x64\</OutDir>
|
||||
<IntDir>$(SolutionDir)Intermediate\$(Configuration)\$(Platform)\$(ProjectName)\</IntDir>
|
||||
<TargetName>$(ProjectName)-debug</TargetName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<OutDir>$(SolutionDir)\Lib\x86\</OutDir>
|
||||
<IntDir>$(SolutionDir)Intermediate\$(Configuration)\$(Platform)\$(ProjectName)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<OutDir>$(SolutionDir)\Lib\x64\</OutDir>
|
||||
<IntDir>$(SolutionDir)Intermediate\$(Configuration)\$(Platform)\$(ProjectName)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Lib />
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Lib />
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<YASM Include="SST_Atomic_x86-64-win64.asm">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
</YASM>
|
||||
<YASM Include="SST_Atomic_x86.asm">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</YASM>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Lib\Include\SST\SST_Atomic.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\vsyasm.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
26
libsst-atomic/libsst-atomic.vcxproj.filters
Normal file
26
libsst-atomic/libsst-atomic.vcxproj.filters
Normal file
@@ -0,0 +1,26 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Header Files">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<YASM Include="SST_Atomic_x86-64-win64.asm">
|
||||
<Filter>Source Files</Filter>
|
||||
</YASM>
|
||||
<YASM Include="SST_Atomic_x86.asm">
|
||||
<Filter>Source Files</Filter>
|
||||
</YASM>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Lib\Include\SST\SST_Atomic.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
BIN
libsst-atomic/obj/x86-64/release/SST_Atomic_x86-64.o
Normal file
BIN
libsst-atomic/obj/x86-64/release/SST_Atomic_x86-64.o
Normal file
Binary file not shown.
Reference in New Issue
Block a user