Initial commit

commit eca1e8c458
2026-04-03 00:22:39 -05:00
945 changed files with 218160 additions and 0 deletions

libsst-atomic/Makefile Normal file

@@ -0,0 +1,48 @@
# libsst-atomic/Makefile
# Author: Patrick Baggett <ptbaggett@762studios.com>
# Created: 12/23/2011
#
# Purpose:
#
# Makefile for libsst-atomic, requires GNU "make"
#
# License:
#
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
# Should be invoked with:
# TARGET := { release, debug }
# ARCH := { x86, x86-64, sparc, sparc64, powerpc, powerpc64 }
# ASM := assembler tool
# DIST := output directory for the built library
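# Example invocation (illustrative only; pick the ASM command and DIST path
# that match your toolchain):
#   make TARGET=release ARCH=x86-64 ASM="yasm -f elf64" DIST=./dist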
BINNAME := $(DIST)/libsst-atomic.a

ifeq ($(TARGET),debug)
BINNAME := $(subst libsst-atomic,libsst-atomic_d, $(BINNAME))
endif

# Architecture -> source file conversion
ASM_SRC := SST_Atomic_$(ARCH).asm
OBJ := $(addprefix obj/$(ARCH)/$(TARGET)/,$(subst .asm,.o,$(ASM_SRC)))

$(shell mkdir -p obj/$(ARCH)/$(TARGET))

$(BINNAME): $(OBJ)
	$(AR) cru $@ $+
	$(RANLIB) $@

# CLEAN
clean:
	@-rm -r -f obj $(DIST)/libsst-atomic*.a

# *.asm files to *.o files
obj/$(ARCH)/$(TARGET)/%.o: %.asm
	@echo ASM $@
	@$(ASM) -o obj/$(ARCH)/$(TARGET)/$*.o $*.asm

libsst-atomic/SST_Atomic_arm.asm Normal file

@@ -0,0 +1,233 @@
@ SST_Atomic_arm.asm
@ Author: Patrick Baggett <ptbaggett@762studios.com>
@ Created: 6/21/2012
@
@ Purpose:
@
@ 32-bit assembly for atomic operations for ARMv6+ CPUs
@ Assembles with GNU as
@
@ License:
@
@ This program is free software. It comes without any warranty, to
@ the extent permitted by applicable law. You can redistribute it
@ and/or modify it under the terms of the Do What The Fuck You Want
@ To Public License, Version 2, as published by Sam Hocevar. See
@ http://sam.zoy.org/wtfpl/COPYING for more details.
.text
@Don't use thumb mode, use full ARM mode
.arm
@ ELF symbol names
.global SST_Atomic_Add
.global SST_Atomic_AddPtr
.global SST_Atomic_And
.global SST_Atomic_Or
.global SST_Atomic_Xor
.global SST_Atomic_Not
.global SST_Atomic_AddReturn
.global SST_Atomic_AddPtrReturn
.global SST_Atomic_AndReturn
.global SST_Atomic_OrReturn
.global SST_Atomic_XorReturn
.global SST_Atomic_NotReturn
.global SST_Atomic_ExchangeAdd
.global SST_Atomic_ExchangeAddPtr
.global SST_Atomic_Exchange
.global SST_Atomic_ExchangePtr
.global SST_Atomic_CAS
.global SST_Atomic_CASPtr
.global SST_Atomic_LoadAcquire
.global SST_Atomic_LoadAcquirePtr
.global SST_Atomic_StoreRelease
.global SST_Atomic_StoreReleasePtr
.type SST_Atomic_Add, %function
.type SST_Atomic_AddPtr, %function
.type SST_Atomic_And, %function
.type SST_Atomic_Or, %function
.type SST_Atomic_Xor, %function
.type SST_Atomic_Not, %function
.type SST_Atomic_AddReturn, %function
.type SST_Atomic_AddPtrReturn, %function
.type SST_Atomic_AndReturn, %function
.type SST_Atomic_OrReturn, %function
.type SST_Atomic_XorReturn, %function
.type SST_Atomic_NotReturn, %function
.type SST_Atomic_ExchangeAdd, %function
.type SST_Atomic_ExchangeAddPtr, %function
.type SST_Atomic_Exchange, %function
.type SST_Atomic_ExchangePtr, %function
.type SST_Atomic_CAS, %function
.type SST_Atomic_CASPtr, %function
.type SST_Atomic_LoadAcquire, %function
.type SST_Atomic_LoadAcquirePtr, %function
.type SST_Atomic_StoreRelease, %function
.type SST_Atomic_StoreReleasePtr, %function
@=======================================================================
@ Since pointers and integers are the same size in 32-bit code (GCC), many of the
@ integer/pointer code paths are the same. This of course only holds true
@ for 32-bit compiles on ARM, which is exactly what this file targets.
@=======================================================================
@ I've merged the Atomic_XXX and Atomic_XXXReturn functions. The difference
@ is 1 mov instruction -- not really worth making separate functions.
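@ All of the read-modify-write routines below share the same ldrex/strex retry
@ pattern. A rough C sketch (illustrative only; try_store_exclusive() is a
@ stand-in for strex, and the barriers are omitted):
@   int SST_Atomic_AddReturn(volatile int* x, int value)
@   {
@       int newval;
@       do { newval = *x + value; } while (!try_store_exclusive(x, newval));
@       return newval;
@   }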
@ void SST_Atomic_Add(volatile int* x, int value)
@ void SST_Atomic_AddPtr(volatile void* x, uintptr_t value)
SST_Atomic_Add:
SST_Atomic_AddPtr:
SST_Atomic_AddReturn:
SST_Atomic_AddPtrReturn:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
_again8:
ldrex r2, [r0] @ r2 <- *x
add r2, r2, r1 @ r2 += value
strex r3, r2, [r0] @ try { *x <- r2 }
teq r3, #0 @ success?
bne _again8 @ No, reload and retry
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
mov r0, r2 @ Setup return value
bx lr @ Return
@ void SST_Atomic_And(volatile int* x, int value)
SST_Atomic_And:
SST_Atomic_AndReturn:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
_again7:
ldrex r2, [r0] @ r2 <- *x
and r2, r2, r1 @ r2 &= value
strex r3, r2, [r0] @ try { *x <- r2 }
teq r3, #0 @ success?
bne _again7 @ No, reload and retry
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
mov r0, r2 @ Setup return value
bx lr @ Return
@ void SST_Atomic_Or(volatile int* x, int value)
SST_Atomic_Or:
SST_Atomic_OrReturn:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
_again6:
ldrex r2, [r0] @ r2 <- *x
orr r2, r2, r1 @ r2 |= value
strex r3, r2, [r0] @ try { *x <- r2 }
teq r3, #0 @ success?
bne _again6 @ No, reload and retry
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
mov r0, r2 @ Setup return value
bx lr @ Return
@ void SST_Atomic_Xor(volatile int* x, int value)
SST_Atomic_Xor:
SST_Atomic_XorReturn:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
_again5:
ldrex r2, [r0] @ r2 <- *x
eor r2, r2, r1 @ r2 ^= value
strex r3, r2, [r0] @ try { *x <- r2 }
teq r3, #0 @ success?
bne _again5 @ No, reload and retry
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
mov r0, r2 @ Setup return value
bx lr @ Return
@ void SST_Atomic_Not(volatile int* x)
SST_Atomic_Not:
SST_Atomic_NotReturn:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
_again4:
ldrex r2, [r0] @ r2 <- *x
mvn r2, r2 @ r2 = ~r2 (i.e. ~*x)
strex r3, r2, [r0] @ try { *x <- r2 }
teq r3, #0 @ success?
bne _again4 @ No, reload and retry
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
mov r0, r2 @ Setup return value
bx lr @ Return
@ int SST_Atomic_Exchange(volatile int* x, int value)
@ void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
SST_Atomic_Exchange:
SST_Atomic_ExchangePtr:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
_again3:
ldrex r2, [r0] @ r2 <- *x
strex r3, r1, [r0] @ try { *x <- r1 }
teq r3, #0 @ success?
bne _again3 @ No, reload and retry
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
mov r0, r2 @ Setup return value
bx lr @ Return
@ int SST_Atomic_ExchangeAdd(volatile int* x, int value);
@ void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
SST_Atomic_ExchangeAdd:
SST_Atomic_ExchangeAddPtr:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
push {r4}
_again2:
ldrex r2, [r0] @ r2 <- *x
mov r4, r2 @ r4 <- *x
add r2, r2, r1 @ r2 += value
strex r3, r2, [r0] @ try { *x <- r2 }
teq r3, #0 @ success?
bne _again2 @ No, reload and retry
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
mov r0, r4 @ Setup return value as the old value of *x
pop {r4}
bx lr @ Return
@ int SST_Atomic_CAS(int* dest, int compare, int newValue);
@ void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
SST_Atomic_CAS:
SST_Atomic_CASPtr:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
push {r4}
mov r4, #0
_again1:
ldrex r3, [r0] @ r3 <- *dest
teq r3, r1 @ if(r3 == compare) {
strexeq r4, r2, [r0] @ try { *dest <- newValue } }
teq r4, #0 @ success?
bne _again1 @ No, reload and retry
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
mov r0, r3 @ Setup return value as the value of *dest
pop {r4}
bx lr @ Return
SST_Atomic_LoadAcquire:
SST_Atomic_LoadAcquirePtr:
ldr r0, [r0]
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
bx lr
SST_Atomic_StoreRelease:
SST_Atomic_StoreReleasePtr:
mcr p15, 0, r0, c7, c10, 5 @ memory barrier
str r1, [r0]
bx lr

libsst-atomic/SST_Atomic_ia64.asm Normal file

@@ -0,0 +1,265 @@
// SST_Atomic_ia64.asm
// Author: Patrick Baggett
// Created: 4/16/2012
//
// Purpose:
//
// Assembly for Itanium processors (aka ia64)
// Assembles with Intel ias assembler or GNU as.
//
// License:
//
// This program is free software. It comes without any warranty, to
// the extent permitted by applicable law. You can redistribute it
// and/or modify it under the terms of the Do What The Fuck You Want
// To Public License, Version 2, as published by Sam Hocevar. See
// http://sam.zoy.org/wtfpl/COPYING for more details.
.global SST_Atomic_Add#
.global SST_Atomic_AddReturn#
.global SST_Atomic_AddPtr#
.global SST_Atomic_AddPtrReturn#
.global SST_Atomic_And#
.global SST_Atomic_AndReturn#
.global SST_Atomic_Or#
.global SST_Atomic_OrReturn#
.global SST_Atomic_Xor#
.global SST_Atomic_XorReturn#
.global SST_Atomic_Not#
.global SST_Atomic_NotReturn#
.global SST_Atomic_Exchange#
.global SST_Atomic_ExchangePtr#
.global SST_Atomic_ExchangeAdd#
.global SST_Atomic_ExchangeAddPtr#
.global SST_Atomic_CAS#
.global SST_Atomic_CASPtr#
.global SST_Atomic_LoadAcquire#
.global SST_Atomic_LoadAcquirePtr#
.global SST_Atomic_StoreRelease#
.global SST_Atomic_StoreReleasePtr#
.section .text
.align 32
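// All of the read-modify-write routines below use the same pattern: load the
// old value, put it in ar.ccv, compute the new value, then retry cmpxchg
// (which only stores when memory still equals ar.ccv) until it succeeds.
// Rough C sketch (illustrative only; cmpxchg() stands in for cmpxchg4/8.acq,
// OP for the operation at hand):
//   old = *x;
//   while ((seen = cmpxchg(x, old, old OP value)) != old) { old = seen; }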
// SST_Atomic_Add(volatile int* x, int value);
// x = r32, value = r33
SST_Atomic_Add:
SST_Atomic_AddReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
add r8 = r31, r33;; //compute sum r8 = (*x + value)
try_again1:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if successful. r30 <- *x
mov ar.ccv = r30 //CCV <- r30
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
add r8 = r33, r30 //set up return value r8 = value + *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again1;; //branch if old *x != new *x
br.ret.sptk.many b0;;
// SST_Atomic_AddPtr(volatile void** x, int value);
// x = r32, value = r33
SST_Atomic_AddPtr:
SST_Atomic_AddPtrReturn:
sxt4 r29 = r33;; //sign extend value to 64-bit quantity
mf
ld8.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
add r8 = r31, r29;; //compute sum r8 = (*x + value)
try_again2:
cmpxchg8.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if successful. r30 <- *x
mov ar.ccv = r30 //CCV <- r30
cmp.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
add r8 = r29, r30 //set up return value r8 = value + *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again2;; //branch if old *x != new *x
br.ret.sptk.many b0;;
// SST_Atomic_And(volatile int* x, int value);
// x = r32, value = r33
SST_Atomic_And:
SST_Atomic_AndReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
and r8 = r31, r33;; //generated r8 = (*x & value)
try_again3:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if successful. r30 <- *x
mov ar.ccv = r30 //CCV <- r30
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
and r8 = r33, r30 //set up return value r8 = value & *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again3;; //branch if old *x != new *x
br.ret.sptk.many b0;;
// SST_Atomic_Or(volatile int* x, int value);
// x = r32, value = r33
SST_Atomic_Or:
SST_Atomic_OrReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
or r8 = r31, r33;; //generated r8 = (*x | value)
try_again4:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if successful. r30 <- *x
mov ar.ccv = r30 //CCV <- r30
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
or r8 = r33, r30 //set up return value r8 = value | *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again4;; //branch if old *x != new *x
br.ret.sptk.many b0;;
// SST_Atomic_Xor(volatile int* x, int value);
// x = r32, value = r33
SST_Atomic_Xor:
SST_Atomic_XorReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
xor r8 = r31, r33;; //compute r8 = (*x ^ value)
try_again5:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if successful. r30 <- *x
mov ar.ccv = r30 //CCV <- r30
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
xor r8 = r33, r30 //set up return value r8 = value ^ *x (new)
mov r31 = r30 //set up old *x to be the new value if we need to loop again
(p15) br.cond.dptk.few try_again5;; //branch if old *x != new *x
br.ret.sptk.many b0;;
// SST_Atomic_Not(volatile int* x);
// x = r32
SST_Atomic_Not:
SST_Atomic_NotReturn:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
andcm r8 = -1, r31;; //generated r8 = ~(*x)
try_again6:
cmpxchg4.acq r30 = [r32], r8, ar.ccv;; //compare [r32] to CCV, exchange with r8 if successful. r30 <- *x
mov ar.ccv = r30 //CCV <- r30 (in case compare failed)
cmp4.ne.unc p15, p0 = r30, r31 //compare old value in *x with new value in *x
andcm r8 = -1, r30 //set up return value r8 = ~*x (new)
(p15) br.cond.dptk.few try_again6;; //branch if old *x != new *x
br.ret.sptk.many b0;;
// SST_Atomic_Exchange(volatile int* x, int value);
// x = r32, value = r33
SST_Atomic_Exchange:
mf
xchg4 r8 = [r32], r33
br.ret.sptk.many b0;;
// SST_Atomic_ExchangePtr(volatile void** x, void* value);
// x = r32, value = r33
SST_Atomic_ExchangePtr:
mf
xchg8 r8 = [r32], r33
br.ret.sptk.many b0;;
// int SST_Atomic_ExchangeAdd(volatile int* x, int value)
SST_Atomic_ExchangeAdd:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
add r30 = r31, r33;; //r30 = *x + value
try_again9:
cmpxchg4.acq r8 = [r32], r30, ar.ccv;; //compare [r32] to CCV, exchange with r30 if successful. r8 <- *x
mov ar.ccv = r8
cmp4.ne.unc p15, p0 = r8, r31
mov r31 = r8
add r30 = r33, r8
(p15) br.cond.dptk.few try_again9;;
br.ret.sptk.many b0;;
// int SST_Atomic_ExchangeAddPtr(volatile void** x, int value)
SST_Atomic_ExchangeAddPtr:
sxt4 r29 = r33;; //sign extend value to 64 bits
mf
ld8.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r31 //Set CCV to be *x
add r30 = r31, r29;; //r30 = *x + value
try_again10:
cmpxchg8.acq r8 = [r32], r30, ar.ccv;; //compare [r32] to CCV, exchange with r30 if successful. r8 <- *x
mov ar.ccv = r8
cmp.ne.unc p15, p0 = r8, r31
mov r31 = r8
add r30 = r8, r29
(p15) br.cond.dptk.few try_again10;;
br.ret.sptk.many b0;;
// int SST_Atomic_CAS(int* dest, int compare, int newValue);
// dest = r32, compare = r33, newValue = r34
SST_Atomic_CAS:
mf
ld4.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r33;; //Set CCV to the compare value
cmpxchg4.acq r8 = [r32], r34, ar.ccv;; //compare [r32] to CCV, exchange with r34 if successful. r8 <- old *dest
br.ret.sptk.many b0;;
// void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
// dest = r32, compare = r33, newValue = r34
SST_Atomic_CASPtr:
mf
ld8.bias r31 = [r32];; //ensure all memory operations happen first, then load *x into r31
mov ar.ccv = r33;; //Set CCV to the compare value
cmpxchg8.acq r8 = [r32], r34, ar.ccv;; //compare [r32] to CCV, exchange with r34 if successful. r8 <- old *dest
br.ret.sptk.many b0;;
// int SST_Atomic_LoadAcquire(const volatile int* src);
SST_Atomic_LoadAcquire:
ld4.acq r8 = [r32];; //Load-with-acquire into return value register r8
br.ret.sptk.many b0;;
// void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
SST_Atomic_LoadAcquirePtr:
ld8.acq r8 = [r32];; //Load-with-acquire into return value register r8
br.ret.sptk.many b0;;
// void SST_Atomic_StoreRelease(volatile int* dest, int value);
SST_Atomic_StoreRelease:
st4.rel [r32] = r33;; //Store with release
br.ret.sptk.many b0;;
// void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
SST_Atomic_StoreReleasePtr:
st8.rel [r32] = r33;; //Store with release
br.ret.sptk.many b0;;

libsst-atomic/SST_Atomic_mips.asm Normal file

@@ -0,0 +1,219 @@
# SST_Atomic_mips.asm
# Author: Patrick Baggett <ptbaggett@762studios.com>
# Created: 1/15/2013
#
# Purpose:
#
# 32-bit assembly for atomic operations for MIPS CPUs. Targets MIPS32R2 ISA, but should run on
# older MIPS machines (e.g. MIPS-III / Loongson).
# Assembles with GNU as (doubtfully IRIX)
#
# License:
#
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
.text
#Don't reorder instructions
.set noreorder
# ELF symbol names
.global SST_Atomic_Add
.global SST_Atomic_AddPtr
.global SST_Atomic_And
.global SST_Atomic_Or
.global SST_Atomic_Xor
.global SST_Atomic_Not
.global SST_Atomic_AddReturn
.global SST_Atomic_AddPtrReturn
.global SST_Atomic_AndReturn
.global SST_Atomic_OrReturn
.global SST_Atomic_XorReturn
.global SST_Atomic_NotReturn
.global SST_Atomic_ExchangeAdd
.global SST_Atomic_ExchangeAddPtr
.global SST_Atomic_Exchange
.global SST_Atomic_ExchangePtr
.global SST_Atomic_CAS
.global SST_Atomic_CASPtr
.global SST_Atomic_LoadAcquire
.global SST_Atomic_LoadAcquirePtr
.global SST_Atomic_StoreRelease
.global SST_Atomic_StoreReleasePtr
#===================================================================================
# Since pointers and integers are the same size on 32-bit code (GCC), many of the
# integer/pointer code paths are the same. This of course only holds true
# for 32-bit compiles on MIPS, which is exactly this file.
#===================================================================================
# MIPS has a "sync" instruction with a bitfield describing "what" to sync, sort of like SPARC's "membar" instruction.
# By default, "sync" means "sync 0" -- a full memory barrier including I/O devices/instructions. This is a very heavy-weight operation and used for things like ordering MMIO accesses
# MIPS32R2 defines "sync 0x10" as a full memory barrier that doesn't synchronize I/O, and "sync 0x04" as a "write/write" memory barrier. However,
# we can't assume a MIPS32R2-only target. Fortunately that is fine, because older MIPS CPUs treat "sync <k>" like "sync 0" (a full barrier),
# while MIPS32R2 CPUs automatically get the cheaper barrier. Neat.
# TODO: not sure if this is a CPU hazard on any remotely recent MIPS CPU (i.e. not MIPS-I or MIPS-II)
# : beq $rs, $rt, someLabel //compare rs to rt
# : addiu $rs, $rs, 1 //increment rs
# I can't find any literature that says this is not allowed on anything other than MIPS-I, which has no interlocks.
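# Summary of the sync hints used in this file (all behave as a plain full "sync" on pre-R2 CPUs):
#   sync 0x10 = sync_mb      (full barrier, no I/O ordering)
#   sync 0x04 = sync_wmb     (StoreStore)
#   sync 0x11 = sync_acquire (LoadLoad | LoadStore)
#   sync 0x12 = sync_release (LoadStore | StoreStore)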
# void SST_Atomic_Add(volatile int* x, int value)
# void SST_Atomic_AddPtr(volatile void** x, int value)
# int SST_Atomic_AddReturn(volatile int* x, int value)
# void* SST_Atomic_AddPtrReturn(volatile void** x, int value)
SST_Atomic_Add:
SST_Atomic_AddPtr:
SST_Atomic_AddReturn:
SST_Atomic_AddPtrReturn:
sync 0x10 # sync_mb
1:
ll $t4, 0($a0) # t4 <- *x
addu $t5, $t4, $a1 # t5 <- t4 + value
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $t5,1b # if failure, retry
addu $v0, $t4, $a1 # (delay slot) $v0 <- *x + value (return value)
jr $ra # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_And(volatile int* x, int value)
# int SST_Atomic_AndReturn(volatile int* x, int value)
SST_Atomic_And:
SST_Atomic_AndReturn:
sync 0x10 # sync_mb
1:
ll $t4, 0($a0) # t4 <- *x
and $t5, $t4, $a1 # t5 <- t4 & value
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $t5,1b # if failure, retry
and $v0, $t4, $a1 # (delay slot) compute return value
jr $ra # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_Or(volatile int* x, int value)
# int SST_Atomic_OrReturn(volatile int* x, int value)
SST_Atomic_Or:
SST_Atomic_OrReturn:
sync 0x10 # sync_mb
1:
ll $t4, 0($a0) # t4 <- *x
or $t5, $t4, $a1 # t5 <- t4 | value
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $t5,1b # if failure, retry
or $v0, $t4, $a1 # (delay slot) compute return value
jr $ra # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_Xor(volatile int* x, int value)
# int SST_Atomic_XorReturn(volatile int* x, int value)
SST_Atomic_XorReturn:
SST_Atomic_Xor:
sync 0x10 # sync_mb
1:
ll $t4, 0($a0) # t4 <- *x
xor $t5, $t4, $a1 # t5 <- t4 ^ value
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $t5,1b # if failure, retry
xor $v0, $t4, $a1 # (delay slot) compute return value
jr $ra # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_Not(volatile int* x)
SST_Atomic_Not:
SST_Atomic_NotReturn:
sync 0x10 # sync_mb
1:
ll $t4, 0($a0) # t4 <- *x
nor $t5, $t4, $0 # t5 <- ~t4
sc $t5, 0($a0) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $t5,1b # if failure, retry
nor $v0, $t4, $0 # (delay slot) compute return value
jr $ra # return
sync 0x04 # (delay slot) sync_wmb
# int SST_Atomic_Exchange(volatile int* x, int value)
# void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
SST_Atomic_Exchange:
SST_Atomic_ExchangePtr:
sync 0x10 # sync_mb
move $t4, $a1 # t4 <- value
1:
ll $v0, 0($a0) # v0 <- *x
sc $t4, 0($a0) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $t4,1b # if failure, retry
move $t4, $a1 # (delay slot) reload t4 <- value for retry
jr $ra # return
sync 0x04 # (delay slot) sync_wmb
# int SST_Atomic_ExchangeAdd(volatile int* x, int value);
# void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
SST_Atomic_ExchangeAdd:
SST_Atomic_ExchangeAddPtr:
sync 0x10 # sync_mb
1:
ll $v0, 0($a0) # v0 <- *x
addu $t4, $v0, $a1 # t4 <- v0 + value
sc $t4, 0($a0) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $t4,1b # if failure, retry
nop # (delay slot) do nothing
jr $ra # return
sync 0x04 # (delay slot) sync_wmb
# int SST_Atomic_CAS(int* dest, int compare, int newValue);
# void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
# a0 = dest, a1 = compare, a2 = newValue
SST_Atomic_CAS:
SST_Atomic_CASPtr:
sync 0x10 # sync_mb
1:
move $t4, $a2 # t4 <- newValue
ll $v0, 0($a0) # v0 <- *dest (captures "old" value of *dest, also is the return value)
bne $v0, $a1, 3f # compare v0 with a1 ("compare"), and if not equal, exit loop
nop # (delay slot) do nothing
sc $t4, 0($a0) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $t4,1b # if failure, retry
nop # (delay slot) do nothing
sync 0x04 # sync_wmb
# Return
3:
jr $ra # return
nop # (delay slot) do nothing
SST_Atomic_LoadAcquire:
SST_Atomic_LoadAcquirePtr:
lw $v0, 0($a0)
sync 0x11 # sync_acquire: LoadLoad | LoadStore
jr $ra
nop
SST_Atomic_StoreRelease:
SST_Atomic_StoreReleasePtr:
sync 0x12 # sync_release: LoadStore | StoreStore
jr $ra
sw $a1, 0($a0)

libsst-atomic/SST_Atomic_mips64.asm Normal file

@@ -0,0 +1,277 @@
# SST_Atomic_mips64.asm
# Author: Patrick Baggett <ptbaggett@762studios.com>
# Created: 7/1/2013
#
# Purpose:
#
# 64-bit assembly for atomic operations for MIPS CPUs. Targets MIPS64R2 ISA, but should run on
# older MIPS machines (e.g. MIPS-III / Loongson).
# Assembles with GNU as (doubtfully IRIX)
#
# License:
#
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
.text
#Don't reorder instructions
.set noreorder
# ELF symbol names
.global SST_Atomic_Add
.global SST_Atomic_AddPtr
.global SST_Atomic_And
.global SST_Atomic_Or
.global SST_Atomic_Xor
.global SST_Atomic_Not
.global SST_Atomic_AddReturn
.global SST_Atomic_AddPtrReturn
.global SST_Atomic_AndReturn
.global SST_Atomic_OrReturn
.global SST_Atomic_XorReturn
.global SST_Atomic_NotReturn
.global SST_Atomic_ExchangeAdd
.global SST_Atomic_ExchangeAddPtr
.global SST_Atomic_Exchange
.global SST_Atomic_ExchangePtr
.global SST_Atomic_CAS
.global SST_Atomic_CASPtr
.global SST_Atomic_LoadAcquire
.global SST_Atomic_LoadAcquirePtr
.global SST_Atomic_StoreRelease
.global SST_Atomic_StoreReleasePtr
# Unbelievable. binutils-2.22 doesn't support register names in n64 (-mabi=64) mode, but it does in o32 (-mabi=32)
# Here are the symbolic names for registers used
# $v0-$v1 = $2-$3
# $a0-$a3 = $4-$7
# $t4-$t5 = $12-$13
# $ra = $31
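# e.g. "ll $12, 0($4)" below is just the numeric spelling of "ll $t4, 0($a0)"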
# MIPS has a "sync" instruction with a bitfield describing "what" to sync, sort of like SPARC's "membar" instruction.
# By default, "sync" means "sync 0" -- a full memory barrier including I/O devices/instructions. This is a very heavy-weight operation and used for things like ordering MMIO accesses
# MIPS64R2 defines "sync 0x10" as a full memory barrier that doesn't synchronize I/O, and "sync 0x04" as a "write/write" memory barrier. However,
# we can't assume a MIPS64R2-only target. Fortunately that is fine, because older MIPS CPUs treat "sync <k>" like "sync 0" (a full barrier),
# while MIPS64R2 CPUs automatically get the cheaper barrier. Neat.
# void SST_Atomic_Add(volatile int* x, int value)
# int SST_Atomic_AddReturn(volatile int* x, int value)
SST_Atomic_Add:
SST_Atomic_AddReturn:
sync 0x10 # sync_mb
1:
ll $12, 0($4) # t4 <- *x
addu $13, $12, $5 # t5 <- t4 + value
sc $13, 0($4) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $13,1b # if failure, retry
addu $2, $12, $5 # (delay slot) $2 <- *x + value (return value)
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_AddPtr(volatile void** x, int value)
# void* SST_Atomic_AddPtrReturn(volatile void** x, int value)
SST_Atomic_AddPtr:
SST_Atomic_AddPtrReturn:
sync 0x10 # sync_mb
1:
lld $12, 0($4) # t4 <- *x
daddu $13, $12, $5 # t5 <- t4 + value
scd $13, 0($4) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $13,1b # if failure, retry
daddu $2, $12, $5 # (delay slot) $2 <- *x + value (return value)
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_And(volatile int* x, int value)
# int SST_Atomic_AndReturn(volatile int* x, int value)
SST_Atomic_And:
SST_Atomic_AndReturn:
sync 0x10 # sync_mb
1:
ll $12, 0($4) # t4 <- *x
and $13, $12, $5 # t5 <- t4 & value
sc $13, 0($4) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $13,1b # if failure, retry
and $2, $12, $5 # (delay slot) compute return value
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_Or(volatile int* x, int value)
# int SST_Atomic_OrReturn(volatile int* x, int value)
SST_Atomic_Or:
SST_Atomic_OrReturn:
sync 0x10 # sync_mb
1:
ll $12, 0($4) # t4 <- *x
or $13, $12, $5 # t5 <- t4 | value
sc $13, 0($4) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $13,1b # if failure, retry
or $2, $12, $5 # (delay slot) compute return value
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_Xor(volatile int* x, int value)
# int SST_Atomic_XorReturn(volatile int* x, int value)
SST_Atomic_XorReturn:
SST_Atomic_Xor:
sync 0x10 # sync_mb
1:
ll $12, 0($4) # t4 <- *x
xor $13, $12, $5 # t5 <- t4 ^ value
sc $13, 0($4) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $13,1b # if failure, retry
xor $2, $12, $5 # (delay slot) compute return value
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# void SST_Atomic_Not(volatile int* x)
SST_Atomic_Not:
SST_Atomic_NotReturn:
sync 0x10 # sync_mb
1:
ll $12, 0($4) # t4 <- *x
nor $13, $12, $0 # t5 <- ~t4
sc $13, 0($4) # try { *x <- t5 }, set t5 = 1/0 depending
beqz $13,1b # if failure, retry
nor $2, $12, $0 # (delay slot) compute return value
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# int SST_Atomic_Exchange(volatile int* x, int value)
SST_Atomic_Exchange:
sync 0x10 # sync_mb
move $12, $5 # t4 <- value
1:
ll $2, 0($4) # v0 <- *x
sc $12, 0($4) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $12,1b # if failure, retry
move $12, $5 # (delay slot) reload t4 <- value for retry
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
SST_Atomic_ExchangePtr:
sync 0x10 # sync_mb
move $12, $5 # value for exchange
1:
lld $2, 0($4) # v0 <- *x
scd $12, 0($4) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $12,1b # if failure, retry
move $12, $5 # (delay slot) t4 <- value for retry
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# int SST_Atomic_ExchangeAdd(volatile int* x, int value);
SST_Atomic_ExchangeAdd:
sync 0x10 # sync_mb
1:
ll $2, 0($4) # v0 <- *x
addu $12, $2, $5 # t4 <- v0 + value
sc $12, 0($4) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $12,1b # if failure, retry
nop # (delay slot) do nothing
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
SST_Atomic_ExchangeAddPtr:
sync 0x10 # sync_mb
1:
lld $2, 0($4) # v0 <- *x
daddu $12, $2, $5 # t4 <- v0 + value
scd $12, 0($4) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $12,1b # if failure, retry
nop # (delay slot) do nothing
jr $31 # return
sync 0x04 # (delay slot) sync_wmb
# int SST_Atomic_CAS(int* dest, int compare, int newValue);
# a0 = dest, a1 = compare, a2 = newValue
SST_Atomic_CAS:
sync 0x10 # sync_mb
1:
move $12, $6 # t4 <- newValue
ll $2, 0($4) # v0 <- *dest (captures "old" value of *dest, also is the return value)
bne $2, $5, 3f # compare v0 with a1 ("compare"), and if not equal, exit loop
nop # (delay slot) do nothing
sc $12, 0($4) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $12,1b # if failure, retry
nop # (delay slot) do nothing
sync 0x04 # sync_wmb
# Return
3:
jr $31 # return
nop # (delay slot) do nothing
# void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
# a0 = dest, a1 = compare, a2 = newValue
SST_Atomic_CASPtr:
sync 0x10 # sync_mb
1:
move $12, $6 # t4 <- newValue
lld $2, 0($4) # v0 <- *dest (captures "old" value of *dest, also is the return value)
bne $2, $5, 3f # compare v0 with a1 ("compare"), and if not equal, exit loop
nop # (delay slot) do nothing
scd $12, 0($4) # try { *x <- t4 }, set t4 = 1/0 depending
beqz $12,1b # if failure, retry
nop # (delay slot) do nothing
sync 0x04 # sync_wmb
# Return
3:
jr $31 # return
nop # (delay slot) do nothing
SST_Atomic_LoadAcquire:
lw $2, 0($4)
sync 0x11 # sync_acquire: LoadLoad | LoadStore
jr $31
nop
SST_Atomic_StoreRelease:
sync 0x12 # sync_release: LoadStore | StoreStore
jr $31
sw $5, 0($4)
SST_Atomic_LoadAcquirePtr:
ld $2, 0($4)
sync 0x11 # sync_acquire: LoadLoad | LoadStore
jr $31
nop
SST_Atomic_StoreReleasePtr:
sync 0x12 # sync_release: LoadStore | StoreStore
jr $31
sd $5, 0($4)

libsst-atomic/SST_Atomic_ppc.asm Normal file

@@ -0,0 +1,201 @@
/*
SST_Atomic_ppc.asm
Author: Patrick Baggett
Created: 7/20/2012
Purpose:
32-bit assembly for Power Architecture processors.
Assembles with GNU as.
License:
This program is free software. It comes without any warranty, to
the extent permitted by applicable law. You can redistribute it
and/or modify it under the terms of the Do What The Fuck You Want
To Public License, Version 2, as published by Sam Hocevar. See
http://sam.zoy.org/wtfpl/COPYING for more details.
*/
.global SST_Atomic_Add
.global SST_Atomic_AddPtr
.global SST_Atomic_And
.global SST_Atomic_Or
.global SST_Atomic_Xor
.global SST_Atomic_Not
.global SST_Atomic_AddReturn
.global SST_Atomic_AddPtrReturn
.global SST_Atomic_AndReturn
.global SST_Atomic_OrReturn
.global SST_Atomic_XorReturn
.global SST_Atomic_NotReturn
.global SST_Atomic_ExchangeAdd
.global SST_Atomic_ExchangeAddPtr
.global SST_Atomic_Exchange
.global SST_Atomic_ExchangePtr
.global SST_Atomic_CAS
.global SST_Atomic_CASPtr
.global SST_Atomic_LoadAcquire
.global SST_Atomic_LoadAcquirePtr
.global SST_Atomic_StoreRelease
.global SST_Atomic_StoreReleasePtr
/*
This is for 32-bit Power arch, so the pointer versions are the same as
the non-pointer versions.
These are all leaf functions, so r3-r11 are legal to use.
*/
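/*
Each read-modify-write routine below has the same shape: a "sync" up front as
a full memory barrier, a lwarx/stwcx. retry loop, and an "isync" at the end so
later loads/stores cannot start before the successful store-conditional.
*/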
/* void SST_Atomic_Add(volatile int* x, int value) */
/* void SST_Atomic_AddPtr(volatile void* x, int value) */
/* void* SST_Atomic_AddPtrReturn(volatile void* x, int value) */
/* int SST_Atomic_AddReturn(volatile int* x, int value) */
SST_Atomic_Add:
SST_Atomic_AddPtr:
SST_Atomic_AddReturn:
SST_Atomic_AddPtrReturn:
mr r5, r3 /* r5 = x */
sync /* mem barrier */
1:
lwarx r6, 0, r5 /* r6 = *x */
add r3, r6, r4 /* return_value = r6 + value */
add r6, r6, r4 /* r6 += value */
stwcx. r6, 0, r5 /* try { *x = sum } */
bne- 1b /* retry? */
isync /* order future L/S instructions */
blr
/* void SST_Atomic_And(volatile int* x, int value) */
/* int SST_Atomic_AndReturn(volatile int* x, int value) */
SST_Atomic_And:
SST_Atomic_AndReturn:
mr r5, r3 /* r5 = x */
sync /* mem barrier */
1:
lwarx r6, 0, r5 /* r6 = *x */
and r3, r6, r4 /* return_value = r6 & value */
and r6, r6, r4 /* r6 &= value */
stwcx. r6, 0, r5 /* try { *x = result } */
bne- 1b /* retry? */
isync /* order future L/S instructions */
blr
/* void SST_Atomic_Or(volatile int* x, int value) */
/* int SST_Atomic_OrReturn(volatile int* x, int value) */
SST_Atomic_Or:
SST_Atomic_OrReturn:
mr r5, r3 /* r5 = x */
sync /* mem barrier */
1:
lwarx r6, 0, r5 /* r6 = *x */
or r3, r6, r4 /* return_value = r6 | value */
or r6, r6, r4 /* r6 |= value */
stwcx. r6, 0, r5 /* try { *x = result } */
bne- 1b /* retry? */
isync /* order future L/S instructions */
blr
/* void SST_Atomic_Xor(volatile int* x, int value) */
/* int SST_Atomic_XorReturn(volatile int* x, int value) */
SST_Atomic_Xor:
SST_Atomic_XorReturn:
mr r5, r3 /* r5 = x */
sync /* mem barrier */
1:
lwarx r6, 0, r5 /* r6 = *x */
xor r3, r6, r4 /* return_value = r6 ^ value */
xor r6, r6, r4 /* r6 ^= value */
stwcx. r6, 0, r5 /* try { *x = result } */
bne- 1b /* retry? */
isync /* order future L/S instructions */
blr
/* void SST_Atomic_Not(volatile int* x) */
/* int SST_Atomic_NotReturn(volatile int* x) */
SST_Atomic_Not:
SST_Atomic_NotReturn:
mr r4, r3 /* r4 = x */
sync /* mem barrier */
1:
lwarx r3, 0, r4 /* r3 = *x */
nor r3, r3, r3 /* r3 = ~r3 */
stwcx. r3, 0, r4 /* try { *x = ~(*x) } */
bne- 1b /* retry? */
isync /* order future L/S instructions */
blr
/* int SST_Atomic_Exchange(volatile int* x, int value) */
/* void* SST_Atomic_ExchangePtr(volatile void** x, void* value) */
SST_Atomic_Exchange:
SST_Atomic_ExchangePtr:
mr r5, r3 /* r5 = x */
sync /* mem barrier */
1:
lwarx r3, 0, r5 /* try { return_value = *x */
stwcx. r4, 0, r5 /* *x = value } */
bne- 1b /* retry? */
isync /* order future L/S instructions */
blr
/* int SST_Atomic_CAS(volatile int* dest, int compare, int newValue) */
SST_Atomic_CAS:
SST_Atomic_CASPtr:
mr r6, r3 /* r6 = dest */
sync /* mem barrier */
1:
lwarx r3, 0, r6 /* return_value = *dest */
cmpw r4, r3 /* if(return_value != compare) */
bne- 2f /* return return_value; */
stwcx. r5, 0, r6 /* else { *dest = newValue } */
bne- 1b /* retry? */
isync /* order future L/S instructions */
2:
blr
/* int SST_Atomic_ExchangeAdd(volatile int* x, int value) */
/* void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value) */
SST_Atomic_ExchangeAdd:
SST_Atomic_ExchangeAddPtr:
mr r5, r3 /* r5 = x */
sync /* mem barrier */
1:
lwarx r3, 0, r5 /* r3 = *x */
add r6, r3, r4 /* r6 = *x + value */
stwcx. r6, 0, r5 /* try { *x = sum } */
bne- 1b /* retry? */
isync /* order future L/S instructions */
blr
/* int SST_Atomic_LoadAcquire(const volatile int* src); */
/* void* SST_Atomic_LoadAcquirePtr(const volatile void** src); */
SST_Atomic_LoadAcquire:
SST_Atomic_LoadAcquirePtr:
lwz r3, 0(r3)
isync /* order future L/S instructions */
blr
/* void SST_Atomic_StoreRelease(volatile int* dest, int value); */
/* void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value); */
SST_Atomic_StoreRelease:
SST_Atomic_StoreReleasePtr:
lwsync
stw r4, 0(r3)
blr

libsst-atomic/SST_Atomic_sparc.asm Normal file

@@ -0,0 +1,214 @@
/*
SST_Atomic_sparc.asm
Author: Patrick Baggett
Created: 12/28/2011
Purpose:
32-bit assembly for SPARCv9 processors (aka v8+)
Older SPARC CPUs (e.g. sun4m) are not supported.
Assembles with Solaris assembler or GNU as.
License:
This program is free software. It comes without any warranty, to
the extent permitted by applicable law. You can redistribute it
and/or modify it under the terms of the Do What The Fuck You Want
To Public License, Version 2, as published by Sam Hocevar. See
http://sam.zoy.org/wtfpl/COPYING for more details.
*/
.global SST_Atomic_Add
.global SST_Atomic_AddPtr
.global SST_Atomic_And
.global SST_Atomic_Or
.global SST_Atomic_Xor
.global SST_Atomic_Not
.global SST_Atomic_AddReturn
.global SST_Atomic_AddPtrReturn
.global SST_Atomic_AndReturn
.global SST_Atomic_OrReturn
.global SST_Atomic_XorReturn
.global SST_Atomic_NotReturn
.global SST_Atomic_ExchangeAdd
.global SST_Atomic_ExchangeAddPtr
.global SST_Atomic_Exchange
.global SST_Atomic_ExchangePtr
.global SST_Atomic_CAS
.global SST_Atomic_CASPtr
.global SST_Atomic_LoadAcquire
.global SST_Atomic_LoadAcquirePtr
.global SST_Atomic_StoreRelease
.global SST_Atomic_StoreReleasePtr
/*
This is for 32-bit SPARC code, so the pointer versions are the same as
the non-pointer versions.
These are all leaf functions, so %o0-%o5 and %g1 are legal to use.
*/
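/*
The read-modify-write routines below all use a cas-based retry loop. The ",a"
(annul) form of the branch means the "mov %o5, %o4" in the delay slot only
executes when the branch is taken, i.e. only on a retry, where it copies the
value cas actually observed back into %o4 for the next attempt.
*/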
/* void SST_Atomic_Add(volatile int* x, int value) */
/* void SST_Atomic_AddPtr(volatile void* x, int value) */
/* void* SST_Atomic_AddPtrReturn(volatile void* x, int value) */
/* int SST_Atomic_AddReturn(volatile int* x, int value) */
SST_Atomic_Add:
SST_Atomic_AddPtr:
SST_Atomic_AddReturn:
SST_Atomic_AddPtrReturn:
ld [%o0], %o4 /* o4 = *x */
1:
add %o1, %o4, %o5 /* o5 = *x + value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
/* membar #StoreStore | #StoreLoad Force stores to complete before loads */
retl /* Success */
add %o1, %o5, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* void SST_Atomic_And(volatile int* x, int value) */
/* int SST_Atomic_AndReturn(volatile int* x, int value) */
SST_Atomic_And:
SST_Atomic_AndReturn:
ld [%o0], %o4 /* o4 = *x */
1:
and %o1, %o4, %o5 /* o5 = *x & value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
/* membar #StoreStore | #StoreLoad Force stores to complete before loads */
retl /* Success */
and %o1, %o5, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* void SST_Atomic_Or(volatile int* x, int value) */
/* int SST_Atomic_OrReturn(volatile int* x, int value) */
SST_Atomic_Or:
SST_Atomic_OrReturn:
ld [%o0], %o4 /* o4 = *x */
1:
or %o1, %o4, %o5 /* o5 = *x | value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
/* membar #StoreStore | #StoreLoad Force stores to complete before loads */
retl /* Success */
or %o1, %o5, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* void SST_Atomic_Xor(volatile int* x, int value) */
/* int SST_Atomic_XorReturn(volatile int* x, int value) */
SST_Atomic_Xor:
SST_Atomic_XorReturn:
ld [%o0], %o4 /* o4 = *x */
1:
xor %o1, %o4, %o5 /* o5 = *x ^ value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
/* membar #StoreStore | #StoreLoad Force stores to complete before loads */
retl /* Success */
xor %o1, %o5, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* void SST_Atomic_Not(volatile int* x) */
/* int SST_Atomic_NotReturn(volatile int* x) */
SST_Atomic_Not:
SST_Atomic_NotReturn:
ld [%o0], %o4 /* o4 = *x */
1:
not %o4, %o5 /* o5 = ~*x */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
/* membar #StoreStore | #StoreLoad Force stores to complete before loads */
retl /* Success */
not %o4, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* int SST_Atomic_Exchange(volatile int* x, int value) */
/* int SST_Atomic_ExchangePtr(volatile void** x, void* value) */
SST_Atomic_Exchange:
SST_Atomic_ExchangePtr:
swap [%o0], %o1 /* swap(*x, value) */
retl
mov %o1, %o0 /* move return value to %o0 */
/* NOTE: SPARCv9 manual calls the "swap" instruction deprecated. I don't
understand why. "cas" could fail, when really, we don't care if the
value changed, we care about ensuring that only 1 thread gets that value,
i.e. if 3 threads execute "swap" at the same time using 3 different values,
it is undefined which value is the final value, but each thread gets a
different value (thus, it is atomic).
*/
/* int SST_Atomic_CAS(volatile int* dest, int compare, int newValue) */
SST_Atomic_CAS:
SST_Atomic_CASPtr:
cas [%o0], %o1, %o2
retl
mov %o2, %o0
/* int SST_Atomic_ExchangeAdd(volatile int* x, int value) */
/* void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value) */
SST_Atomic_ExchangeAdd:
SST_Atomic_ExchangeAddPtr:
ld [%o0], %o4 /* o4 = *x */
1:
add %o1, %o4, %o5 /* o5 = *x + value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
/* membar #StoreStore | #StoreLoad Force stores to complete before loads */
retl /* Success */
mov %o4, %o0 /* %o4 = value before add */
/* int SST_Atomic_LoadAcquire(const volatile int* src); */
/* void* SST_Atomic_LoadAcquirePtr(const volatile void** src); */
SST_Atomic_LoadAcquire:
SST_Atomic_LoadAcquirePtr:
ld [%o0], %o0
membar #LoadStore | #LoadLoad
retl
nop
/* void SST_Atomic_StoreRelease(volatile int* dest, int value); */
/* void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value); */
SST_Atomic_StoreRelease:
SST_Atomic_StoreReleasePtr:
membar #LoadStore | #StoreStore
retl
st %o1, [%o0]

libsst-atomic/SST_Atomic_sparc64.asm Normal file

@@ -0,0 +1,254 @@
/*
SST_Atomic_sparc64.asm
Author: Patrick Baggett
Created: 4/3/2012
Purpose:
64-bit assembly for SPARCv9 processors
Assembles with Solaris assembler or GNU as.
License:
This program is free software. It comes without any warranty, to
the extent permitted by applicable law. You can redistribute it
and/or modify it under the terms of the Do What The Fuck You Want
To Public License, Version 2, as published by Sam Hocevar. See
http://sam.zoy.org/wtfpl/COPYING for more details.
*/
.global SST_Atomic_Add
.global SST_Atomic_AddPtr
.global SST_Atomic_And
.global SST_Atomic_Or
.global SST_Atomic_Xor
.global SST_Atomic_Not
.global SST_Atomic_AddReturn
.global SST_Atomic_AddPtrReturn
.global SST_Atomic_AndReturn
.global SST_Atomic_OrReturn
.global SST_Atomic_XorReturn
.global SST_Atomic_NotReturn
.global SST_Atomic_ExchangeAdd
.global SST_Atomic_ExchangeAddPtr
.global SST_Atomic_Exchange
.global SST_Atomic_ExchangePtr
.global SST_Atomic_CAS
.global SST_Atomic_CASPtr
.global SST_Atomic_LoadAcquire
.global SST_Atomic_LoadAcquirePtr
.global SST_Atomic_StoreRelease
.global SST_Atomic_StoreReleasePtr
/*
This is for 64-bit SPARC code, so the pointer versions are different
from the non-pointer versions.
These are all leaf functions, so %o0-%o5 and %g1 are legal to use.
On TSO model, only "membar #StoreLoad" is a true barrier, the
rest are no-ops. This code assumes RMO.
*/
/* void SST_Atomic_Add(volatile int* x, int value) */
/* int SST_Atomic_AddReturn(volatile int* x, int value) */
SST_Atomic_Add:
SST_Atomic_AddReturn:
ld [%o0], %o4 /* o4 = *x */
1:
add %o1, %o4, %o5 /* o5 = *x + value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
retl /* Success */
add %o1, %o5, %o0 /* return sum */
/******************************************************************************/
/* void SST_Atomic_AddPtr(volatile void* x, int value) */
/* void* SST_Atomic_AddPtrReturn(volatile void* x, int value) */
SST_Atomic_AddPtr:
SST_Atomic_AddPtrReturn:
ldx [%o0], %o4 /* o4 = *x */
1:
add %o1, %o4, %o5 /* o5 = *x + value */
casx [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %xcc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
retl /* Success */
add %o1, %o5, %o0 /* return sum */
/* void SST_Atomic_And(volatile int* x, int value) */
/* int SST_Atomic_AndReturn(volatile int* x, int value) */
SST_Atomic_And:
SST_Atomic_AndReturn:
ld [%o0], %o4 /* o4 = *x */
1:
and %o1, %o4, %o5 /* o5 = *x & value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
retl /* Success */
and %o1, %o5, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* void SST_Atomic_Or(volatile int* x, int value) */
/* int SST_Atomic_OrReturn(volatile int* x, int value) */
SST_Atomic_Or:
SST_Atomic_OrReturn:
ld [%o0], %o4 /* o4 = *x */
1:
or %o1, %o4, %o5 /* o5 = *x | value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
retl /* Success */
or %o1, %o5, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* void SST_Atomic_Xor(volatile int* x, int value) */
/* int SST_Atomic_XorReturn(volatile int* x, int value) */
SST_Atomic_Xor:
SST_Atomic_XorReturn:
ld [%o0], %o4 /* o4 = *x */
1:
xor %o1, %o4, %o5 /* o5 = *x ^ value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
retl /* Success */
xor %o1, %o5, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* void SST_Atomic_Not(volatile int* x) */
/* int SST_Atomic_NotReturn(volatile int* x) */
SST_Atomic_Not:
SST_Atomic_NotReturn:
ld [%o0], %o4 /* o4 = *x */
1:
not %o4, %o5 /* o5 = ~*x */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
retl /* Success */
not %o4, %o0 /* could do nop, but delay slot is free.
then we can make the "Return" versions
use the same code path */
/* int SST_Atomic_Exchange(volatile int* x, int value) */
SST_Atomic_Exchange:
ld [%o0], %o4 /* o4 = *x */
1:
mov %o1, %o5 /* o5 = value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
membar #StoreStore | #LoadStore /* Ensure ordering */
retl /* Success */
mov %o4, %o0
/* int SST_Atomic_ExchangePtr(volatile void** x, void* value) */
SST_Atomic_ExchangePtr:
ldx [%o0], %o4 /* o4 = *x */
1:
mov %o1, %o5 /* o5 = value */
casx [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %xcc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
membar #StoreStore | #LoadStore /* Ensure ordering */
retl /* Success */
mov %o4, %o0
/* int SST_Atomic_CAS(volatile int* dest, int compare, int newValue) */
SST_Atomic_CAS:
cas [%o0], %o1, %o2
retl
mov %o2, %o0
SST_Atomic_CASPtr:
casx [%o0], %o1, %o2
retl
mov %o2, %o0
/* int SST_Atomic_ExchangeAdd(volatile int* x, int value) */
SST_Atomic_ExchangeAdd:
ld [%o0], %o4 /* o4 = *x */
1:
add %o1, %o4, %o5 /* o5 = *x + value */
cas [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %icc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
retl /* Success */
mov %o4, %o0 /* %o4 = value before add */
SST_Atomic_ExchangeAddPtr:
ldx [%o0], %o4 /* o4 = *x */
1:
add %o1, %o4, %o5 /* o5 = *x + value */
casx [%o0], %o4, %o5 /* compare and swap */
cmp %o4, %o5 /* compare what was in mem with our value */
bne,a,pn %xcc, 1b /* not the same -> try again. */
mov %o5, %o4 /* restore o4, it should contain *x */
retl /* Success */
mov %o4, %o0 /* %o4 = value before add */
/* int SST_Atomic_LoadAcquire(const volatile int* src); */
SST_Atomic_LoadAcquire:
ld [%o0], %o0
membar #LoadStore | #LoadLoad
retl
nop
/* void* SST_Atomic_LoadAcquirePtr(const volatile void** src); */
SST_Atomic_LoadAcquirePtr:
ldx [%o0], %o0
membar #LoadStore | #LoadLoad
retl
nop
/* void SST_Atomic_StoreRelease(volatile int* dest, int value); */
SST_Atomic_StoreRelease:
membar #LoadStore | #StoreStore
retl
st %o1, [%o0]
/* void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value); */
SST_Atomic_StoreReleasePtr:
membar #LoadStore | #StoreStore
retl
stx %o1, [%o0]

libsst-atomic/SST_Atomic_x86-64-win64.asm Normal file

@@ -0,0 +1,237 @@
; SST_Atomic_x86-64-win64.asm
; Author: Patrick Baggett <ptbaggett@762studios.com>
; Created: 12/27/2011
;
; Purpose:
;
; 64-bit assembly for atomic operations using the Microsoft x64 ABI
; Assembles with YASM 1.1/NASM 2.0+
;
; License:
;
; This program is free software. It comes without any warranty, to
; the extent permitted by applicable law. You can redistribute it
; and/or modify it under the terms of the Do What The Fuck You Want
; To Public License, Version 2, as published by Sam Hocevar. See
; http://sam.zoy.org/wtfpl/COPYING for more details.
; Win64 and UNIX calling conventions differ for x86-64. I know, it's absurd.
; Because of this, there are separate implementations for Win64 and x86-64 UNIX (Linux, *BSD, Solaris, MacOS X, etc.)
; Here is the breakdown:
;
; REGISTER | Win64 | UNIX
;-----------+-----------+----------
;rax | Retval | Retval
;rdi | Not used | 1st arg
;rsi | Not used | 2nd arg
;rcx | 1st arg | 3rd arg
;rdx | 2nd arg | 4th arg
;r8 | 3rd arg | 5th arg
;r9 | 4th arg | 6th arg
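; This file uses only the Win64 column above: rcx = 1st arg, edx/rdx = 2nd arg, r8d/r8 = 3rd arg.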
[bits 64]
[segment .text]
; Win64 symbol names
[global SST_Atomic_Add]
[global SST_Atomic_AddPtr]
[global SST_Atomic_And]
[global SST_Atomic_Or]
[global SST_Atomic_Xor]
[global SST_Atomic_Not]
[global SST_Atomic_AddReturn]
[global SST_Atomic_AddPtrReturn]
[global SST_Atomic_AndReturn]
[global SST_Atomic_OrReturn]
[global SST_Atomic_XorReturn]
[global SST_Atomic_NotReturn]
[global SST_Atomic_ExchangeAdd]
[global SST_Atomic_ExchangeAddPtr]
[global SST_Atomic_Exchange]
[global SST_Atomic_ExchangePtr]
[global SST_Atomic_CAS]
[global SST_Atomic_CASPtr]
[global SST_Atomic_LoadAcquire]
[global SST_Atomic_LoadAcquirePtr]
[global SST_Atomic_StoreRelease]
[global SST_Atomic_StoreReleasePtr]
;=======================================================================
; Pointers and integers are different sized (64 bits vs 32 bits) so
; each SST_Atomic_XxxxPtr() function uses the full 64-bit registers.
;=======================================================================
;====================================================
; MICROSOFT WIN64 ABI
;====================================================
;void SST_Atomic_Add(volatile int* x, int value)
SST_Atomic_Add:
lock add [rcx], edx
ret
;void SST_Atomic_AddPtr(volatile void** x, int value)
SST_Atomic_AddPtr:
lock add [rcx], rdx
ret
; void SST_Atomic_And(volatile int* x, int value)
SST_Atomic_And:
lock and [rcx], edx
ret
; void SST_Atomic_Or(volatile int* x, int value)
SST_Atomic_Or:
lock or [rcx], edx
ret
; void SST_Atomic_Xor(volatile int* x, int value)
SST_Atomic_Xor:
lock xor [rcx], edx
ret
; void SST_Atomic_Not(volatile int* x)
SST_Atomic_Not:
lock not DWORD [rcx]
ret
; int SST_Atomic_AddReturn(volatile int* x, int value)
SST_Atomic_AddReturn:
mov eax, edx
lock xadd [rcx], eax ; eax contains orig value before xadd
add eax, edx ; now add 'value' (in edx) to get the sum that was stored in *x
ret
; void* SST_Atomic_AddPtrReturn(volatile void** x, int value)
SST_Atomic_AddPtrReturn:
mov rax, rdx
lock xadd [rcx], rax ; rax contains orig value before xadd
add rax, rdx ; now add 'value' (in rdx) to get the sum that was stored in *x
ret
; int SST_Atomic_AndReturn(volatile int* x, int value)
SST_Atomic_AndReturn:
mov eax, [rcx] ;eax = *x
mov r8d, eax ;r8d = *x
try_again_5:
and r8d, edx ;r8d <- *x & value
lock cmpxchg [rcx], r8d ;if(eax == *x) *x = r8d, else eax = *x
jz done_5 ;success -> *x was successfully updated. Its new value is in r8d.
mov r8d, eax ;failure -> *x was changed previously, its new value is in eax. Copy that new value to r8d and try again
jmp try_again_5
done_5:
mov eax,r8d ;r8d has the value (*x & value), so mov to eax for return value
ret
SST_Atomic_OrReturn:
mov eax, [rcx] ;eax = *x
mov r8d, eax ;r8d = *x
try_again_6:
or r8d, edx ;r8d <- *x | value
lock cmpxchg [rcx], r8d ;if(eax == *x) *x = r8d, else eax = *x
jz done_6 ;success -> *x was successfully updated. Its new value is in r8d.
mov r8d, eax ;failure -> *x was changed previously, its new value is in eax. Copy that new value to r8d and try again
jmp try_again_6
done_6:
mov eax,r8d ;r8d has the value (*x | value), so mov to eax for return value
ret
SST_Atomic_XorReturn:
mov eax, [rcx] ;eax = *x
mov r8d, eax ;r8d = *x
try_again_7:
xor r8d, edx ;r8d <- *x ^ value
lock cmpxchg [rcx], r8d ;if(eax == *x) *x = r8d, else eax = *x
jz done_7 ;success -> *x was successfully updated. Its new value is in r8d.
mov r8d, eax ;failure -> *x was changed previously, its new value is in eax. Copy that new value to r8d and try again
jmp try_again_7
done_7:
mov eax,r8d ;r8d has the value (*x ^ value), so mov to eax for return value
ret
; int SST_Atomic_NotReturn(volatile int* x)
SST_Atomic_NotReturn:
mov eax, [rcx] ;eax = *x
mov r8d, eax ;r8d = *x
try_again_8:
not r8d ;r8d <- ~*x
lock cmpxchg [rcx], r8d ;if(eax == *x) *x = r8d, else eax = *x
jz done_8 ;success -> *x was successfully updated. Its new value is in r8d.
mov r8d, eax ;failure -> *x was changed previously, its new value is in eax. Copy that new value to r8d and try again
jmp try_again_8
done_8:
mov eax,r8d ;r8d has the value ~(*x), so mov to eax for return value
ret
;int SST_Atomic_Exchange(volatile int* x, int value)
SST_Atomic_Exchange:
lock xchg [rcx], edx
mov eax, edx
ret
;void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
SST_Atomic_ExchangePtr:
lock xchg [rcx], rdx
mov rax, rdx
ret
;int SST_Atomic_ExchangeAdd(volatile int* x, int value);
SST_Atomic_ExchangeAdd:
lock xadd [rcx], edx
mov eax, edx
ret
;void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
SST_Atomic_ExchangeAddPtr:
lock xadd [rcx], rdx
mov rax, rdx
ret
;int SST_Atomic_CAS(int* dest, int compare, int newValue);
SST_Atomic_CAS:
;rcx <- dest
;edx <- compare
;r8d <- newValue
mov eax, edx
lock cmpxchg [rcx], r8d ;compare [rcx] with eax | swap with r8d if equal, else store [rcx] into eax
ret
;void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
SST_Atomic_CASPtr:
;rcx <- dest
;rdx <- compare
;r8 <- newValue
mov rax, rdx
lock cmpxchg [rcx], r8 ;compare [rcx] with rax | swap with r8 if equal, else store [rcx] into rax
ret
; int SST_Atomic_LoadAcquire(const volatile int* src);
; NOTE: x86 has strong memory ordering, so no hardware barrier is necessary; only a compiler barrier against instruction reordering is needed
SST_Atomic_LoadAcquire:
mov eax, [rcx]
ret
; void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
; NOTE: x86 has strong memory ordering, so no hardware barrier is necessary; only a compiler barrier against instruction reordering is needed
SST_Atomic_LoadAcquirePtr:
mov rax, [rcx]
ret
; void SST_Atomic_StoreRelease(volatile int* dest, int value);
; NOTE: x86 has strong memory ordering, so no hardware barrier is necessary; only a compiler barrier against instruction reordering is needed
SST_Atomic_StoreRelease:
mov [rcx], edx
ret
; void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
; NOTE: x86 has strong memory ordering, so no hardware barrier is necessary; only a compiler barrier against instruction reordering is needed
SST_Atomic_StoreReleasePtr:
mov [rcx], rdx
ret

libsst-atomic/SST_Atomic_x86-64.asm Normal file

@@ -0,0 +1,285 @@
; SST_Atomic_x86-64.asm
; Author: Patrick Baggett <ptb1@762studios.com>
; Created: 12/20/2011
;
; Purpose:
;
; 64-bit assembly for atomic operations using the SysV x86-64 ABI
; Assembles with YASM 1.1/NASM 2.0+
;
; License:
;
; This program is free software. It comes without any warranty, to
; the extent permitted by applicable law. You can redistribute it
; and/or modify it under the terms of the Do What The Fuck You Want
; To Public License, Version 2, as published by Sam Hocevar. See
; http://sam.zoy.org/wtfpl/COPYING for more details.
; Win64 and UNIX calling conventions differ for x86-64. I know, it's absurd.
; Because of this, there are separate implementations for Win64 and x86-64 UNIX (Linux, *BSD, Solaris, MacOS X, etc.)
; Here is the breakdown:
;
; REGISTER  | Win64     | UNIX
;-----------+-----------+----------
;rax        | Retval    | Retval
;rdi        | Not used  | 1st arg
;rsi        | Not used  | 2nd arg
;rcx        | 1st arg   | 4th arg
;rdx        | 2nd arg   | 3rd arg
;r8         | 3rd arg   | 5th arg
;r9         | 4th arg   | 6th arg
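; For example (illustrative only), a call such as SST_Atomic_CAS(dest, compare, newValue) arrives as:
;   Win64: rcx = dest, edx = compare, r8d = newValue, result in eax
;   UNIX : rdi = dest, esi = compare, edx = newValue, result in eax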
[bits 64]
[segment .text]
; ELF symbol names (UNIX)
[global SST_Atomic_Add]
[global SST_Atomic_AddPtr]
[global SST_Atomic_And]
[global SST_Atomic_Or]
[global SST_Atomic_Xor]
[global SST_Atomic_Not]
[global SST_Atomic_AddReturn]
[global SST_Atomic_AddPtrReturn]
[global SST_Atomic_AndReturn]
[global SST_Atomic_OrReturn]
[global SST_Atomic_XorReturn]
[global SST_Atomic_NotReturn]
[global SST_Atomic_ExchangeAdd]
[global SST_Atomic_ExchangeAddPtr]
[global SST_Atomic_Exchange]
[global SST_Atomic_ExchangePtr]
[global SST_Atomic_CAS]
[global SST_Atomic_CASPtr]
[global SST_Atomic_LoadAcquire]
[global SST_Atomic_LoadAcquirePtr]
[global SST_Atomic_StoreRelease]
[global SST_Atomic_StoreReleasePtr]
; Mach-O symbol names (MacOS X)
[global _SST_Atomic_Add]
[global _SST_Atomic_AddPtr]
[global _SST_Atomic_And]
[global _SST_Atomic_Or]
[global _SST_Atomic_Xor]
[global _SST_Atomic_Not]
[global _SST_Atomic_AddReturn]
[global _SST_Atomic_AddPtrReturn]
[global _SST_Atomic_AndReturn]
[global _SST_Atomic_OrReturn]
[global _SST_Atomic_XorReturn]
[global _SST_Atomic_NotReturn]
[global _SST_Atomic_ExchangeAdd]
[global _SST_Atomic_ExchangeAddPtr]
[global _SST_Atomic_Exchange]
[global _SST_Atomic_ExchangePtr]
[global _SST_Atomic_CAS]
[global _SST_Atomic_CASPtr]
[global _SST_Atomic_LoadAcquire]
[global _SST_Atomic_LoadAcquirePtr]
[global _SST_Atomic_StoreRelease]
[global _SST_Atomic_StoreReleasePtr]
;=======================================================================
; Pointers and integers are different sizes (64 bits vs. 32 bits), so
; each SST_Atomic_XxxxPtr() function uses the full 64-bit registers.
;=======================================================================
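; For example, SST_Atomic_Add below operates on the 32-bit registers (esi/eax), while
; SST_Atomic_AddPtr uses the full 64-bit registers (rsi/rax).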
;====================================================
; UNIX/ELF ABI
;====================================================
;void SST_Atomic_Add(volatile int* x, int value)
SST_Atomic_Add:
_SST_Atomic_Add:
lock add [rdi], esi
ret
;void SST_Atomic_AddPtr(volatile void** x, int value)
SST_Atomic_AddPtr:
_SST_Atomic_AddPtr:
lock add [rdi], rsi
ret
; void SST_Atomic_And(volatile int* x, int value)
SST_Atomic_And:
_SST_Atomic_And:
lock and [rdi], esi
ret
; void SST_Atomic_Or(volatile int* x, int value)
SST_Atomic_Or:
_SST_Atomic_Or:
lock or [rdi], esi
ret
; void SST_Atomic_Xor(volatile int* x, int value)
SST_Atomic_Xor:
_SST_Atomic_Xor:
lock xor [rdi], esi
ret
; void SST_Atomic_Not(volatile int* x)
SST_Atomic_Not:
_SST_Atomic_Not:
lock not DWORD [rdi]
ret
; int SST_Atomic_AddReturn(volatile int* x, int value)
SST_Atomic_AddReturn:
_SST_Atomic_AddReturn:
mov eax, esi
lock xadd [rdi], eax ; eax contains orig value before xadd
add eax, esi ; now add 'value' (in esi) to get the sum that was stored in *x
ret
; void* SST_Atomic_AddPtrReturn(volatile void** x, int value)
SST_Atomic_AddPtrReturn:
_SST_Atomic_AddPtrReturn:
mov rax, rsi
lock xadd [rdi], rax ; rax contains orig value before xadd
add rax, rsi ; now add 'value' (in rsi) to get the sum that was stored in *x
ret
; int SST_Atomic_AndReturn(volatile int* x, int value)
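; The And/Or/Xor/Not *Return functions below share the same cmpxchg retry loop.
; Rough C-style sketch (pseudocode only, for clarity; CAS stands in for lock cmpxchg):
;   int old = *x;
;   for (;;) {
;       int desired = old & value;                  /* or |, ^, ~ as appropriate */
;       if (CAS(x, &old, desired)) return desired;  /* success: *x now holds desired */
;       /* failure: cmpxchg reloaded 'old' with the current *x; retry */
;   }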
SST_Atomic_AndReturn:
_SST_Atomic_AndReturn:
mov eax, [rdi] ;eax = *x
mov edx, eax ;edx = *x
try_again_1:
and edx, esi ;edx <- *x & value
lock cmpxchg [rdi], edx ;if(eax == *x) *x = edx, else eax = *x
jz done_1 ;success -> *x was successfully updated. Its new value is in edx
mov edx, eax ;failure -> *x was changed by another thread; its current value is in eax. Copy that value to edx and try again
jmp try_again_1
done_1:
mov eax,edx ;edx has the value (*x & value), so mov to eax for return value
ret
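; int SST_Atomic_OrReturn(volatile int* x, int value)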
SST_Atomic_OrReturn:
_SST_Atomic_OrReturn:
mov eax, [rdi] ;eax = *x
mov edx, eax ;edx = *x
try_again_2:
or edx, esi ;edx <- *x | value
lock cmpxchg [rdi], edx ;if(eax == *x) *x = edx, else eax = *x
jz done_2 ;success -> *x was successfully updated. Its new value is in edx
mov edx, eax ;failure -> *x was changed by another thread; its current value is in eax. Copy that value to edx and try again
jmp try_again_2
done_2:
mov eax,edx ;edx has the value (*x | value), so mov to eax for return value
ret
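; int SST_Atomic_XorReturn(volatile int* x, int value)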
SST_Atomic_XorReturn:
_SST_Atomic_XorReturn:
mov eax, [rdi] ;eax = *x
mov edx, eax ;edx = *x
try_again_3:
xor edx, esi ;edx <- *x ^ value
lock cmpxchg [rdi], edx ;if(eax == *x) *x = edx, else eax = *x
jz done_3 ;success -> *x was successfully updated. Its new value is in edx
mov edx, eax ;failure -> *x was changed by another thread; its current value is in eax. Copy that value to edx and try again
jmp try_again_3
done_3:
mov eax,edx ;edx has the value (*x ^ value), so mov to eax for return value
ret
; int SST_Atomic_NotReturn(volatile int* x)
SST_Atomic_NotReturn:
_SST_Atomic_NotReturn:
mov eax, [rdi] ;eax = *x
mov edx, eax ;edx = *x
try_again_4:
not edx
lock cmpxchg [rdi], edx ;if(eax == *x) *x = edx, else eax = *x
jz done_4 ;success -> *x was successfully updated. Its new value is in edx
mov edx, eax ;failure -> *x was changed by another thread; its current value is in eax. Copy that value to edx and try again
jmp try_again_4
done_4:
mov eax,edx ;edx has the value (~*x), so mov to eax for return value
ret
;int SST_Atomic_Exchange(volatile int* x, int value)
SST_Atomic_Exchange:
_SST_Atomic_Exchange:
lock xchg [rdi], esi
mov eax, esi
ret
;void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
SST_Atomic_ExchangePtr:
_SST_Atomic_ExchangePtr:
lock xchg [rdi], rsi
mov rax, rsi
ret
;int SST_Atomic_ExchangeAdd(volatile int* x, int value);
SST_Atomic_ExchangeAdd:
_SST_Atomic_ExchangeAdd:
lock xadd [rdi],esi
mov eax, esi
ret
;void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
SST_Atomic_ExchangeAddPtr:
_SST_Atomic_ExchangeAddPtr:
lock xadd [rdi],rsi
mov rax, rsi
ret
;int SST_Atomic_CAS(int* dest, int compare, int newValue);
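; Returns the value originally in *dest; the caller can compare it against 'compare' to tell whether the swap happened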
SST_Atomic_CAS:
_SST_Atomic_CAS:
;rdi <- dest
;rsi <- compare
;rdx <- newValue
mov eax, esi
lock cmpxchg [rdi], edx ;compare [rdi] with eax | swap with edx if equal, else store [rdi] into eax
ret
;void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
SST_Atomic_CASPtr:
_SST_Atomic_CASPtr:
;rdi <- dest
;rsi <- compare
;rdx <- newValue
mov rax, rsi
lock cmpxchg [rdi], rdx ;compare [rdi] with rax | swap with rdx if equal, else store [rdi] into rax
ret
; int SST_Atomic_LoadAcquire(const volatile int* src);
; NOTE: x86 has strong memory ordering, thus no hardware barrier is necessary, except for a compiler barrier to prevent instruction reordering
SST_Atomic_LoadAcquire:
_SST_Atomic_LoadAcquire:
mov eax, [rdi]
ret
; void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
; NOTE: x86 has strong memory ordering, thus no hardware barrier is necessary, except for a compiler barrier to prevent instruction reordering
SST_Atomic_LoadAcquirePtr:
_SST_Atomic_LoadAcquirePtr:
mov rax, [rdi]
ret
; void SST_Atomic_StoreRelease(volatile int* dest, int value);
; NOTE: x86 has strong memory ordering, thus no hardware barrier is necessary, except for a compiler barrier to prevent instruction reordering
SST_Atomic_StoreRelease:
_SST_Atomic_StoreRelease:
mov [rdi], esi
ret
; void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
; NOTE: x86 has strong memory ordering, thus no hardware barrier is necessary, except for a compiler barrier to prevent instruction reordering
SST_Atomic_StoreReleasePtr:
_SST_Atomic_StoreReleasePtr:
mov [rdi], rsi
ret

View File

@@ -0,0 +1,271 @@
; SST_Atomic_x86.asm
; Author: Patrick Baggett <ptbaggett@762studios.com>
; Created: 11/15/2011
;
; Purpose:
;
; 32-bit assembly for atomic operations for x86 CPUs
; Assembles with YASM 1.1/NASM 2.0+
;
; License:
;
; This program is free software. It comes without any warranty, to
; the extent permitted by applicable law. You can redistribute it
; and/or modify it under the terms of the Do What The Fuck You Want
; To Public License, Version 2, as published by Sam Hocevar. See
; http://sam.zoy.org/wtfpl/COPYING for more details.
[bits 32]
[segment .text]
; Win32 symbol names
[global _SST_Atomic_Add]
[global _SST_Atomic_AddPtr]
[global _SST_Atomic_And]
[global _SST_Atomic_Or]
[global _SST_Atomic_Xor]
[global _SST_Atomic_Not]
[global _SST_Atomic_AddReturn]
[global _SST_Atomic_AddPtrReturn]
[global _SST_Atomic_AndReturn]
[global _SST_Atomic_OrReturn]
[global _SST_Atomic_XorReturn]
[global _SST_Atomic_NotReturn]
[global _SST_Atomic_ExchangeAdd]
[global _SST_Atomic_ExchangeAddPtr]
[global _SST_Atomic_Exchange]
[global _SST_Atomic_ExchangePtr]
[global _SST_Atomic_CAS]
[global _SST_Atomic_CASPtr]
[global _SST_Atomic_LoadAcquire]
[global _SST_Atomic_LoadAcquirePtr]
[global _SST_Atomic_StoreRelease]
[global _SST_Atomic_StoreReleasePtr]
; ELF symbol names
[global SST_Atomic_Add]
[global SST_Atomic_AddPtr]
[global SST_Atomic_And]
[global SST_Atomic_Or]
[global SST_Atomic_Xor]
[global SST_Atomic_Not]
[global SST_Atomic_AddReturn]
[global SST_Atomic_AddPtrReturn]
[global SST_Atomic_AndReturn]
[global SST_Atomic_OrReturn]
[global SST_Atomic_XorReturn]
[global SST_Atomic_NotReturn]
[global SST_Atomic_ExchangeAdd]
[global SST_Atomic_ExchangeAddPtr]
[global SST_Atomic_Exchange]
[global SST_Atomic_ExchangePtr]
[global SST_Atomic_CAS]
[global SST_Atomic_CASPtr]
[global SST_Atomic_LoadAcquire]
[global SST_Atomic_LoadAcquirePtr]
[global SST_Atomic_StoreRelease]
[global SST_Atomic_StoreReleasePtr]
;=======================================================================
; Since pointers and integers are the same size on GCC/MSVC, many of the
; integer/pointer code paths are the same. This of course only holds true
; for 32-bit compiles on x86, which is exactly this file.
;=======================================================================
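; Under cdecl all arguments are passed on the stack: the return address is at [esp],
; the 1st argument at [esp+4], the 2nd at [esp+8], and so on.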
;void SST_Atomic_Add(volatile int* x, int value)
;void SST_Atomic_AddPtr(volatile void* x, uintptr_t value)
_SST_Atomic_AddPtr:
_SST_Atomic_Add:
SST_Atomic_AddPtr:
SST_Atomic_Add:
mov edx, [esp+4]
mov eax, [esp+8]
lock add [edx], eax
ret
; void SST_Atomic_And(volatile int* x, int value)
_SST_Atomic_And:
SST_Atomic_And:
mov edx, [esp+4]
mov eax, [esp+8]
lock and [edx], eax
ret
; void SST_Atomic_Or(volatile int* x, int value)
_SST_Atomic_Or:
SST_Atomic_Or:
mov edx, [esp+4]
mov eax, [esp+8]
lock or [edx], eax
ret
; void SST_Atomic_Xor(volatile int* x, int value)
_SST_Atomic_Xor:
SST_Atomic_Xor:
mov edx, [esp+4]
mov eax, [esp+8]
lock xor [edx], eax
ret
; void SST_Atomic_Not(volatile int* x)
_SST_Atomic_Not:
SST_Atomic_Not:
mov edx, [esp+4]
lock not DWORD [edx]
ret
; int SST_Atomic_AddReturn(volatile int* x, int value)
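; void* SST_Atomic_AddPtrReturn(volatile void** x, int value)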
_SST_Atomic_AddReturn:
_SST_Atomic_AddPtrReturn:
SST_Atomic_AddReturn:
SST_Atomic_AddPtrReturn:
mov edx, [esp+4]
mov eax, [esp+8]
lock xadd [edx], eax ;eax = original *x, *x = *x + value
add eax, [esp+8] ;eax = original *x + value, i.e. the new value now stored in *x
ret
; int SST_Atomic_AndReturn(volatile int* x, int value)
_SST_Atomic_AndReturn:
SST_Atomic_AndReturn:
push ebx ;save ebx
mov ecx, [esp+8] ;ecx = x
mov ebx, [esp+12] ;ebx = value
mov edx, [ecx] ;edx = *x
mov eax, edx ;eax = edx = *x
try_again_1:
and edx, ebx ;edx = *x & value
lock cmpxchg [ecx], edx ;if(eax == *x) *x = edx, else eax = *x
jz done_1 ;*x was modified. Its new value is in edx
mov edx, eax ;copy that new value to edx, let's try again
jmp try_again_1
done_1:
xchg edx, eax ;edx has the value (*x & value), so swap edx and eax
pop ebx ;restore ebx
ret
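; int SST_Atomic_OrReturn(volatile int* x, int value)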
_SST_Atomic_OrReturn:
SST_Atomic_OrReturn:
push ebx ;save ebx
mov ecx, [esp+8] ;ecx = x
mov ebx, [esp+12] ;ebx = value
mov edx, [ecx] ;edx = *x
mov eax, edx ;eax = edx = *x
try_again_2:
or edx, ebx ;edx = *x | value
lock cmpxchg [ecx], edx ;if(eax == *x) *x = edx, else eax = *x
jz done_2 ;*x was modified. Its new value is in edx
mov edx, eax ;copy that new value to edx, let's try again
jmp try_again_2
done_2:
xchg edx, eax ;edx has the value (*x | value), so swap edx and eax
pop ebx ;restore ebx
ret
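; int SST_Atomic_XorReturn(volatile int* x, int value)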
_SST_Atomic_XorReturn:
SST_Atomic_XorReturn:
push ebx ;save ebx
mov ecx, [esp+8] ;ecx = x
mov ebx, [esp+12] ;ebx = value
mov edx, [ecx] ;edx = *x
mov eax, edx ;eax = edx = *x
try_again_3:
xor edx, ebx ;edx = *x ^ value
lock cmpxchg [ecx], edx ;if(eax == *x) *x = edx, else eax = *x
jz done_3 ;*x was modified. Its new value is in edx
mov edx, eax ;copy that new value to edx, let's try again
jmp try_again_3
done_3:
xchg edx, eax ;edx has the value (*x ^ value), so swap edx and eax
pop ebx ;restore ebx
ret
; int SST_Atomic_NotReturn(volatile int* x)
; =============================================
_SST_Atomic_NotReturn:
SST_Atomic_NotReturn:
mov ecx, [esp+4] ;ecx = x
mov edx, [ecx] ;edx = *x
mov eax, edx ;eax = edx = *x
try_again_4:
not edx ;edx = ~(*x)
lock cmpxchg [ecx], edx ;if(eax == *x) *x = edx, else eax = *x
jz done_4 ;*x was modified. Its new value is in edx
mov edx, eax ;copy that new value to edx, let's try again
jmp try_again_4
done_4:
xchg edx, eax ;edx has the value (~*x), so swap edx and eax
ret
;int SST_Atomic_Exchange(volatile int* x, int value)
;void* SST_Atomic_ExchangePtr(volatile void** x, void* value)
_SST_Atomic_Exchange:
SST_Atomic_Exchange:
_SST_Atomic_ExchangePtr:
SST_Atomic_ExchangePtr:
mov edx, [esp+4]
mov eax, [esp+8]
lock xchg [edx], eax
ret
;int SST_Atomic_ExchangeAdd(volatile int* x, int value);
;void* SST_Atomic_ExchangeAddPtr(volatile void** x, int value);
_SST_Atomic_ExchangeAdd:
SST_Atomic_ExchangeAdd:
_SST_Atomic_ExchangeAddPtr:
SST_Atomic_ExchangeAddPtr:
mov edx, [esp+4]
mov eax, [esp+8]
lock xadd [edx],eax ;Save [edx] to temp, store [edx]+eax into [edx], save temp into eax (the original value is returned)
ret
;int SST_Atomic_CAS(int* dest, int compare, int newValue);
;void* SST_Atomic_CASPtr(void** dest, void* compare, void* newValue);
_SST_Atomic_CAS:
SST_Atomic_CAS:
_SST_Atomic_CASPtr:
SST_Atomic_CASPtr:
mov edx, [esp+4] ;edx <- dest
mov eax, [esp+8] ;eax <- compare
mov ecx, [esp+12] ;ecx <- newValue
lock cmpxchg [edx], ecx ;compare [edx] with eax | swap with ecx if equal, else store [edx] into eax
ret
; int SST_Atomic_LoadAcquire(const volatile int* src);
; void* SST_Atomic_LoadAcquirePtr(const volatile void** src);
; NOTE: x86 has strong memory ordering, thus no hardware barrier is necessary, except for a compiler barrier to prevent instruction reordering
_SST_Atomic_LoadAcquire:
SST_Atomic_LoadAcquire:
_SST_Atomic_LoadAcquirePtr:
SST_Atomic_LoadAcquirePtr:
mov eax, [esp+4] ;eax = ptr
mov eax, [eax] ;eax = *ptr
ret
; void SST_Atomic_StoreRelease(volatile int* dest, int value);
; void SST_Atomic_StoreReleasePtr(volatile void** dest, void* value);
; NOTE: x86 has strong memory ordering, thus no hardware barrier is necessary, except for a compiler barrier to prevent instruction reordering
_SST_Atomic_StoreRelease:
SST_Atomic_StoreRelease:
_SST_Atomic_StoreReleasePtr:
SST_Atomic_StoreReleasePtr:
mov edx, [esp+4] ;edx = dest
mov eax, [esp+8] ;eax = value
mov [edx], eax
ret

View File

@@ -0,0 +1,151 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{43ED481F-C4A3-40ED-81A3-46C43DC4EB1E}</ProjectGuid>
<RootNamespace>libsstatomic</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>false</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\vsyasm.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>11.0.50727.1</_ProjectFileVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<OutDir>$(SolutionDir)\Lib\x86\</OutDir>
<IntDir>$(SolutionDir)Intermediate\$(Configuration)\$(Platform)\$(ProjectName)\</IntDir>
<TargetName>$(ProjectName)-debug</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<OutDir>$(SolutionDir)\Lib\x64\</OutDir>
<IntDir>$(SolutionDir)Intermediate\$(Configuration)\$(Platform)\$(ProjectName)\</IntDir>
<TargetName>$(ProjectName)-debug</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<OutDir>$(SolutionDir)\Lib\x86\</OutDir>
<IntDir>$(SolutionDir)Intermediate\$(Configuration)\$(Platform)\$(ProjectName)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<OutDir>$(SolutionDir)\Lib\x64\</OutDir>
<IntDir>$(SolutionDir)Intermediate\$(Configuration)\$(Platform)\$(ProjectName)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
</ClCompile>
<Lib />
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Lib />
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<YASM Include="SST_Atomic_x86-64-win64.asm">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
</YASM>
<YASM Include="SST_Atomic_x86.asm">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</YASM>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Lib\Include\SST\SST_Atomic.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\vsyasm.targets" />
</ImportGroup>
</Project>

View File

@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<YASM Include="SST_Atomic_x86-64-win64.asm">
<Filter>Source Files</Filter>
</YASM>
<YASM Include="SST_Atomic_x86.asm">
<Filter>Source Files</Filter>
</YASM>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Lib\Include\SST\SST_Atomic.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

Binary file not shown.