Files
libsst/libsst-os/SST_CPU_Win32.c
2026-04-03 00:22:39 -05:00

355 lines
9.3 KiB
C

/*
SST_CPU_Win32.c
Author: Patrick Baggett <ptbaggett@762studios.com>
Created: 12/23/2011
Purpose:
libsst-os CPU querying functions for Win32 systems (Windows 7 or later)
License:
This program is free software. It comes without any warranty, to
the extent permitted by applicable law. You can redistribute it
and/or modify it under the terms of the Do What The Fuck You Want
To Public License, Version 2, as published by Sam Hocevar. See
http://sam.zoy.org/wtfpl/COPYING for more details.
*/
#include "Win32Private.h"
#include <malloc.h> /* _alloca() */
#include <SST/SST_CPU.h>
#include <SST/SST_Assert.h>
/*
SUPPORT FOR > 64 CPUS
----------------------
This code does its best to be compatible with large numbers of logical CPUs. Processor groups are
a bit weird, see MSDN (http://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx)
*/
/*************************************************************************/
/* Number of set bits in each possible 4-bit value, indexed by that value */
static const uint8_t popbits[16] = {
    0, 1, 1, 2, /* 0x0 - 0x3 */
    1, 2, 2, 3, /* 0x4 - 0x7 */
    1, 2, 2, 3, /* 0x8 - 0xB */
    2, 3, 3, 4  /* 0xC - 0xF */
};
/*************************************************************************/
/*
    Count the set bits of an 8-bit value.
    The byte is split into two nibbles and each nibble is looked up in popbits[].
*/
static uint8_t popcount8(uint8_t v)
{
    const uint8_t lowNibble = (uint8_t)(v & 0x0Fu);
    const uint8_t highNibble = (uint8_t)(v >> 4);

    return (uint8_t)(popbits[lowNibble] + popbits[highNibble]);
}
/*************************************************************************/
/*
    Count the set bits of a Win32 KAFFINITY mask.
    The mask is consumed one byte at a time from the low end; each byte is
    counted with popcount8(). Returns the total number of set bits.
*/
static int popcount(KAFFINITY mask)
{
    int bitsSet = 0;

    /* KAFFINITY is an unsigned integer in the Windows SDK, so shifting right
       eventually clears it; a zero mask means there is nothing left to count. */
    while(mask != 0)
    {
        bitsSet += popcount8((uint8_t)(mask & 0xFF));
        mask >>= 8;
    }

    return bitsSet;
}
/*************************************************************************/
int SST_OS_GetNumberCPUChips()
{
DWORD length = 0;
SYSTEM_LOGICAL_PROCESSOR_INFORMATION* relat;
DWORD nr;
int nrChips = 0;
/* Query data structure size */
GetLogicalProcessorInformation(NULL, &length);
if(GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return -1;
/* Really get the info this time */
relat = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION*)_alloca((size_t)length);
if(GetLogicalProcessorInformation(relat, &length) == FALSE)
return -2;
/* Scan list */
nr = length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
while(nr > 0)
{
if(relat->Relationship == RelationProcessorPackage)
nrChips++;
relat++;
nr--;
}
return nrChips;
}
/*************************************************************************/
/*
    SST_OS_GetNumberPhysicalCPUs()

    Counts the RelationProcessorCore records returned by
    GetLogicalProcessorInformationEx(); each record counts as one core.

    Returns the number of records, -1 if the size query did not fail with
    ERROR_INSUFFICIENT_BUFFER, or -2 if the information could not be retrieved.
*/
int SST_OS_GetNumberPhysicalCPUs(void)
{
    DWORD bufferBytes = 0;
    char* cursor;
    char* limit;
    int coreCount;

    /* First call only asks how large the buffer has to be */
    GetLogicalProcessorInformationEx(RelationProcessorCore, NULL, &bufferBytes);
    if(GetLastError() != ERROR_INSUFFICIENT_BUFFER)
        return -1;

    /* Second call fills a stack buffer of that size */
    cursor = (char*)_alloca((size_t)bufferBytes);
    limit = cursor + bufferBytes;
    if(!GetLogicalProcessorInformationEx(
        RelationProcessorCore,
        (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)cursor,
        &bufferBytes))
        return -2;

    /* Records are variable-sized: hop from one to the next using its Size */
    for(coreCount = 0; cursor < limit; coreCount++)
        cursor += (size_t)((SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)cursor)->Size;

    return coreCount;
}
/*************************************************************************/
int SST_OS_GetNumberLogicalCPUs(void)
{
DWORD length = 0;
char* ptr, *end;
int nrThreads = 0;
/* Query data structure size */
GetLogicalProcessorInformationEx(RelationProcessorPackage, NULL, &length);
if(GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return -1;
/* Really get the info this time */
ptr = (char*)_alloca((size_t)length);
end = ptr + length;
if(GetLogicalProcessorInformationEx(
RelationProcessorPackage,
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)ptr,
&length) == FALSE)
return -2;
while(ptr < end)
{
WORD i;
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)ptr;
for(i=0; i<info->Processor.GroupCount; i++)
nrThreads += popcount(info->Processor.GroupMask[i].Mask);
ptr += info->Size;
}
return nrThreads;
}
/*************************************************************************/
int SST_OS_MapPhysicalToLogicalCPU(int physCpuId, int* logCpuIds)
{
DWORD length = 0;
char* ptr, *end;
WORD i;
int j;
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* info;
SST_OS_DebugAssert(logCpuIds != NULL, "The return array for logical CPUs may not be NULL");
/* Query data structure size */
GetLogicalProcessorInformationEx(RelationProcessorCore, NULL, &length);
if(GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return -2;
/* Really get the info this time */
ptr = (char*)_alloca((size_t)length);
end = ptr + length;
if(GetLogicalProcessorInformationEx(
RelationProcessorCore,
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)ptr,
&length) == FALSE)
return -3;
info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)ptr;
#ifdef _DEBUG
{
/* Figure out how many elements in this array */
int nr = (int)(length / info->Size);
/* Bounds checking */
SST_OS_DebugAssert(!(physCpuId >= nr), "Array too small to contain logical CPUs. Use SST_OS_GetNumberLogicalCPUs() to determine correct size");
}
#endif
/* This union is large, so we can't just do info[index], we have to do
'base_address + size * index'. Ugly, but necessary. */
info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(ptr + (size_t)(info->Size*physCpuId));
j = 0; /* j = number of logical CPUs mapped */
for(i=0; i<info->Processor.GroupCount; i++)
{
/* Figure out base logical CPU ID for processor group. Usually 64 CPUs / group. */
int groupbase = (int)(info->Processor.GroupMask[i].Group * 8 * sizeof(KAFFINITY));
size_t k;
KAFFINITY mask = info->Processor.GroupMask[i].Mask;
/*
Algorithm: Each GROUP_AFFINITY structure has a mask. Scan through it and
if bit 'k' is set, then add 'k+groupbase' to the array.
*/
/* Scan through bits and add logical CPUs */
for(k=0; k<(8 * sizeof(KAFFINITY)); k++)
{
/* No more bits set */
if(!mask)
break;
/* Is this bit set? */
if(mask & 1)
{
/* Add the logical CPU to the array */
logCpuIds[j] = (int)k + groupbase;
j++;
}
/* Next bit... */
mask >>= 1;
}
}
/* Return the number of logical CPUs written to logCpuIds[] */
return j;
}
/*************************************************************************/
int SST_OS_GetCPUAffinity(unsigned char* cpuMaskReturn, unsigned int cpuMaskSize)
{
WORD maxGroup;
GROUP_AFFINITY aff;
SST_OS_DebugAssert(cpuMaskReturn != NULL, "CPU mask may not be NULL");
SST_OS_DebugAssert(cpuMaskSize > 0, "CPU mask may not be zero");
/* Calculate the maximum number of groups that can be stored using the space provided */
maxGroup = (WORD)(cpuMaskSize / sizeof(KAFFINITY));
if(cpuMaskSize % sizeof(KAFFINITY))
maxGroup += 1;
if(maxGroup == 0)
return -1;
memset(cpuMaskReturn, 0, cpuMaskSize);
GetThreadGroupAffinity(GetCurrentThread(), &aff);
/* Ensure the real processor group doesn't exceed provided space */
if(aff.Group < maxGroup)
{
uint32_t byteOffset = aff.Group * sizeof(KAFFINITY); /* i.e. where to begin in cpuMaskReturn[] */
uint32_t bytesLeft = cpuMaskSize - byteOffset; /* i.e. how many bytes are left */
uint32_t i;
KAFFINITY mask = aff.Mask;
unsigned char* maskReturn = cpuMaskReturn + byteOffset;
for(i=0; i<bytesLeft; i++)
{
if(!mask)
break;
maskReturn[i] = (uint8_t)(mask & 0xFF);
mask >>= 8;
}
}
return 1;
}
/*************************************************************************/
int SST_OS_SetCPUAffinity(const unsigned char* cpuMask, unsigned int cpuMaskSize)
{
uint8_t groupPopCount[4096/(sizeof(KAFFINITY) * 8)];
KAFFINITY groupMask[4096/(sizeof(KAFFINITY) * 8)];
uint8_t lowest = UINT8_MAX;
uint8_t lowestGroup = 0;
unsigned int i;
GROUP_AFFINITY aff;
SST_OS_DebugAssert(cpuMask != NULL, "CPU mask may not be NULL");
/* Windows only allows a thread to be a member of a single processor group, which makes sense
when you consider that migrating threads across NUMA nodes is a bad idea. Here, we select the
processor group that has the fewest bits set (except for 0...) */
if(cpuMaskSize > 4096) /* Disallow arbitrarily large masks. 4096 processors is fine, really */
cpuMaskSize = 4096;
memset(groupPopCount, 0, sizeof(groupPopCount));
memset(groupMask, 0, sizeof(groupMask));
/* Operate on each byte */
for(i=0; i<cpuMaskSize; i++)
{
uint8_t groupIndex = (uint8_t)(i / sizeof(KAFFINITY)); /* i is in bytes, sizeof() is in bytes */
uint8_t groupByte = (uint8_t)(i % 8);
/* Sum all popcount8() of the bytes that make up a processor group's mask */
groupPopCount[groupIndex] += popcount8(cpuMask[i]);
/* OR the 8-bit mask into KAFFINITY values (essentially merge them) */
groupMask[groupIndex] |= (KAFFINITY)cpuMask[i] << (KAFFINITY)(groupByte * 8);
/* Record the smallest group that is not 0 */
if(groupPopCount[groupIndex] > 0 && groupPopCount[groupIndex] < lowest)
lowest = groupIndex;
}
aff.Group = (WORD)((uint16_t)lowestGroup);
aff.Mask = groupMask[lowestGroup];
SetThreadGroupAffinity(GetCurrentThread(), &aff, NULL);
return 1;
}
/*************************************************************************/
void SST_OS_SetPreferredCPU(int logCpuId)
{
PROCESSOR_NUMBER ideal;
SST_OS_DebugAssert(logCpuId < SST_OS_GetNumberLogicalCPUs(), "Logical CPU ID is out of range");
ideal.Group = (WORD)logCpuId / (WORD)(sizeof(KAFFINITY)*8);
ideal.Number = (WORD)logCpuId % (WORD)(sizeof(KAFFINITY)*8);
ideal.Reserved = 0;
SetThreadIdealProcessorEx(GetCurrentThread(), &ideal, NULL);
}