Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

osx topology experimental #620

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions src/MacMSRDriver/MSRKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,4 @@ typedef struct {
uint32_t msr_num;
} pcm_msr_data_t;

/*
// The topologyEntry struct that is used by PCM
typedef struct{
uint32_t os_id;
uint32_t socket;
uint32_t core_id;
} topologyEntry;

// A kernel version of the topology entry structure. It has
// an extra unused int to explicitly align the struct on a 64bit
// boundary, preventing the compiler from adding extra padding.
enum {
kOpenDriver,
kCloseDriver,
kReadMSR,
kWriteMSR,
kBuildTopology,
kGetNumInstances,
kIncrementNumInstances,
kDecrementNumInstances,
kNumberOfMethods
};
*/
#endif
26 changes: 15 additions & 11 deletions src/MacMSRDriver/PcmMsr/PcmMsr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ PcmMsrDriverClassName *g_pci_driver = NULL;
asm volatile ("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
#define rdmsr(msr,lo,hi) \
asm volatile ("\trdmsr\n" : "=a" (lo), "=d" (hi) : "c" (msr))
#define cpuid(func1, func2, a, b, c, d) \
asm volatile ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (func1), "c" (func2));

extern "C" {
extern void mp_rendezvous_no_intrs(void (*func)(void *),
Expand Down Expand Up @@ -58,14 +56,18 @@ void cpuWriteMSR(void* pIDatas){

// Per-CPU callback that fills the TopologyEntry slot belonging to the CPU it runs on.
// pTopos is cast to a TopologyEntry array and indexed with cpu_number().
// NOTE(review): this span is a diff hunk rendered without +/- markers, so two
// implementations appear back to back (hence `cpu` being declared twice): one that
// decodes CPUID leaf 0xB inline and one that delegates to initCoreMasks()/fillEntry().
// Confirm against the real file which lines are live.
void cpuGetTopoData(void* pTopos){
TopologyEntry* entries = (TopologyEntry*)pTopos;
int cpu = cpu_number();
int info[4];
entries[cpu].os_id = cpu;
// Leaf 0xB, subleaf 1: EDX is shifted right by EAX and the low 4 bits are kept
// (">>" binds tighter than "&", so this is (info[3] >> info[0]) & 0xF).
cpuid(0xB, 1, info[0], info[1], info[2], info[3]);
entries[cpu].socket = info[3] >> info[0] & 0xF;

// Leaf 0xB, subleaf 0: same shift-and-mask decoding, stored as the core id.
cpuid(0xB, 0, info[0], info[1], info[2], info[3]);
entries[cpu].core_id = info[3] >> info[0] & 0xF;
const int cpu = cpu_number();

TopologyEntry & entry = entries[cpu];
entry.os_id = cpu;

uint32 smtMaskWidth = 0;
uint32 coreMaskWidth = 0;
uint32 l2CacheMaskShift = 0;
// NOTE(review): initCoreMasks() returns bool but the result is not checked here;
// on a false return the widths above stay at 0 and fillEntry() still runs.
initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift);
PCM_CPUID_INFO cpuid_args;
// Leaf 0xB, subleaf 0: array[3] (EDX) is handed to fillEntry() as the APIC id.
pcm_cpuid(0xb, 0x0, cpuid_args);
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, cpuid_args.array[3]);
}

OSDefineMetaClassAndStructors(com_intel_driver_PcmMsr, IOService)
Expand Down Expand Up @@ -188,8 +190,10 @@ IOReturn PcmMsrDriverClassName::buildTopology(TopologyEntry* odata, uint32_t inp

for(uint32_t i = 0; i < num_cores && i < input_num_cores; i++)
{
odata[i].core_id = topologies[i].core_id;
odata[i].os_id = topologies[i].os_id;
odata[i].thread_id = topologies[i].thread_id;
odata[i].core_id = topologies[i].core_id;
odata[i].tile_id = topologies[i].tile_id;
odata[i].socket = topologies[i].socket;
}

Expand Down
92 changes: 9 additions & 83 deletions src/cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,17 +339,6 @@ void pcm_cpuid_bsd(int leaf, PCM_CPUID_INFO& info, int core)
}
#endif

/* CPUID query that takes both a leaf and a subleaf.
 *
 * leaf    - value placed in EAX before executing CPUID
 * subleaf - value placed in ECX before executing CPUID
 * info    - receives the four result registers (EAX, EBX, ECX, EDX)
 *
 * With _MSC_VER defined the __cpuidex intrinsic writes into info.array;
 * otherwise inline assembly writes into info.reg.{eax,ebx,ecx,edx}.
 */
void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
{
#ifdef _MSC_VER
__cpuidex(info.array, leaf, subleaf);
#else
__asm__ __volatile__ ("cpuid" : \
"=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
#endif
}

#ifdef __linux__
bool isNMIWatchdogEnabled(const bool silent);
bool keepNMIWatchdogEnabled();
Expand Down Expand Up @@ -1121,16 +1110,9 @@ bool PCM::discoverSystemTopology()
socketIdMap_type socketIdMap;

PCM_CPUID_INFO cpuid_args;
// init constants for CPU topology leaf 0xB
// adapted from Topology Enumeration Reference code for Intel 64 Architecture
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
int wasCoreReported = 0, wasThreadReported = 0;
int subleaf = 0, levelType, levelShift;
//uint32 coreSelectMask = 0, smtSelectMask = 0;
uint32 smtMaskWidth = 0;
//uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
uint32 corePlusSMTMaskWidth = 0;
uint32 coreMaskWidth = 0;
uint32 l2CacheMaskShift = 0;

struct domain
{
Expand All @@ -1140,30 +1122,14 @@ bool PCM::discoverSystemTopology()
std::unordered_map<int, domain> topologyDomainMap;
{
TemporalThreadAffinity aff0(0);
do

if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)
{
pcm_cpuid(0xb, subleaf, cpuid_args);
if (cpuid_args.array[1] == 0)
{ // if EBX ==0 then this subleaf is not valid, we can exit the loop
break;
}
levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
switch (levelType)
{
case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
smtMaskWidth = levelShift;
wasThreadReported = 1;
break;
case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
corePlusSMTMaskWidth = levelShift;
wasCoreReported = 1;
break;
default:
break;
}
subleaf++;
} while (1);
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
return false;
}

int subleaf = 0;

std::vector<domain> topologyDomains;
if (max_cpuid >= 0x1F)
Expand Down Expand Up @@ -1209,42 +1175,6 @@ bool PCM::discoverSystemTopology()
}
}

if (wasThreadReported && wasCoreReported)
{
coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
}
else if (!wasCoreReported && wasThreadReported)
{
coreMaskWidth = smtMaskWidth;
}
else
{
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
return false;
}

(void) coreMaskWidth; // to suppress warnings on MacOS (unused vars)

uint32 l2CacheMaskShift = 0;
#ifdef PCM_DEBUG_TOPOLOGY
uint32 threadsSharingL2;
#endif
uint32 l2CacheMaskWidth;

pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
#ifdef PCM_DEBUG_TOPOLOGY
threadsSharingL2 = l2CacheMaskWidth;
#endif
for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
{
l2CacheMaskShift++;
}
#ifdef PCM_DEBUG_TOPOLOGY
std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
<< " [the most significant bit = " << l2CacheMaskShift << "]\n";
#endif

#ifndef __APPLE__
auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
{
Expand Down Expand Up @@ -1285,11 +1215,7 @@ bool PCM::discoverSystemTopology()
}
else
{
const int apic_id = getAPICID(0xb);
entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0;
entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31);
entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));
}
};
#endif
Expand Down
81 changes: 81 additions & 0 deletions src/topologyentry.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,86 @@ struct PCM_API TopologyEntry // describes a core
}
};

// Splits an APIC id into the thread, core, socket and tile fields of a topology entry.
//
// entry            - entry whose thread_id, core_id, socket and tile_id are overwritten
// smtMaskWidth     - number of low APIC-id bits holding the thread id
// coreMaskWidth    - number of bits above the thread bits holding the core id
// l2CacheMaskShift - first bit of the field stored as tile_id
// apic_id          - APIC id to decode
inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const uint32 & coreMaskWidth, const uint32 & l2CacheMaskShift, const int apic_id)
{
    // First bit above the thread and core fields.
    const uint32 socketShift = smtMaskWidth + coreMaskWidth;

    // A zero-width field has no bits to extract, so the id is 0.
    if (smtMaskWidth)
    {
        entry.thread_id = extract_bits_ui(apic_id, 0, smtMaskWidth - 1);
    }
    else
    {
        entry.thread_id = 0;
    }

    if (socketShift)
    {
        entry.core_id = extract_bits_ui(apic_id, smtMaskWidth, socketShift - 1);
    }
    else
    {
        entry.core_id = 0;
    }

    // Everything from the given bit up to bit 31.
    entry.socket = extract_bits_ui(apic_id, socketShift, 31);
    entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
}

// Derives the APIC-id field widths needed to decode CPU topology on the calling CPU.
//
// smtMaskWidth     - out: shift reported by CPUID leaf 0xB for the SMT level
// coreMaskWidth    - out: width of the core field (core-level shift minus SMT shift),
//                    or the SMT shift itself when no core level is reported
// l2CacheMaskShift - out: floor(log2(N)) where N = 1 + EAX[25:14] of CPUID leaf 4, subleaf 2
//
// Returns false when leaf 0xB reports no SMT level; the outputs are left untouched then.
// On success every output is assigned (not accumulated), so the result does not depend
// on what the caller stored in the arguments beforehand.
inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift)
{
    // init constants for CPU topology leaf 0xB
    // adapted from Topology Enumeration Reference code for Intel 64 Architecture
    // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
    bool wasCoreReported = false;
    bool wasThreadReported = false;
    uint32 smtWidth = 0;
    uint32 corePlusSMTMaskWidth = 0;
    PCM_CPUID_INFO cpuid_args;

    for (int subleaf = 0; ; ++subleaf)
    {
        pcm_cpuid(0xb, subleaf, cpuid_args);
        if (cpuid_args.array[1] == 0)
        { // if EBX ==0 then this subleaf is not valid, we can exit the loop
            break;
        }
        const uint32 levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
        const uint32 levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
        switch (levelType)
        {
        case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
            smtWidth = levelShift;
            wasThreadReported = true;
            break;
        case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
            corePlusSMTMaskWidth = levelShift;
            wasCoreReported = true;
            break;
        default:
            break;
        }
    }

    if (!wasThreadReported)
    {
        // Without an SMT level the widths cannot be derived; the caller reports the error.
        return false;
    }

    smtMaskWidth = smtWidth;
    coreMaskWidth = wasCoreReported ? (corePlusSMTMaskWidth - smtWidth) : smtWidth;

    pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
    uint32 l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
#ifdef PCM_DEBUG_TOPOLOGY
    const uint32 threadsSharingL2 = l2CacheMaskWidth;
#endif

    // Position of the most significant set bit of the sharing count.
    // NOTE(review): this rounds down for a non-power-of-two count, whereas Intel's
    // enumeration guidance rounds up to the next power of two — confirm this is intended.
    uint32 shift = 0;
    for ( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
    {
        ++shift;
    }
    l2CacheMaskShift = shift;

#ifdef PCM_DEBUG_TOPOLOGY
    std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
        << " [the most significant bit = " << l2CacheMaskShift << "]\n";
#endif
    return true;
}

}

Loading