Skip to content

Commit

Permalink
Merge pull request #622 from intel/push-2023-12-13
Browse files Browse the repository at this point in the history
Push 2023 12 13
  • Loading branch information
rdementi authored Dec 14, 2023
2 parents f7be22a + d4c0972 commit f632877
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 229 deletions.
23 changes: 0 additions & 23 deletions src/MacMSRDriver/MSRKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,4 @@ typedef struct {
uint32_t msr_num;
} pcm_msr_data_t;

/*
// The topologyEntry struct that is used by PCM
typedef struct{
uint32_t os_id;
uint32_t socket;
uint32_t core_id;
} topologyEntry;
// A kernel version of the topology entry structure. It has
// an extra unused int to explicitly align the struct on a 64bit
// boundary, preventing the compiler from adding extra padding.
enum {
kOpenDriver,
kCloseDriver,
kReadMSR,
kWriteMSR,
kBuildTopology,
kGetNumInstances,
kIncrementNumInstances,
kDecrementNumInstances,
kNumberOfMethods
};
*/
#endif
26 changes: 15 additions & 11 deletions src/MacMSRDriver/PcmMsr/PcmMsr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ PcmMsrDriverClassName *g_pci_driver = NULL;
asm volatile ("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
// rdmsr(msr, lo, hi): executes the RDMSR instruction.
//   msr - passed in ECX ("c" input constraint)
//   lo  - receives EAX ("=a" output constraint)
//   hi  - receives EDX ("=d" output constraint)
#define rdmsr(msr,lo,hi) \
asm volatile ("\trdmsr\n" : "=a" (lo), "=d" (hi) : "c" (msr))
// cpuid(func1, func2, a, b, c, d): executes the CPUID instruction.
//   func1 - passed in EAX, func2 - passed in ECX
//   a, b, c, d - receive EAX, EBX, ECX and EDX respectively
// NOTE: the expansion already ends with ';', so `cpuid(...);` yields an extra
// empty statement; avoid using it as the sole body of an unbraced if/else.
#define cpuid(func1, func2, a, b, c, d) \
asm volatile ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (func1), "c" (func2));

extern "C" {
extern void mp_rendezvous_no_intrs(void (*func)(void *),
Expand Down Expand Up @@ -58,14 +56,18 @@ void cpuWriteMSR(void* pIDatas){

/**
 * Fills in the TopologyEntry slot that belongs to the CPU executing this call.
 *
 * @param pTopos  untyped pointer to an array of TopologyEntry, indexed by the
 *                value returned from cpu_number(). The array is presumably
 *                sized for every logical CPU by the caller -- TODO confirm.
 *
 * The entry's os_id is set to the CPU number; the remaining fields are filled
 * by fillEntry() from the mask widths reported by initCoreMasks() and from EDX
 * of CPUID leaf 0xB, subleaf 0.
 *
 * The previous hand-rolled decoding (`info[3] >> info[0] & 0xF`) was removed:
 * it redeclared `cpu` in the same scope, shifted by the whole of EAX rather
 * than by the level-shift bit field, and its results were superseded by
 * fillEntry() anyway.
 */
void cpuGetTopoData(void* pTopos){
    TopologyEntry* entries = (TopologyEntry*)pTopos;
    const int cpu = cpu_number();

    TopologyEntry & entry = entries[cpu];
    entry.os_id = cpu;

    uint32 smtMaskWidth = 0;
    uint32 coreMaskWidth = 0;
    uint32 l2CacheMaskShift = 0;
    // NOTE(review): the result of initCoreMasks() is not checked here; this
    // callback returns void and has no channel to report a failure. On failure
    // the widths presumably stay 0 -- confirm that fillEntry() tolerates that.
    initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift);

    // EDX (array[3]) of leaf 0xB is handed to fillEntry() as the APIC ID.
    PCM_CPUID_INFO cpuid_args;
    pcm_cpuid(0xb, 0x0, cpuid_args);
    fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, cpuid_args.array[3]);
}

OSDefineMetaClassAndStructors(com_intel_driver_PcmMsr, IOService)
Expand Down Expand Up @@ -188,8 +190,10 @@ IOReturn PcmMsrDriverClassName::buildTopology(TopologyEntry* odata, uint32_t inp

for(uint32_t i = 0; i < num_cores && i < input_num_cores; i++)
{
odata[i].core_id = topologies[i].core_id;
odata[i].os_id = topologies[i].os_id;
odata[i].thread_id = topologies[i].thread_id;
odata[i].core_id = topologies[i].core_id;
odata[i].tile_id = topologies[i].tile_id;
odata[i].socket = topologies[i].socket;
}

Expand Down
92 changes: 9 additions & 83 deletions src/cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,17 +339,6 @@ void pcm_cpuid_bsd(int leaf, PCM_CPUID_INFO& info, int core)
}
#endif

/**
 * Executes CPUID for the given leaf and subleaf.
 *
 * @param leaf     value placed in EAX before the instruction
 * @param subleaf  value placed in ECX before the instruction
 * @param info     receives the resulting EAX, EBX, ECX and EDX
 *                 (written through info.array on MSVC, info.reg.* otherwise)
 */
void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
{
#ifdef _MSC_VER
// MSVC: use the compiler intrinsic instead of inline assembly.
__cpuidex(info.array, leaf, subleaf);
#else
// Other compilers: extended inline asm; outputs map to eax/ebx/ecx/edx.
__asm__ __volatile__ ("cpuid" : \
"=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
#endif
}

#ifdef __linux__
bool isNMIWatchdogEnabled(const bool silent);
bool keepNMIWatchdogEnabled();
Expand Down Expand Up @@ -1121,16 +1110,9 @@ bool PCM::discoverSystemTopology()
socketIdMap_type socketIdMap;

PCM_CPUID_INFO cpuid_args;
// init constants for CPU topology leaf 0xB
// adapted from Topology Enumeration Reference code for Intel 64 Architecture
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
int wasCoreReported = 0, wasThreadReported = 0;
int subleaf = 0, levelType, levelShift;
//uint32 coreSelectMask = 0, smtSelectMask = 0;
uint32 smtMaskWidth = 0;
//uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
uint32 corePlusSMTMaskWidth = 0;
uint32 coreMaskWidth = 0;
uint32 l2CacheMaskShift = 0;

struct domain
{
Expand All @@ -1140,30 +1122,14 @@ bool PCM::discoverSystemTopology()
std::unordered_map<int, domain> topologyDomainMap;
{
TemporalThreadAffinity aff0(0);
do

if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)
{
pcm_cpuid(0xb, subleaf, cpuid_args);
if (cpuid_args.array[1] == 0)
{ // if EBX ==0 then this subleaf is not valid, we can exit the loop
break;
}
levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
switch (levelType)
{
case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
smtMaskWidth = levelShift;
wasThreadReported = 1;
break;
case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
corePlusSMTMaskWidth = levelShift;
wasCoreReported = 1;
break;
default:
break;
}
subleaf++;
} while (1);
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
return false;
}

int subleaf = 0;

std::vector<domain> topologyDomains;
if (max_cpuid >= 0x1F)
Expand Down Expand Up @@ -1209,42 +1175,6 @@ bool PCM::discoverSystemTopology()
}
}

if (wasThreadReported && wasCoreReported)
{
coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
}
else if (!wasCoreReported && wasThreadReported)
{
coreMaskWidth = smtMaskWidth;
}
else
{
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
return false;
}

(void) coreMaskWidth; // to suppress warnings on MacOS (unused vars)

uint32 l2CacheMaskShift = 0;
#ifdef PCM_DEBUG_TOPOLOGY
uint32 threadsSharingL2;
#endif
uint32 l2CacheMaskWidth;

pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
#ifdef PCM_DEBUG_TOPOLOGY
threadsSharingL2 = l2CacheMaskWidth;
#endif
for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
{
l2CacheMaskShift++;
}
#ifdef PCM_DEBUG_TOPOLOGY
std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
<< " [the most significant bit = " << l2CacheMaskShift << "]\n";
#endif

#ifndef __APPLE__
auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
{
Expand Down Expand Up @@ -1285,11 +1215,7 @@ bool PCM::discoverSystemTopology()
}
else
{
const int apic_id = getAPICID(0xb);
entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0;
entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31);
entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));
}
};
#endif
Expand Down
34 changes: 16 additions & 18 deletions src/pcm-iio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ using namespace pcm;
#define SKX_UNC_SOCKETID_UBOX_LNID_OFFSET 0xC0
#define SKX_UNC_SOCKETID_UBOX_GID_OFFSET 0xD4

const uint8_t max_sockets = 4;
static const std::string iio_stack_names[6] = {
"IIO Stack 0 - CBDMA/DMI ",
"IIO Stack 1 - PCIe0 ",
Expand Down Expand Up @@ -239,8 +238,7 @@ struct iio_counter : public counter {
std::vector<result_content> data;
};

//TODO: remove binding to stacks amount
result_content results(max_sockets, stack_content(12, ctr_data()));
result_content results;

typedef struct
{
Expand Down Expand Up @@ -1444,6 +1442,7 @@ void print_usage(const string& progname)
cout << " -csv-delimiter=<value> | /csv-delimiter=<value> => set custom csv delimiter\n";
cout << " -human-readable | /human-readable => use human readable format for output (for csv only)\n";
cout << " -root-port | /root-port => add root port devices to output (for csv only)\n";
cout << " -list | --list => provide platform topology info\n";
cout << " -i[=number] | /i[=number] => allow to determine number of iterations\n";
cout << " Examples:\n";
cout << " " << progname << " 1.0 -i=10 => print counters every second 10 times and exit\n";
Expand All @@ -1456,22 +1455,18 @@ PCM_MAIN_NOTHROW;

int mainThrows(int argc, char * argv[])
{
if(print_version(argc, argv))
if (print_version(argc, argv))
exit(EXIT_SUCCESS);

null_stream nullStream;
check_and_set_silent(argc, argv, nullStream);

set_signal_handlers();

std::cout << "\n Intel(r) Performance Counter Monitor " << PCM_VERSION << "\n";
std::cout << "\n This utility measures IIO information\n\n";

string program = string(argv[0]);

vector<struct iio_counter> counters;
PCIDB pciDB;
load_PCIDB(pciDB);
bool csv = false;
bool human_readable = false;
bool show_root_port = false;
Expand All @@ -1480,11 +1475,9 @@ int mainThrows(int argc, char * argv[])
double delay = PCM_DELAY_DEFAULT;
bool list = false;
MainLoop mainLoop;
PCM * m = PCM::getInstance();
iio_evt_parse_context evt_ctx;
// Map with metrics names.
map<string,std::pair<h_id,std::map<string,v_id>>> nameMap;
map<string,uint32_t> opcodeFieldMap;

while (argc > 1) {
argv++;
Expand All @@ -1511,7 +1504,7 @@ int mainThrows(int argc, char * argv[])
else if (check_argument_equals(*argv, {"-human-readable", "/human-readable"})) {
human_readable = true;
}
else if (check_argument_equals(*argv, {"--list"})) {
else if (check_argument_equals(*argv, {"-list", "--list"})) {
list = true;
}
else if (check_argument_equals(*argv, {"-root-port", "/root-port"})) {
Expand All @@ -1526,13 +1519,14 @@ int mainThrows(int argc, char * argv[])
}
}

set_signal_handlers();

print_cpu_details();

//TODO: remove binding to max sockets count.
if (m->getNumSockets() > max_sockets) {
cerr << "Only systems with up to " << max_sockets << " sockets are supported! Program aborted\n";
exit(EXIT_FAILURE);
}
PCM * m = PCM::getInstance();

PCIDB pciDB;
load_PCIDB(pciDB);

auto mapping = IPlatformMapping::getPlatformMapping(m->getCPUModel(), m->getNumSockets());
if (!mapping) {
Expand Down Expand Up @@ -1568,6 +1562,7 @@ int mainThrows(int argc, char * argv[])
exit(EXIT_FAILURE);
}

map<string,uint32_t> opcodeFieldMap;
opcodeFieldMap["opcode"] = PCM::OPCODE;
opcodeFieldMap["ev_sel"] = PCM::EVENT_SELECT;
opcodeFieldMap["umask"] = PCM::UMASK;
Expand Down Expand Up @@ -1600,8 +1595,11 @@ int mainThrows(int argc, char * argv[])
exit(EXIT_FAILURE);
}

//print_nameMap(nameMap);
//TODO: Taking from cli
#ifdef PCM_DEBUG
print_nameMap(nameMap);
#endif

results.resize(m->getNumSockets(), stack_content(m->getMaxNumOfIIOStacks(), ctr_data()));

mainLoop([&]()
{
Expand Down
Loading

0 comments on commit f632877

Please sign in to comment.