diff --git a/src/MacMSRDriver/MSRKernel.h b/src/MacMSRDriver/MSRKernel.h
index 0667d21c..808d4a0b 100644
--- a/src/MacMSRDriver/MSRKernel.h
+++ b/src/MacMSRDriver/MSRKernel.h
@@ -13,27 +13,4 @@ typedef struct {
     uint32_t msr_num;
 } pcm_msr_data_t;
 
-/*
-// The topologyEntry struct that is used by PCM
-typedef struct{
-    uint32_t os_id;
-    uint32_t socket;
-    uint32_t core_id;
-} topologyEntry;
-
-// A kernel version of the topology entry structure. It has
-// an extra unused int to explicitly align the struct on a 64bit
-// boundary, preventing the compiler from adding extra padding.
-enum {
-    kOpenDriver,
-    kCloseDriver,
-    kReadMSR,
-    kWriteMSR,
-    kBuildTopology,
-    kGetNumInstances,
-    kIncrementNumInstances,
-    kDecrementNumInstances,
-    kNumberOfMethods 
-};
-*/
 #endif
diff --git a/src/MacMSRDriver/PcmMsr/PcmMsr.cpp b/src/MacMSRDriver/PcmMsr/PcmMsr.cpp
index dd008cd8..3f51b740 100644
--- a/src/MacMSRDriver/PcmMsr/PcmMsr.cpp
+++ b/src/MacMSRDriver/PcmMsr/PcmMsr.cpp
@@ -12,8 +12,6 @@ PcmMsrDriverClassName *g_pci_driver = NULL;
 asm volatile ("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
 #define rdmsr(msr,lo,hi) \
 asm volatile ("\trdmsr\n" : "=a" (lo), "=d" (hi) : "c" (msr))
-#define cpuid(func1, func2, a, b, c, d) \
-asm volatile ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (func1), "c" (func2));
 
 extern "C" {
     extern void mp_rendezvous_no_intrs(void (*func)(void *),
@@ -58,14 +56,18 @@ void cpuWriteMSR(void* pIDatas){
 
 void cpuGetTopoData(void* pTopos){
     TopologyEntry* entries = (TopologyEntry*)pTopos;
-    int cpu = cpu_number();
-    int info[4];
-    entries[cpu].os_id = cpu;
-    cpuid(0xB, 1, info[0], info[1], info[2], info[3]);
-    entries[cpu].socket = info[3] >> info[0] & 0xF;
-
-    cpuid(0xB, 0, info[0], info[1], info[2], info[3]);
-    entries[cpu].core_id = info[3] >> info[0] & 0xF;
+    const int cpu = cpu_number();
+    // work on the slot whose index equals the value cpu_number() returned here
+    TopologyEntry & entry = entries[cpu];
+    entry.os_id = cpu;
+    // field widths come from initCoreMasks(); EDX of CPUID leaf 0xB (array[3]) is passed on as the APIC ID
+    uint32 smtMaskWidth = 0;
+    uint32 coreMaskWidth = 0;
+    uint32 l2CacheMaskShift = 0;
+    initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift); // NOTE(review): result ignored; on failure the widths stay 0
+    PCM_CPUID_INFO cpuid_args;
+    pcm_cpuid(0xb, 0x0, cpuid_args);
+    fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, cpuid_args.array[3]);
 }
 
 OSDefineMetaClassAndStructors(com_intel_driver_PcmMsr, IOService)
@@ -188,8 +190,10 @@ IOReturn PcmMsrDriverClassName::buildTopology(TopologyEntry* odata, uint32_t inp
 
     for(uint32_t i = 0; i < num_cores && i < input_num_cores; i++)
     {
-        odata[i].core_id = topologies[i].core_id;
         odata[i].os_id = topologies[i].os_id;
+        odata[i].thread_id = topologies[i].thread_id;
+        odata[i].core_id = topologies[i].core_id;
+        odata[i].tile_id = topologies[i].tile_id;
         odata[i].socket = topologies[i].socket;
     }
 
diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp
index bdd8417c..e2b4609e 100644
--- a/src/cpucounters.cpp
+++ b/src/cpucounters.cpp
@@ -339,17 +339,6 @@ void pcm_cpuid_bsd(int leaf, PCM_CPUID_INFO& info, int core)
 }
 #endif
 
-/* Adding the new version of cpuid with leaf and subleaf as an input */
-void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
-{
-    #ifdef _MSC_VER
-    __cpuidex(info.array, leaf, subleaf);
-    #else
-    __asm__ __volatile__ ("cpuid" : \
-                          "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
-    #endif
-}
-
 #ifdef __linux__
 bool isNMIWatchdogEnabled(const bool silent);
 bool keepNMIWatchdogEnabled();
@@ -1121,16 +1110,9 @@ bool PCM::discoverSystemTopology()
     socketIdMap_type socketIdMap;
 
     PCM_CPUID_INFO cpuid_args;
-    // init constants for CPU topology leaf 0xB
-    // adapted from Topology Enumeration Reference code for Intel 64 Architecture
-    // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
-    int wasCoreReported = 0, wasThreadReported = 0;
-    int subleaf = 0, levelType, levelShift;
-    //uint32 coreSelectMask = 0, smtSelectMask = 0;
     uint32 smtMaskWidth = 0;
-    //uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
-    uint32 corePlusSMTMaskWidth = 0;
     uint32 coreMaskWidth = 0;
+    uint32 l2CacheMaskShift = 0;
 
     struct domain
     {
@@ -1140,30 +1122,14 @@ bool PCM::discoverSystemTopology()
     std::unordered_map<int, domain> topologyDomainMap;
     {
         TemporalThreadAffinity aff0(0);
-        do
+
+        if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)
         {
-            pcm_cpuid(0xb, subleaf, cpuid_args);
-            if (cpuid_args.array[1] == 0)
-            { // if EBX ==0 then this subleaf is not valid, we can exit the loop
-                break;
-            }
-            levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
-            levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
-            switch (levelType)
-            {
-            case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
-                smtMaskWidth = levelShift;
-                wasThreadReported = 1;
-                break;
-            case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
-                corePlusSMTMaskWidth = levelShift;
-                wasCoreReported = 1;
-                break;
-            default:
-                break;
-            }
-            subleaf++;
-        } while (1);
+            std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
+            return false;
+        }
+
+        int subleaf = 0;
 
         std::vector<domain> topologyDomains;
         if (max_cpuid >= 0x1F)
@@ -1209,42 +1175,6 @@ bool PCM::discoverSystemTopology()
         }
     }
 
-    if (wasThreadReported && wasCoreReported)
-    {
-        coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
-    }
-    else if (!wasCoreReported && wasThreadReported)
-    {
-        coreMaskWidth = smtMaskWidth;
-    }
-    else
-    {
-        std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
-        return false;
-    }
-
-    (void) coreMaskWidth; // to suppress warnings on MacOS (unused vars)
-
-    uint32 l2CacheMaskShift = 0;
-#ifdef PCM_DEBUG_TOPOLOGY
-    uint32 threadsSharingL2;
-#endif
-    uint32 l2CacheMaskWidth;
-
-    pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
-    l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
-#ifdef PCM_DEBUG_TOPOLOGY
-    threadsSharingL2 = l2CacheMaskWidth;
-#endif
-    for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
-    {
-        l2CacheMaskShift++;
-    }
-#ifdef PCM_DEBUG_TOPOLOGY
-    std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
-              << " [the most significant bit = " << l2CacheMaskShift << "]\n";
-#endif
-
 #ifndef __APPLE__
     auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
     {
@@ -1285,11 +1215,7 @@ bool PCM::discoverSystemTopology()
         }
         else
         {
-            const int apic_id = getAPICID(0xb);
-            entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
-            entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0;
-            entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31);
-            entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
+            fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));
         }
     };
 #endif
diff --git a/src/topologyentry.h b/src/topologyentry.h
index 55647c3e..39ffe153 100644
--- a/src/topologyentry.h
+++ b/src/topologyentry.h
@@ -69,5 +69,86 @@ struct PCM_API TopologyEntry // describes a core
     }
 };
 
+inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const uint32 & coreMaskWidth, const uint32 & l2CacheMaskShift, const int apic_id) // splits apic_id into the entry's ID fields
+{
+    entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0; // lowest smtMaskWidth bits, 0 when there is no SMT field
+    entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0; // coreMaskWidth bits above the SMT field; NOTE(review): with coreMaskWidth == 0 and smtMaskWidth != 0 the bounds are reversed - confirm intended
+    entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31); // bits from smtMaskWidth + coreMaskWidth up to bit 31
+    entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31); // bits from l2CacheMaskShift up to bit 31
+}
+
+// Queries CPUID on the calling CPU for the APIC ID field widths consumed by fillEntry().
+//   smtMaskWidth     - out: shift reported by the SMT level (type 1) of leaf 0xB
+//   coreMaskWidth    - out: width of the core field located above the SMT field
+//   l2CacheMaskShift - out: floor(log2(number of APIC IDs sharing the L2 cache))
+// Returns false, leaving all three outputs untouched, when no SMT level is reported.
+inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift)
+{
+    // init constants for CPU topology leaf 0xB
+    // adapted from Topology Enumeration Reference code for Intel 64 Architecture
+    // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
+    bool wasCoreReported = false, wasThreadReported = false;
+    uint32 corePlusSMTMaskWidth = 0;
+    PCM_CPUID_INFO cpuid_args;
+
+    for (int subleaf = 0; ; ++subleaf)
+    {
+        pcm_cpuid(0xb, subleaf, cpuid_args);
+        if (cpuid_args.array[1] == 0)
+        { // if EBX ==0 then this subleaf is not valid, we can exit the loop
+            break;
+        }
+        const int levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
+        const int levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
+        switch (levelType)
+        {
+        case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
+            smtMaskWidth = levelShift;
+            wasThreadReported = true;
+            break;
+        case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
+            corePlusSMTMaskWidth = levelShift;
+            wasCoreReported = true;
+            break;
+        default:
+            break;
+        }
+    }
+
+    // an SMT level is mandatory; the core level is optional (handled below)
+    if (!wasThreadReported)
+    {
+        return false;
+    }
+    if (wasCoreReported)
+    {
+        coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
+    }
+    else
+    {
+        // no core level reported: the SMT width doubles as the core width
+        coreMaskWidth = smtMaskWidth;
+    }
+
+    pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
+    uint32 l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
+    #ifdef PCM_DEBUG_TOPOLOGY
+    const uint32 threadsSharingL2 = l2CacheMaskWidth;
+    #endif
+
+    // Reset the output first: the loop only increments, so a non-zero value passed
+    // in by the caller would otherwise be added to the result.
+    l2CacheMaskShift = 0;
+    for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
+    {
+        l2CacheMaskShift++;
+    }
+    #ifdef PCM_DEBUG_TOPOLOGY
+    std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
+            << " [the most significant bit = " << l2CacheMaskShift << "]\n";
+    #endif
+    return true;
+}
+
 }
 
diff --git a/src/types.h b/src/types.h
index ba70c223..884c219c 100644
--- a/src/types.h
+++ b/src/types.h
@@ -20,9 +20,11 @@
 #include <sstream>
 #include <iomanip>
 #include <string.h>
+#include <assert.h>
 
 #ifdef _MSC_VER
 #include <windows.h>
+#include <intrin.h>
 #endif
 
 #endif // #ifndef KERNEL
@@ -1434,6 +1436,120 @@ struct MCFGHeader
 
 #endif // #ifndef KERNEL
 
+
+// Builds the bit mask used by extract_bits_ui() for the 32-bit range [beg, end].
+// The result has the low (end + 1) bits set and is then shifted right by beg,
+// i.e. it is right-aligned so it can be ANDed with a value already shifted by beg.
+// end above 31 trips the assert when assertions are enabled and is clamped otherwise; beg > 31 yields 0.
+inline uint32 build_bit_ui(uint32 beg, uint32 end)
+{
+    assert(end <= 31);
+    if (end > 31)
+    {
+        end = 31;
+    }
+    if (beg > 31)
+    {
+        return 0;
+    }
+    uint32 myll = (uint32)(-1); // end == 31: all 32 bits set
+    if (end < 31)
+    {
+        // 1U keeps the shift unsigned: with end == 30 a signed 1 << 31 overflows int
+        myll = (1U << (end + 1)) - 1;
+    }
+    return myll >> beg;
+}
+
+// Returns bits [beg, end] of myin (inclusive, bounds accepted in either order),
+// shifted down so that the lowest requested bit lands in bit 0.
+inline uint32 extract_bits_ui(uint32 myin, uint32 beg, uint32 end)
+{
+    // Let the user reverse the order of beg & end.
+    uint32 beg1 = beg, end1 = end;
+    if (beg > end)
+    {
+        beg1 = end;
+        end1 = beg;
+    }
+    if (beg1 > 31)
+    {
+        // Nothing to extract above bit 31. Returning here also avoids shifting a
+        // 32-bit value by 32 or more, which is undefined behaviour; build_bit_ui()
+        // yields an empty mask for such a range anyway.
+        return 0;
+    }
+    return (myin >> beg1) & build_bit_ui(beg1, end1);
+}
+
+// 64-bit counterpart of build_bit_ui(): low (end + 1) bits set, shifted right by beg.
+inline uint64 build_bit(uint32 beg, uint32 end)
+{
+    if (end > 63)
+    {
+        end = 63;
+    }
+    if (beg > 63)
+    {
+        return 0; // same convention as build_bit_ui(); a shift by 64 or more is undefined
+    }
+    uint64 myll = static_cast<uint64>(-1); // end == 63: all 64 bits set
+    if (end < 63)
+    {
+        myll = (1ULL << (end + 1)) - 1; // unsigned: with end == 62 a signed 1LL << 63 overflows
+    }
+    return myll >> beg;
+}
+
+// Returns bits [beg, end] of myin (inclusive, bounds accepted in either order),
+// shifted down so that the lowest requested bit lands in bit 0.
+inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end)
+{
+    // Let the user reverse the order of beg & end.
+    uint32 beg1 = beg, end1 = end;
+    if (beg > end)
+    {
+        beg1 = end;
+        end1 = beg;
+    }
+    if (beg1 > 63)
+    {
+        // Nothing to extract above bit 63. Returning here also keeps the shifts
+        // below (and inside build_bit()) under 64 bits; shifting a 64-bit value
+        // by 64 or more is undefined behaviour.
+        return 0;
+    }
+    return (myin >> beg1) & build_bit(beg1, end1);
+}
+
+union PCM_CPUID_INFO // register values produced by one CPUID invocation
+{
+    int array[4]; // raw view handed to the MSVC intrinsics; indexed as eax, ebx, ecx, edx
+    struct { unsigned int eax, ebx, ecx, edx; } reg; // named view of the same storage, written by the inline asm
+};
+
+inline void pcm_cpuid(int leaf, PCM_CPUID_INFO& info) // executes CPUID for leaf with sub-leaf 0 and stores EAX..EDX in info
+{
+#ifdef _MSC_VER
+    // version for Windows; the __cpuid intrinsic clears ECX before executing CPUID
+    __cpuid(info.array, leaf);
+#else
+    // zero ECX explicitly so sub-leaf dependent leaves do not see a stale register value
+    __asm__ __volatile__("cpuid" : "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (0));
+#endif
+}
+
+/* Executes CPUID for the given leaf with ECX preloaded with subleaf and stores EAX..EDX in info */
+inline void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
+{
+    #ifdef _MSC_VER
+    __cpuidex(info.array, leaf, subleaf);
+    #else
+    __asm__ __volatile__ ("cpuid" : \
+                          "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
+    #endif
+}
+
 //IDX accel device/func number(PCIe).
 //The device/function number from SPR register guide.
 #define SPR_IDX_IAA_REGISTER_DEV_ADDR  (2)
diff --git a/src/utils.h b/src/utils.h
index f80478df..06ebd823 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -436,23 +436,6 @@ bool match(const std::string& subtoken, const std::string& sname, uint64* result
 
 uint64 read_number(const char* str);
 
-union PCM_CPUID_INFO
-{
-    int array[4];
-    struct { unsigned int eax, ebx, ecx, edx; } reg;
-};
-
-inline void pcm_cpuid(int leaf, PCM_CPUID_INFO& info)
-{
-#ifdef _MSC_VER
-    // version for Windows
-    __cpuid(info.array, leaf);
-#else
-    __asm__ __volatile__("cpuid" : \
-        "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf));
-#endif
-}
-
 inline void clear_screen() {
 #ifdef _MSC_VER
     system("cls");
@@ -461,83 +444,6 @@ inline void clear_screen() {
 #endif
 }
 
-inline uint32 build_bit_ui(uint32 beg, uint32 end)
-{
-    assert(end <= 31);
-    uint32 myll = 0;
-    if (end == 31)
-    {
-        myll = (uint32)(-1);
-    }
-    else
-    {
-        myll = (1 << (end + 1)) - 1;
-    }
-    myll = myll >> beg;
-    return myll;
-}
-
-inline uint32 extract_bits_ui(uint32 myin, uint32 beg, uint32 end)
-{
-    uint32 myll = 0;
-    uint32 beg1, end1;
-
-    // Let the user reverse the order of beg & end.
-    if (beg <= end)
-    {
-        beg1 = beg;
-        end1 = end;
-    }
-    else
-    {
-        beg1 = end;
-        end1 = beg;
-    }
-    myll = myin >> beg1;
-    myll = myll & build_bit_ui(beg1, end1);
-    return myll;
-}
-
-inline uint64 build_bit(uint32 beg, uint32 end)
-{
-    uint64 myll = 0;
-    if (end > 63)
-    {
-        end = 63;
-    }
-    if (end == 63)
-    {
-        myll = static_cast<uint64>(-1);
-    }
-    else
-    {
-        myll = (1LL << (end + 1)) - 1;
-    }
-    myll = myll >> beg;
-    return myll;
-}
-
-inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end)
-{
-    uint64 myll = 0;
-    uint32 beg1, end1;
-
-    // Let the user reverse the order of beg & end.
-    if (beg <= end)
-    {
-        beg1 = beg;
-        end1 = end;
-    }
-    else
-    {
-        beg1 = end;
-        end1 = beg;
-    }
-    myll = myin >> beg1;
-    myll = myll & build_bit(beg1, end1);
-    return myll;
-}
-
 #ifdef _MSC_VER
 
 #define PCM_MSR_DRV_NAME TEXT("\\\\.\\RDMSR")