diff --git a/bestla/CMakeLists.txt b/bestla/CMakeLists.txt
index 9b9fa4be7..9e59b1c41 100644
--- a/bestla/CMakeLists.txt
+++ b/bestla/CMakeLists.txt
@@ -88,14 +88,6 @@ install(
 
 if(WIN32)
 	target_compile_definitions(${PROJECT_NAME} INTERFACE _CRT_SECURE_NO_WARNINGS NOMINMAX)
-	target_compile_options(${PROJECT_NAME} INTERFACE /wd4068 /wd4849 /wd6262 /wd4702 /wd4100) 
-	#4068 ignore unroll and GCC flags
-	#4849 ignore collapse
-	#6262 ignore stack too large
-	#4702 unreachable code(false warning on constexpr condition)
-	#4100 unreferenced formal parameter
-
-	target_link_options(${PROJECT_NAME} INTERFACE /STACK:5242880) #Stack requires up to L2 cache size
 endif(WIN32)
 
 
diff --git a/bestla/bestla/bestla_device.h b/bestla/bestla/bestla_device.h
index ebfc7d8de..6b73e5a15 100644
--- a/bestla/bestla/bestla_device.h
+++ b/bestla/bestla/bestla_device.h
@@ -17,12 +17,16 @@
 #include <vector>
 #include "bestla.h"
 #include "xbyak/xbyak_util.h"
+#include "bestla_utils.h"
 #ifdef _WIN32
 #include <windows.h>
 #else
 #include <sched.h>
 #endif
 
+#define FIXED_CACHE_SIZE ((1 << 20) - (32 << 10))  // 1 MiB - 32 KiB = 992 KiB (1015808 bytes)
+#define FIXED_CACHE 1  // nonzero: CpuDevice caps L2Cache and E_L2Cache at FIXED_CACHE_SIZE
+
 namespace bestla {
 
 namespace device {
@@ -244,6 +248,9 @@ class CpuDevice {
     ADD_FLAG(AVX512_BF16);
     ADD_FLAG(AVX512_FP16);
     numcores = _cpu.getNumCores(Xbyak::util::IntelCpuTopologyLevel::CoreLevel);
+    if (mHasAMX_BF16 || mHasAMX_INT8) {  // runs only when at least one AMX flag is set
+      utils::request_perm_xtile_data();  // presumably asks the OS for AMX tile-data permission; confirm
+    }
     static bool p = false;
     {
       uint32_t tmp[4];
@@ -315,6 +322,10 @@ class CpuDevice {
       L2Cache = _cpu.getDataCacheSize(1);
       numthreads = numcores;
     }
+#if FIXED_CACHE  // NOTE(review): cap presumably keeps L2-sized buffers inside a 1 MiB stack; confirm
+    L2Cache = L2Cache >= FIXED_CACHE_SIZE ? FIXED_CACHE_SIZE : L2Cache;  // clamp to at most FIXED_CACHE_SIZE
+    E_L2Cache = E_L2Cache >= FIXED_CACHE_SIZE ? FIXED_CACHE_SIZE : E_L2Cache;  // same clamp for E_L2Cache
+#endif  // FIXED_CACHE
   }
 
   static CpuDevice* getInstance() {
diff --git a/neural_speed/models/model_utils/gguf.h b/neural_speed/models/model_utils/gguf.h
index 71cf1b86a..ffed28e45 100644
--- a/neural_speed/models/model_utils/gguf.h
+++ b/neural_speed/models/model_utils/gguf.h
@@ -135,10 +135,7 @@ enum gguf_type {
 };
 
 static const char* GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
-    [GGUF_TYPE_UINT8] = "u8",    [GGUF_TYPE_INT8] = "i8",   [GGUF_TYPE_UINT16] = "u16",  [GGUF_TYPE_INT16] = "i16",
-    [GGUF_TYPE_UINT32] = "u32",  [GGUF_TYPE_INT32] = "i32", [GGUF_TYPE_FLOAT32] = "f32", [GGUF_TYPE_BOOL] = "bool",
-    [GGUF_TYPE_STRING] = "str",  [GGUF_TYPE_ARRAY] = "arr", [GGUF_TYPE_UINT64] = "u64",  [GGUF_TYPE_INT64] = "i64",
-    [GGUF_TYPE_FLOAT64] = "f64",
+    "u8", "i8", "u16", "i16", "u32", "i32", "f32", "bool", "str", "arr", "u64", "i64", "f64",
 };
 
 union gguf_value {
@@ -201,19 +198,19 @@ struct gguf_context {
 #define GGUF_DEFAULT_ALIGNMENT 32
 
 static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
-    [GGUF_TYPE_UINT8] = sizeof(uint8_t),
-    [GGUF_TYPE_INT8] = sizeof(int8_t),
-    [GGUF_TYPE_UINT16] = sizeof(uint16_t),
-    [GGUF_TYPE_INT16] = sizeof(int16_t),
-    [GGUF_TYPE_UINT32] = sizeof(uint32_t),
-    [GGUF_TYPE_INT32] = sizeof(int32_t),
-    [GGUF_TYPE_FLOAT32] = sizeof(float),
-    [GGUF_TYPE_BOOL] = sizeof(bool),
-    [GGUF_TYPE_STRING] = sizeof(struct gguf_str),
-    [GGUF_TYPE_ARRAY] = 0,  // undefined
-    [GGUF_TYPE_UINT64] = sizeof(uint64_t),
-    [GGUF_TYPE_INT64] = sizeof(int64_t),
-    [GGUF_TYPE_FLOAT64] = sizeof(double),
+    sizeof(uint8_t),  // GGUF_TYPE_UINT8
+    sizeof(int8_t),  // GGUF_TYPE_INT8
+    sizeof(uint16_t),  // GGUF_TYPE_UINT16
+    sizeof(int16_t),  // GGUF_TYPE_INT16
+    sizeof(uint32_t),  // GGUF_TYPE_UINT32
+    sizeof(int32_t),  // GGUF_TYPE_INT32
+    sizeof(float),  // GGUF_TYPE_FLOAT32
+    sizeof(bool),  // GGUF_TYPE_BOOL
+    sizeof(struct gguf_str),  // GGUF_TYPE_STRING
+    0,  // GGUF_TYPE_ARRAY: undefined
+    sizeof(uint64_t),  // GGUF_TYPE_UINT64
+    sizeof(int64_t),  // GGUF_TYPE_INT64
+    sizeof(double),  // GGUF_TYPE_FLOAT64
 };
 static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
 
@@ -296,6 +293,10 @@ inline static void* ggml_aligned_malloc(size_t size) {
     return NULL;
   }
   void* aligned_memory = NULL;
+#ifdef _MSC_VER  // MSVC builds: allocate with _aligned_malloc instead of the posix_memalign variants
+  aligned_memory = _aligned_malloc(size, GGML_MEM_ALIGN);  // NOTE(review): needs _aligned_free, not free()
+  int result = aligned_memory ? 0 : 1;  // 0 on success, nonzero on failure, for the result check below
+#else
 #ifdef GGML_USE_CPU_HBM
   int result = hbw_posix_memalign(&aligned_memory, 16, size);
 #elif GGML_USE_METAL
@@ -303,6 +304,8 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #else
   int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
 #endif
+#endif  // _MSC_VER
+
   if (result != 0) {
     // Handle allocation failure
     const char* error_desc = "unknown allocation error";