Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
Remove options from bestla & fix error of MSVC (#51)
Browse files Browse the repository at this point in the history
  • Loading branch information
luoyu-intel authored Jan 12, 2024
1 parent 6dbaa02 commit ffd5ade
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 25 deletions.
8 changes: 0 additions & 8 deletions bestla/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,6 @@ install(

# Windows-only settings, propagated to consumers of ${PROJECT_NAME} through the INTERFACE keyword.
if(WIN32)
# _CRT_SECURE_NO_WARNINGS: silence the CRT "unsafe function" deprecation warnings.
# NOMINMAX: keep <windows.h> from defining the min/max macros.
target_compile_definitions(${PROJECT_NAME} INTERFACE _CRT_SECURE_NO_WARNINGS NOMINMAX)
# Disable the MSVC warnings listed below (/wdNNNN turns warning NNNN off).
target_compile_options(${PROJECT_NAME} INTERFACE /wd4068 /wd4849 /wd6262 /wd4702 /wd4100)
# C4068: unknown pragma -- ignore unroll and GCC-specific pragmas
# C4849: ignore the warning about the OpenMP `collapse` clause
# C6262: ignore "stack usage too large" (code-analysis warning)
# C4702: unreachable code (false positive on constexpr conditions)
# C4100: unreferenced formal parameter

# Reserve a 5 MiB stack (5242880 bytes): stack usage can grow up to the L2 cache size.
target_link_options(${PROJECT_NAME} INTERFACE /STACK:5242880) # Stack requires up to L2 cache size
endif(WIN32)


Expand Down
11 changes: 11 additions & 0 deletions bestla/bestla/bestla_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@
#include <vector>
#include "bestla.h"
#include "xbyak/xbyak_util.h"
#include "bestla_utils.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sched.h>
#endif

// Upper bound, in bytes, for the L2 cache sizes used by CpuDevice:
// 1 MiB minus 32 KiB = 1015808 bytes.
#define FIXED_CACHE_SIZE ((1 << 20) - (32 << 10))
// When non-zero, the `#if FIXED_CACHE` section of CpuDevice clamps L2Cache and
// E_L2Cache to at most FIXED_CACHE_SIZE; set to 0 to keep the detected sizes.
#define FIXED_CACHE 1

namespace bestla {

namespace device {
Expand Down Expand Up @@ -244,6 +248,9 @@ class CpuDevice {
ADD_FLAG(AVX512_BF16);
ADD_FLAG(AVX512_FP16);
numcores = _cpu.getNumCores(Xbyak::util::IntelCpuTopologyLevel::CoreLevel);
if (mHasAMX_BF16 || mHasAMX_INT8) {
utils::request_perm_xtile_data();
}
static bool p = false;
{
uint32_t tmp[4];
Expand Down Expand Up @@ -315,6 +322,10 @@ class CpuDevice {
L2Cache = _cpu.getDataCacheSize(1);
numthreads = numcores;
}
#if FIXED_CACHE
L2Cache = L2Cache >= FIXED_CACHE_SIZE ? FIXED_CACHE_SIZE : L2Cache;
E_L2Cache = E_L2Cache >= FIXED_CACHE_SIZE ? FIXED_CACHE_SIZE : E_L2Cache;
#endif
}

static CpuDevice* getInstance() {
Expand Down
37 changes: 20 additions & 17 deletions neural_speed/models/model_utils/gguf.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,7 @@ enum gguf_type {
};

static const char* GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "u8", [GGUF_TYPE_INT8] = "i8", [GGUF_TYPE_UINT16] = "u16", [GGUF_TYPE_INT16] = "i16",
[GGUF_TYPE_UINT32] = "u32", [GGUF_TYPE_INT32] = "i32", [GGUF_TYPE_FLOAT32] = "f32", [GGUF_TYPE_BOOL] = "bool",
[GGUF_TYPE_STRING] = "str", [GGUF_TYPE_ARRAY] = "arr", [GGUF_TYPE_UINT64] = "u64", [GGUF_TYPE_INT64] = "i64",
[GGUF_TYPE_FLOAT64] = "f64",
"u8", "i8", "u16", "i16", "u32", "i32", "f32", "bool", "str", "arr", "u64", "i64", "f64",
};

union gguf_value {
Expand Down Expand Up @@ -201,19 +198,19 @@ struct gguf_context {
#define GGUF_DEFAULT_ALIGNMENT 32

static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = sizeof(uint8_t),
[GGUF_TYPE_INT8] = sizeof(int8_t),
[GGUF_TYPE_UINT16] = sizeof(uint16_t),
[GGUF_TYPE_INT16] = sizeof(int16_t),
[GGUF_TYPE_UINT32] = sizeof(uint32_t),
[GGUF_TYPE_INT32] = sizeof(int32_t),
[GGUF_TYPE_FLOAT32] = sizeof(float),
[GGUF_TYPE_BOOL] = sizeof(bool),
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
[GGUF_TYPE_ARRAY] = 0, // undefined
[GGUF_TYPE_UINT64] = sizeof(uint64_t),
[GGUF_TYPE_INT64] = sizeof(int64_t),
[GGUF_TYPE_FLOAT64] = sizeof(double),
sizeof(uint8_t),
sizeof(int8_t),
sizeof(uint16_t),
sizeof(int16_t),
sizeof(uint32_t),
sizeof(int32_t),
sizeof(float),
sizeof(bool),
sizeof(struct gguf_str),
0, // undefined
sizeof(uint64_t),
sizeof(int64_t),
sizeof(double),
};
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");

Expand Down Expand Up @@ -296,13 +293,19 @@ inline static void* ggml_aligned_malloc(size_t size) {
return NULL;
}
void* aligned_memory = NULL;
#ifdef _MSC_VER
aligned_memory = _aligned_malloc(size, GGML_MEM_ALIGN);
int result = aligned_memory ? 0 : 1;
#else
#ifdef GGML_USE_CPU_HBM
int result = hbw_posix_memalign(&aligned_memory, 16, size);
#elif GGML_USE_METAL
int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size);
#else
int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
#endif
#endif

if (result != 0) {
// Handle allocation failure
const char* error_desc = "unknown allocation error";
Expand Down

0 comments on commit ffd5ade

Please sign in to comment.