Revert "Revert "set flags to optimize for mmq""

This reverts commit 7959e93.
LostRuins · Jun 26, 2024 · 70000b4 · 70000b4
1 parent 7959e93
commit 70000b4
Showing 1 changed file with 1 addition and 0 deletions.
diff --git a/ggml-cuda/mmq.cuh b/ggml-cuda/mmq.cuh
@@ -8,6 +8,7 @@
 #include <cstdint>
 
 #define MMQ_DP4A_MAX_BATCH_SIZE 64 // Max. batch size to use for dp4a MMQ kernels when FP16 tensor cores are available.
+#define GGML_CUDA_FORCE_MMQ // NOTE(review): defined here with an empty value and no #ifndef guard visible in this hunk; presumably selects the MMQ kernel path (commit title: "set flags to optimize for mmq") - confirm where it is tested. TODO confirm it cannot clash with a build that also passes -DGGML_CUDA_FORCE_MMQ.
 
 typedef void (*load_tiles_mmq_t)(const char * __restrict__ x, int * x_tile, const int & kbx0, const int & i_max, const int & stride);
 typedef void (*vec_dot_mmq_t)(const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k0);