diff --git a/Makefile b/Makefile
index 10e2a0904ba60..3ceea61967486 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,7 @@ BUILD_TARGETS = \
 	llama-imatrix \
 	llama-infill \
 	llama-llava-cli \
-	llama-minicpmv-cli\
+	llama-minicpmv-cli \
 	llama-lookahead \
 	llama-lookup \
 	llama-lookup-create \
@@ -1352,7 +1352,7 @@ llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp examples/llava/clip.h exampl
 	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
 	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
-
+
 llama-export-lora: examples/export-lora/export-lora.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
diff --git a/examples/llava/README-minicpmv2.5.md b/examples/llava/README-minicpmv2.5.md
index 0dfcd5f71c802..4affc1d0f26ff 100644
--- a/examples/llava/README-minicpmv2.5.md
+++ b/examples/llava/README-minicpmv2.5.md
@@ -96,4 +96,4 @@ Now, you can start chatting:
 ```
 $cd /data/data/com.termux/files/home/bin
 $./llama-minicpmv-cli -m ../model/ggml-model-Q4_K_M.gguf --mmproj ../model/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
-```
\ No newline at end of file
+```
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index c66e140606edd..acb3a3464b582 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -1863,7 +1863,7 @@ static std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_imag
         std::pair<int, int> best_grid = uhd_best_grid(max_slice_nums, multiple, log_ratio);
         LOG_TEE("%s: image_size: %d %d; best_grid: %d %d\n", __func__, img->nx, img->ny, best_grid.first, best_grid.second);
-
+
         auto refine_size = uhd_get_refine_size(original_size, best_grid, scale_resolution, patch_size, true);
         clip_image_u8 * refine_image = clip_image_u8_init();
         bicubic_resize(*img, *refine_image, refine_size.first, refine_size.second);
 
@@ -2558,6 +2558,6 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
     throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str()));
 }
 
-bool clip_is_minicpmv(const struct clip_ctx * ctx) {
+bool clip_is_minicpmv(const struct clip_ctx * ctx) {
     return ctx->has_minicpmv_projector;
-}
\ No newline at end of file
+}
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index d64a11b65d2bd..916d9dc401dc4 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -254,7 +254,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
             image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip));
             int patch_size=14;
             load_image_size->width = img_res_v.data[i].nx;
-            load_image_size->height = img_res_v.data[i].ny;
+            load_image_size->height = img_res_v.data[i].ny;
             clip_add_load_image_size(ctx_clip, load_image_size);
             const bool encoded = clip_image_encode(ctx_clip, n_threads, only_v2_5_reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
             if (!encoded) {
@@ -278,7 +278,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
         }
         image_embd_v.clear();
         load_image_size->width = img->nx;
-        load_image_size->height = img->ny;
+        load_image_size->height = img->ny;
         clip_add_load_image_size(ctx_clip, load_image_size);
         LOG_TEE("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height);
     }
@@ -292,7 +292,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
 
             return false;
         }
-    }
+    }
     else {
         // spatial_unpad llava-1.6 type embedding
         // TODO: CLIP needs batching support - in HF the llm projection is separate after encoding, which might be a solution to quickly get batching working
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index 6a1ae358dd9ed..f951b57b29158 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -122,7 +122,7 @@ static bool eval_string(struct llama_context * ctx_llama, const char* str, int n
 static void process_eval_image_embed(struct llava_context * ctx_llava, const struct llava_image_embed * embeds, int n_batch, int * n_past, int idx) {
     float * image_embed = (float *)malloc(clip_embd_nbytes(ctx_llava->ctx_clip));
     std::memcpy(image_embed, embeds->embed + idx * clip_n_patches(ctx_llava->ctx_clip) * clip_n_mmproj_embd(ctx_llava->ctx_clip), clip_embd_nbytes(ctx_llava->ctx_clip));
-
+
     auto slice_embed = (llava_image_embed*)malloc(sizeof(llava_image_embed));
     slice_embed->embed = image_embed;
     slice_embed->n_image_pos = clip_n_patches(ctx_llava->ctx_clip);
@@ -223,7 +223,7 @@ static struct llama_sampling_context * llama_init(struct llava_context * ctx_lla
 }
 
 static const char * llama_loop(struct llava_context * ctx_llava,struct llama_sampling_context * ctx_sampling, int &n_past){
-
+
     const char * tmp = sample(ctx_sampling, ctx_llava->ctx_llama, &n_past);
     return tmp;
 }
@@ -272,7 +272,7 @@ int main(int argc, char ** argv) {
                 if (strstr(tmp, "###")) break; // Yi-VL behavior
                 have_tmp = true;
                 printf("%s", tmp);
-                if (strstr(response.c_str(), "<user>")) break; // minicpm-v
+                if (strstr(response.c_str(), "<user>")) break; // minicpm-v
 
                 fflush(stdout);
             }
@@ -292,18 +292,18 @@ int main(int argc, char ** argv) {
                     if (strcmp(tmp, "</s>") == 0) break;
                     if (strstr(tmp, "###")) break; // Yi-VL behavior
                     printf("%s", tmp);// mistral llava-1.6
-                    if (strstr(response.c_str(), "<user>")) break; // minicpm-v
+                    if (strstr(response.c_str(), "<user>")) break; // minicpm-v
                     fflush(stdout);
                 }
                 llama_sampling_free(ctx_sampling);
             }
         }
         printf("\n");
-        llama_print_timings(ctx_llava->ctx_llama);
+        llama_print_timings(ctx_llava->ctx_llama);
 
         ctx_llava->model = NULL;
         llava_free(ctx_llava);
     }
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/examples/llava/requirements.txt b/examples/llava/requirements.txt
index 8e18985aceff9..dfe5fbe62cea6 100644
--- a/examples/llava/requirements.txt
+++ b/examples/llava/requirements.txt
@@ -2,4 +2,4 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 pillow~=10.2.0
 torch~=2.2.1
-torchvision==0.16.2
\ No newline at end of file
+torchvision==0.17.1