From 743687020dce95a22c4ef62ccfcfaf54d2636024 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 6 Apr 2024 17:29:44 +0800
Subject: [PATCH] fixed img2img

---
 expose.h                           |  2 +
 klite.embd                         | 32 ++++++++++----
 koboldcpp.py                       | 20 ++++++---
 otherarch/sdcpp/sdtype_adapter.cpp | 71 +++++++++++++++++++++++++++---
 4 files changed, 103 insertions(+), 22 deletions(-)

diff --git a/expose.h b/expose.h
index e4047941db356..dc04300998289 100644
--- a/expose.h
+++ b/expose.h
@@ -117,6 +117,8 @@ struct sd_generation_inputs
 {
     const char * prompt;
     const char * negative_prompt;
+    const char * init_images = "";
+    const float denoising_strength;
     const float cfg_scale;
     const int sample_steps;
     const int width;
diff --git a/klite.embd b/klite.embd
index 60761d307669b..b7bbfcebef124 100644
--- a/klite.embd
+++ b/klite.embd
@@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
 Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
-Current version: 126
+Current version: 127
 -Concedo
 -->
 
@@ -197,8 +197,10 @@ Current version: 126
 			margin-top: 6px;
 		}
 
-		#actionmenuitems button {
-			width: 80px;
+		#actionmenuitems button,#actionmenuitems2 button {
+			width: 60px;
+			padding: 4px 4px;
+			font-size: 12px;
 		}
 
 		#messagefield {
@@ -1873,13 +1875,13 @@ Current version: 126
 	{
 		height: calc(98vh - 240px);
 	}
-	@media (max-width: 720px) {
+	@media (max-width: 598px) {
 		.normal_viewport_height
 		{
 			height: calc(98vh - 270px);
 		}
 	}
-	@media (max-width: 406px) {
+	@media (max-width: 342px) {
 		.normal_viewport_height
 		{
 			height: calc(98vh - 300px);
@@ -1903,7 +1905,7 @@ Current version: 126
 	}
 	.aesthetic_viewport_height.withmenu
 	{
-		height: calc(98vh - 206px);
+		height: calc(98vh - 198px);
 	}
 	.aesthetic_viewport_height.withtyping
 	{
@@ -1911,7 +1913,7 @@ Current version: 126
 	}
 	.aesthetic_viewport_height.withmenu.withtyping
 	{
-		height: calc(98vh - 256px);
+		height: calc(98vh - 248px);
 	}
 
 	/**
@@ -4263,7 +4265,7 @@ Current version: 126
 		}
 
 		let ep = a1111_txt2img_endpoint;
-		if(req_payload.source_image!="")
+		if(req_payload.source_image && req_payload.source_image!="")
 		{
 			ep = a1111_img2img_endpoint;
 			a1111_t2i_payload.init_images = [req_payload.source_image];
@@ -7475,6 +7477,8 @@ Current version: 126
 		});
 	}
 
+	var cached_model_list = null;
+	var stale_cached_model_time = performance.now();
 	function fetch_models(onDoneCallback)
 	{
 		if(localflag)
@@ -7482,6 +7486,14 @@ Current version: 126
 			onDoneCallback(selected_models);
 			return;
 		}
+
+		if(cached_model_list!=null && cached_model_list.length>1 && performance.now() < stale_cached_model_time)
+		{
+			console.log("Reuse cached model list");
+			onDoneCallback(cached_model_list);
+			return;
+		}
+
 		//fetch the model list
 		multifetch(models_endpoints,(resArr,errArr)=>{
 			if(resArr && resArr.length>0)
@@ -7500,6 +7512,8 @@ Current version: 126
 					}
 				}
 
+				cached_model_list = mdls;
+				stale_cached_model_time = performance.now() + 30000; //cache model list for 30s (performance.now() is in milliseconds)
 				onDoneCallback(mdls);
 			}
 			else
@@ -10524,7 +10538,7 @@ Current version: 126
 			"replacement_filter": true,
 			"r2": false
 		}
-		if(base64img!="")
+		if(base64img!=null && base64img!="")
 		{
 			genimg_payload["source_image"] = base64img;
 			genimg_payload["params"]["denoising_strength"] = localsettings.img_img2imgstr;
diff --git a/koboldcpp.py b/koboldcpp.py
index 6b614d943270e..b3907502bebf9 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -108,6 +108,8 @@ class sd_load_model_inputs(ctypes.Structure):
 class sd_generation_inputs(ctypes.Structure):
     _fields_ = [("prompt", ctypes.c_char_p),
                 ("negative_prompt", ctypes.c_char_p),
+                ("init_images", ctypes.c_char_p),
+                ("denoising_strength", ctypes.c_float),
                 ("cfg_scale", ctypes.c_float),
                 ("sample_steps", ctypes.c_int),
                 ("width", ctypes.c_int),
@@ -527,6 +529,9 @@ def sd_generate(genparams):
     global maxctx, args, currentusergenkey, totalgens, pendingabortkey
     prompt = genparams.get("prompt", "high quality")
     negative_prompt = genparams.get("negative_prompt", "")
+    init_images_arr = genparams.get("init_images", [])
+    init_images = ("" if (not init_images_arr or len(init_images_arr)==0 or not init_images_arr[0]) else init_images_arr[0])
+    denoising_strength = genparams.get("denoising_strength", 0.6)
     cfg_scale = genparams.get("cfg_scale", 5)
     sample_steps = genparams.get("steps", 20)
     width = genparams.get("width", 512)
@@ -535,7 +540,6 @@ def sd_generate(genparams):
     sample_method = genparams.get("sampler_name", "k_euler_a")
     is_quiet = True if args.quiet else False
 
-
     #clean vars
     width = width - (width%64)
     height = height - (height%64)
@@ -569,7 +573,9 @@ def sd_generate(genparams):
     inputs = sd_generation_inputs()
     inputs.prompt = prompt.encode("UTF-8")
     inputs.negative_prompt = negative_prompt.encode("UTF-8")
+    inputs.init_images = init_images.encode("UTF-8")
     inputs.cfg_scale = cfg_scale
+    inputs.denoising_strength = denoising_strength
     inputs.sample_steps = sample_steps
     inputs.width = width
     inputs.height = height
@@ -1219,7 +1225,7 @@ def do_POST(self):
             sse_stream_flag = False
 
             api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate
-            is_txt2img = False
+            is_imggen = False
 
             if self.path.endswith('/request'):
                 api_format = 1
@@ -1249,14 +1255,14 @@ def do_POST(self):
                     return
                 api_format = 5
 
-            if self.path.endswith('/sdapi/v1/txt2img'):
-                is_txt2img = True
+            if self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):
+                is_imggen = True
 
-            if is_txt2img or api_format > 0:
+            if is_imggen or api_format > 0:
                 global last_req_time
                 last_req_time = time.time()
 
-                if not is_txt2img and api_format<5:
+                if not is_imggen and api_format<5:
                     if not self.secure_endpoint():
                         return
 
@@ -1297,7 +1303,7 @@ def do_POST(self):
                         time.sleep(0.2) #short delay
                     return
 
-                elif is_txt2img: #image gen
+                elif is_imggen: #image gen
                     try:
                         gen = sd_generate(genparams)
                         genresp = (json.dumps({"images":[gen],"parameters":{},"info":""}).encode())
diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp
index 948c8e3fa7fb8..b168b86e3b80b 100644
--- a/otherarch/sdcpp/sdtype_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@@ -18,6 +18,8 @@
 #include "model.cpp"
 #include "zip.c"
 
+#include "otherarch/utils.h"
+
 // #include "preprocessing.hpp"
 #include "stable-diffusion.h"
 
@@ -264,6 +266,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
     //sanitize prompts, remove quotes and limit lengths
     std::string cleanprompt = clean_input_prompt(inputs.prompt);
     std::string cleannegprompt = clean_input_prompt(inputs.negative_prompt);
+    std::string img2img_data = std::string(inputs.init_images);
 
     sd_params->prompt = cleanprompt;
     sd_params->negative_prompt = cleannegprompt;
@@ -272,6 +275,13 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
     sd_params->seed = inputs.seed;
     sd_params->width = inputs.width;
     sd_params->height = inputs.height;
+    sd_params->strength = inputs.denoising_strength;
+    sd_params->mode = (img2img_data==""?SDMode::TXT2IMG:SDMode::IMG2IMG);
+
+    //for img2img
+    sd_image_t input_image = {0,0,0,nullptr};
+    std::vector<uint8_t> image_buffer;
+    int nx, ny, nc;
 
     if(!is_quiet)
     {
@@ -314,9 +324,9 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
 
     if (sd_params->mode == TXT2IMG) {
 
-         if(!is_quiet && sddebugmode==1)
+        if(!is_quiet && sddebugmode==1)
         {
-            printf("\nPROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nCSTR:%f\n\n",
+            printf("\nTXT2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nCSTR:%f\n\n",
             sd_params->prompt.c_str(),
             sd_params->negative_prompt.c_str(),
             sd_params->clip_skip,
@@ -344,10 +354,59 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
                           control_image,
                           sd_params->control_strength);
     } else {
-        sd_image_t input_image = {(uint32_t)sd_params->width,
-                                  (uint32_t)sd_params->height,
-                                  3,
-                                  input_image_buffer};
+
+        if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) {
+            printf("\nKCPP SD: bad request image dimensions!\n");
+            output.data = "";
+            output.status = 0;
+            return output;
+        }
+
+        image_buffer = kcpp_base64_decode(img2img_data);
+
+        if(input_image_buffer!=nullptr) //just in time free old buffer
+        {
+             stbi_image_free(input_image_buffer);
+             input_image_buffer = nullptr;
+        }
+
+        input_image_buffer = stbi_load_from_memory(image_buffer.data(), image_buffer.size(), &nx, &ny, &nc, 3);
+
+        if (nx <= 0 || nx % 64 != 0 || ny <= 0 || ny % 64 != 0 || nc!= 3) {
+            printf("\nKCPP SD: bad input image dimensions!\n");
+            output.data = "";
+            output.status = 0;
+            return output;
+        }
+        if (!input_image_buffer) {
+            printf("\nKCPP SD: load image from memory failed!\n");
+            output.data = "";
+            output.status = 0;
+            return output;
+        }
+
+        input_image.width = nx;
+        input_image.height = ny;
+        input_image.channel = nc;
+        input_image.data = input_image_buffer;
+
+        if(!is_quiet && sddebugmode==1)
+        {
+            printf("\nIMG2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nSTR:%f\n\n",
+            sd_params->prompt.c_str(),
+            sd_params->negative_prompt.c_str(),
+            sd_params->clip_skip,
+            sd_params->cfg_scale,
+            sd_params->width,
+            sd_params->height,
+            sd_params->sample_method,
+            sd_params->sample_steps,
+            sd_params->seed,
+            sd_params->batch_count,
+            control_image,
+            sd_params->strength);
+        }
+
         results = img2img(sd_ctx,
                             input_image,
                             sd_params->prompt.c_str(),