From 743687020dce95a22c4ef62ccfcfaf54d2636024 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 6 Apr 2024 17:29:44 +0800 Subject: [PATCH] fixed img2img --- expose.h | 2 + klite.embd | 32 ++++++++++---- koboldcpp.py | 20 ++++++--- otherarch/sdcpp/sdtype_adapter.cpp | 71 +++++++++++++++++++++++++++--- 4 files changed, 103 insertions(+), 22 deletions(-) diff --git a/expose.h b/expose.h index e4047941db356..dc04300998289 100644 --- a/expose.h +++ b/expose.h @@ -117,6 +117,8 @@ struct sd_generation_inputs { const char * prompt; const char * negative_prompt; + const char * init_images = ""; + const float denoising_strength; const float cfg_scale; const int sample_steps; const int width; diff --git a/klite.embd b/klite.embd index 60761d307669b..b7bbfcebef124 100644 --- a/klite.embd +++ b/klite.embd @@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite. If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one. Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line. 
-Current version: 126 +Current version: 127 -Concedo --> @@ -197,8 +197,10 @@ Current version: 126 margin-top: 6px; } - #actionmenuitems button { - width: 80px; + #actionmenuitems button,#actionmenuitems2 button { + width: 60px; + padding: 4px 4px; + font-size: 12px; } #messagefield { @@ -1873,13 +1875,13 @@ Current version: 126 { height: calc(98vh - 240px); } - @media (max-width: 720px) { + @media (max-width: 598px) { .normal_viewport_height { height: calc(98vh - 270px); } } - @media (max-width: 406px) { + @media (max-width: 342px) { .normal_viewport_height { height: calc(98vh - 300px); @@ -1903,7 +1905,7 @@ Current version: 126 } .aesthetic_viewport_height.withmenu { - height: calc(98vh - 206px); + height: calc(98vh - 198px); } .aesthetic_viewport_height.withtyping { @@ -1911,7 +1913,7 @@ Current version: 126 } .aesthetic_viewport_height.withmenu.withtyping { - height: calc(98vh - 256px); + height: calc(98vh - 248px); } /** @@ -4263,7 +4265,7 @@ Current version: 126 } let ep = a1111_txt2img_endpoint; - if(req_payload.source_image!="") + if(req_payload.source_image && req_payload.source_image!="") { ep = a1111_img2img_endpoint; a1111_t2i_payload.init_images = [req_payload.source_image]; @@ -7475,6 +7477,8 @@ Current version: 126 }); } + var cached_model_list = null; + var stale_cached_model_time = performance.now(); function fetch_models(onDoneCallback) { if(localflag) @@ -7482,6 +7486,14 @@ Current version: 126 onDoneCallback(selected_models); return; } + + if(cached_model_list!=null && cached_model_list.length>1 && performance.now() < stale_cached_model_time) + { + console.log("Reuse cached model list"); + onDoneCallback(cached_model_list); + return; + } + //fetch the model list multifetch(models_endpoints,(resArr,errArr)=>{ if(resArr && resArr.length>0) @@ -7500,6 +7512,8 @@ Current version: 126 } } + cached_model_list = mdls; + stale_cached_model_time = performance.now() + 30000; //cache model list for 30s onDoneCallback(mdls); } else @@ -10524,7 +10538,7 @@ 
Current version: 126 "replacement_filter": true, "r2": false } - if(base64img!="") + if(base64img!=null && base64img!="") { genimg_payload["source_image"] = base64img; genimg_payload["params"]["denoising_strength"] = localsettings.img_img2imgstr; diff --git a/koboldcpp.py b/koboldcpp.py index 6b614d943270e..b3907502bebf9 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -108,6 +108,8 @@ class sd_load_model_inputs(ctypes.Structure): class sd_generation_inputs(ctypes.Structure): _fields_ = [("prompt", ctypes.c_char_p), ("negative_prompt", ctypes.c_char_p), + ("init_images", ctypes.c_char_p), + ("denoising_strength", ctypes.c_float), ("cfg_scale", ctypes.c_float), ("sample_steps", ctypes.c_int), ("width", ctypes.c_int), @@ -527,6 +529,9 @@ def sd_generate(genparams): global maxctx, args, currentusergenkey, totalgens, pendingabortkey prompt = genparams.get("prompt", "high quality") negative_prompt = genparams.get("negative_prompt", "") + init_images_arr = genparams.get("init_images", []) + init_images = ("" if (not init_images_arr or len(init_images_arr)==0 or not init_images_arr[0]) else init_images_arr[0]) + denoising_strength = genparams.get("denoising_strength", 0.6) cfg_scale = genparams.get("cfg_scale", 5) sample_steps = genparams.get("steps", 20) width = genparams.get("width", 512) @@ -535,7 +540,6 @@ def sd_generate(genparams): sample_method = genparams.get("sampler_name", "k_euler_a") is_quiet = True if args.quiet else False - #clean vars width = width - (width%64) height = height - (height%64) @@ -569,7 +573,9 @@ def sd_generate(genparams): inputs = sd_generation_inputs() inputs.prompt = prompt.encode("UTF-8") inputs.negative_prompt = negative_prompt.encode("UTF-8") + inputs.init_images = init_images.encode("UTF-8") inputs.cfg_scale = cfg_scale + inputs.denoising_strength = denoising_strength inputs.sample_steps = sample_steps inputs.width = width inputs.height = height @@ -1219,7 +1225,7 @@ def do_POST(self): sse_stream_flag = False api_format = 0 
#1=basic,2=kai,3=oai,4=oai-chat,5=interrogate - is_txt2img = False + is_imggen = False if self.path.endswith('/request'): api_format = 1 @@ -1249,14 +1255,14 @@ def do_POST(self): return api_format = 5 - if self.path.endswith('/sdapi/v1/txt2img'): - is_txt2img = True + if self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'): + is_imggen = True - if is_txt2img or api_format > 0: + if is_imggen or api_format > 0: global last_req_time last_req_time = time.time() - if not is_txt2img and api_format<5: + if not is_imggen and api_format<5: if not self.secure_endpoint(): return @@ -1297,7 +1303,7 @@ def do_POST(self): time.sleep(0.2) #short delay return - elif is_txt2img: #image gen + elif is_imggen: #image gen try: gen = sd_generate(genparams) genresp = (json.dumps({"images":[gen],"parameters":{},"info":""}).encode()) diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index 948c8e3fa7fb8..b168b86e3b80b 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -18,6 +18,8 @@ #include "model.cpp" #include "zip.c" +#include "otherarch/utils.h" + // #include "preprocessing.hpp" #include "stable-diffusion.h" @@ -264,6 +266,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) //sanitize prompts, remove quotes and limit lengths std::string cleanprompt = clean_input_prompt(inputs.prompt); std::string cleannegprompt = clean_input_prompt(inputs.negative_prompt); + std::string img2img_data = std::string(inputs.init_images); sd_params->prompt = cleanprompt; sd_params->negative_prompt = cleannegprompt; @@ -272,6 +275,13 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) sd_params->seed = inputs.seed; sd_params->width = inputs.width; sd_params->height = inputs.height; + sd_params->strength = inputs.denoising_strength; + sd_params->mode = (img2img_data==""?SDMode::TXT2IMG:SDMode::IMG2IMG); + + //for img2img + sd_image_t input_image = 
{0,0,0,nullptr}; + std::vector image_buffer; + int nx, ny, nc; if(!is_quiet) { @@ -314,9 +324,9 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) if (sd_params->mode == TXT2IMG) { - if(!is_quiet && sddebugmode==1) + if(!is_quiet && sddebugmode==1) { - printf("\nPROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nCSTR:%f\n\n", + printf("\nTXT2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nCSTR:%f\n\n", sd_params->prompt.c_str(), sd_params->negative_prompt.c_str(), sd_params->clip_skip, @@ -344,10 +354,59 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) control_image, sd_params->control_strength); } else { - sd_image_t input_image = {(uint32_t)sd_params->width, - (uint32_t)sd_params->height, - 3, - input_image_buffer}; + + if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) { + printf("\nKCPP SD: bad request image dimensions!\n"); + output.data = ""; + output.status = 0; + return output; + } + + image_buffer = kcpp_base64_decode(img2img_data); + + if(input_image_buffer!=nullptr) //just in time free old buffer + { + stbi_image_free(input_image_buffer); + input_image_buffer = nullptr; + } + + input_image_buffer = stbi_load_from_memory(image_buffer.data(), image_buffer.size(), &nx, &ny, &nc, 3); + + if (nx <= 0 || nx % 64 != 0 || ny <= 0 || ny % 64 != 0 || nc!= 3) { + printf("\nKCPP SD: bad input image dimensions!\n"); + output.data = ""; + output.status = 0; + return output; + } + if (!input_image_buffer) { + printf("\nKCPP SD: load image from memory failed!\n"); + output.data = ""; + output.status = 0; + return output; + } + + input_image.width = nx; + input_image.height = ny; + input_image.channel = nc; + input_image.data = input_image_buffer; + + if(!is_quiet && sddebugmode==1) + { + printf("\nIMG2IMG 
PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nSTR:%f\n\n", + sd_params->prompt.c_str(), + sd_params->negative_prompt.c_str(), + sd_params->clip_skip, + sd_params->cfg_scale, + sd_params->width, + sd_params->height, + sd_params->sample_method, + sd_params->sample_steps, + sd_params->seed, + sd_params->batch_count, + control_image, + sd_params->strength); + } + results = img2img(sd_ctx, input_image, sd_params->prompt.c_str(),