From b7368332e24c5b2c8038bf8267f43632783fcc35 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 27 Apr 2024 17:50:48 +0200 Subject: [PATCH 01/35] ci: server: tests python env on github container ubuntu latest / fix n_predict (#6935) * ci: server: fix python env * ci: server: fix server tests after #6638 * ci: server: fix windows is not building PR branch --- .github/workflows/server.yml | 31 ++++++++++++++----------------- examples/server/server.cpp | 2 +- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index a044d6044ce86..79cd7d643d20e 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -41,24 +41,16 @@ jobs: sanitizer: "" fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken - container: - image: ubuntu:latest - ports: - - 8888 - options: --cpus 4 - steps: - name: Dependencies id: depends run: | - apt-get update - apt-get -y install \ + sudo apt-get update + sudo apt-get -y install \ build-essential \ xxd \ git \ cmake \ - python3-pip \ - python3-venv \ curl \ wget \ language-pack-en \ @@ -71,6 +63,17 @@ jobs: fetch-depth: 0 ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + - name: Python setup + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Tests dependencies + id: test_dependencies + run: | + pip install -r examples/server/tests/requirements.txt + - name: Verify server deps id: verify_server_deps run: | @@ -101,13 +104,6 @@ jobs: -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server - - name: Setup python env - id: pipenv - run: | - cd examples/server/tests - python3 -m venv venv - . venv/bin/activate - pip install -r requirements.txt - name: Tests id: server_integration_tests @@ -133,6 +129,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - name: libCURL id: get_libcurl diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 6f8ba3fc65d9f..2760aea8fd3e9 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1208,7 +1208,7 @@ struct server_context { } auto n_ctx_train = llama_n_ctx_train(model); - if (slot.params.n_predict < 1 && slot.ga_n == 1 + if (slot.params.n_predict < 1 && slot.n_predict < 1 && slot.ga_n == 1 && slot.n_prompt_tokens + slot.n_decoded >= n_ctx_train) { LOG_WARNING("n_predict is not set and self-context extend is disabled." " Limiting generated tokens to n_ctx_train to avoid EOS-less generation infinite loop", { From 4dba7e8114d84241c842b986e008af8b88d1a019 Mon Sep 17 00:00:00 2001 From: mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com> Date: Sat, 27 Apr 2024 21:02:06 +0200 Subject: [PATCH 02/35] Replace "alternative" boolean operator in conditional compilation directive (#6949) --- common/log.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/log.h b/common/log.h index e4edcac7d7735..2b2f0e4551d7a 100644 --- a/common/log.h +++ b/common/log.h @@ -234,7 +234,7 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std:: // INTERNAL, DO NOT USE // USE LOG() INSTEAD // -#if !defined(_MSC_VER) or defined(__INTEL_LLVM_COMPILER) +#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) #define LOG_IMPL(str, ...) 
\ do { \ if (LOG_TARGET != nullptr) \ @@ -257,7 +257,7 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std:: // INTERNAL, DO NOT USE // USE LOG_TEE() INSTEAD // -#if !defined(_MSC_VER) or defined(__INTEL_LLVM_COMPILER) +#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) #define LOG_TEE_IMPL(str, ...) \ do { \ if (LOG_TARGET != nullptr) \ From 6c3fd5b685427df0d4272aae32d4daf5bb743b55 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 27 Apr 2024 18:01:13 +0800 Subject: [PATCH 03/35] updated lite (+2 squashed commit) Squashed commit: [d10a731e] update lite [2554b8e6] update docs --- kcpp_docs.embd | 7 ++ klite.embd | 179 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 153 insertions(+), 33 deletions(-) diff --git a/kcpp_docs.embd b/kcpp_docs.embd index 08a5b85f04bc5..83a6708d676c0 100644 --- a/kcpp_docs.embd +++ b/kcpp_docs.embd @@ -203,6 +203,13 @@ "description": "KoboldCpp ONLY. If true, prints special tokens as text for GGUF models", "type": "boolean" }, + "banned_tokens": { + "description": "An array of string sequences to remove from model vocab. All matching tokens with matching substrings are removed.", + "items": { + "type": "string" + }, + "type": "array" + }, "logit_bias": { "default": {}, "description": "KoboldCpp ONLY. An dictionary of key-value pairs, which indicate the token IDs (int) and logit bias (float) to apply for that token. Up to 16 value can be provided.", diff --git a/klite.embd b/klite.embd index 3f33826efb323..6edb9e3ad1c17 100644 --- a/klite.embd +++ b/klite.embd @@ -3651,6 +3651,7 @@ Current version: 136 var personal_notes = ""; var logitbiasdict = {}; var regexreplace_data = []; + var placeholder_tags_data = []; const num_regex_rows = 4; var localsettings = { @@ -5103,6 +5104,7 @@ Current version: 136 new_save_storyobj.personal_notes = personal_notes; new_save_storyobj.logitbiasdict = JSON.parse(JSON.stringify(logitbiasdict)); new_save_storyobj.regexreplace_data = JSON.parse(JSON.stringify(regexreplace_data)); + new_save_storyobj.placeholder_tags_data = JSON.parse(JSON.stringify(placeholder_tags_data)); if (export_settings) { new_save_storyobj.savedsettings = JSON.parse(JSON.stringify(localsettings)); @@ -5276,6 +5278,7 @@ Current version: 136 let old_tokenbans = tokenbans; let old_notes = personal_notes; let old_regexreplace_data = regexreplace_data; + let old_placeholder_tags_data = placeholder_tags_data; //determine if oldui file or newui file format restart_new_game(false); @@ -5363,6 +5366,10 @@ Current version: 136 if (storyobj.regexreplace_data) { regexreplace_data = storyobj.regexreplace_data; } + if(storyobj.placeholder_tags_data) + { + placeholder_tags_data = storyobj.placeholder_tags_data; + } } else { //v2 load if(storyobj.prompt!="") @@ -5424,6 +5431,7 @@ Current version: 136 extrastopseq = old_extrastopseq; regexreplace_data = old_regexreplace_data; tokenbans = old_tokenbans; + placeholder_tags_data = old_placeholder_tags_data; } if (storyobj.savedsettings && storyobj.savedsettings != "") @@ -6756,7 +6764,7 @@ Current version: 136 function expand_tokens_section(targetid) { - let tablist = ["expandregexreplace","expandtokenbans","expandlogitbias"]; + let tablist = ["expandregexreplace","expandtokenbans","expandlogitbias","expandplaceholdertags"]; for(let i=0;i + Placeholder ?The placeholder to match against + Replacement ?The text to substitude on display. Actual context is unchanged. 
+ `; + let regextable = document.getElementById("placeholder_replace_table"); + + let hardcoded1 = ["{{user}}","{{char}}","\\n{{[INPUT]}}\\n","\\n{{[OUTPUT]}}\\n"]; + let hardcoded2 = [localsettings.chatname,localsettings.chatopponent,localsettings.instruct_starttag,localsettings.instruct_endtag]; + + for(let i=0;i + ${hardcoded1[i]} + + + `; + } + + for(let i=0;i + + + + `; + } + + regextable.innerHTML = regextablehtml; + + document.getElementById("placeholder_tags2").checked = localsettings.placeholder_tags; + + for(let i=0;i" + match.substring(0,match.length-2).trim() + "
"; }); - // input = input.replaceAll(othernamesregex2, function(match) { - // return "{{botplaceholder}}" + match.substring(0,match.length-2).trim() + "
"; - // }); } else { input = input.replaceAll(othernamesregex, "{{botplaceholder}}"); - //input = input.replaceAll(othernamesregex2, "{{botplaceholder}}"); } you = "{{userplaceholder}}"; @@ -15193,7 +15293,8 @@ Current version: 136 - + Language +
@@ -15450,8 +15551,8 @@ Current version: 136
Enter OpenAI-formatted logit bias dictionary. Each key is an integer token ID and its value is the bias (-100.0 to 100.0). Leave blank to disable.
Input is a JSON object, reference here.
- - + +
@@ -15478,14 +15579,26 @@ Current version: 136 - +
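Note: the two request fields documented in kcpp_docs.embd above (banned_tokens and logit_bias) can be exercised together. The snippet below is a minimal sketch, not code from this patch: the endpoint path, port, prompt and concrete values are assumptions based on an existing local KoboldCpp setup; only the two new fields follow the descriptions added above.

    import json, urllib.request

    payload = {
        "prompt": "Once upon a time",
        "max_length": 64,
        # banned_tokens: every vocab token whose text contains one of these substrings is removed
        "banned_tokens": ["http://", "www."],
        # logit_bias: token ID (as a string key) -> bias in the range -100.0 to 100.0
        "logit_bias": {"2": -100.0, "13": 5.0},
    }
    # assumed: a local KoboldCpp instance listening on the default port
    req = urllib.request.Request("http://localhost:5001/api/v1/generate",
                                 data=json.dumps(payload).encode("utf-8"),
                                 headers={"Content-Type": "application/json"})
    print(urllib.request.urlopen(req).read().decode("utf-8"))
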
From f6ab0f873fcf05efff133ec698f55e32a608dcba Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 28 Apr 2024 11:42:26 +0800 Subject: [PATCH 04/35] gui benchmark --- koboldcpp.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/koboldcpp.py b/koboldcpp.py index 7557a3fd8ddd1..3f82bd3c42480 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -658,6 +658,7 @@ def string_contains_sequence_substring(inputstr,sequences): last_req_time = time.time() last_non_horde_req_time = time.time() currfinishreason = "null" +using_gui_launcher = False def transform_genparams(genparams, api_format): #alias all nonstandard alternative names for rep pen. @@ -1499,6 +1500,9 @@ def show_new_gui(): from tkinter.filedialog import askopenfilename from tkinter.filedialog import asksaveasfile + global using_gui_launcher + using_gui_launcher = True + # if args received, launch if len(sys.argv) != 1: import tkinter as tk @@ -1946,6 +1950,10 @@ def togglectxshift(a,b,c): else: smartcontextbox.grid_forget() + def guibench(): + args.benchmark = "stdout" + launchbrowser.set(0) + guilaunch() def changerunmode(a,b,c): global runmode_untouched @@ -2086,6 +2094,8 @@ def changerunmode(a,b,c): makeslider(hardware_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, 7, 16, set=5,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.") # force version makelabelentry(hardware_tab, "Force Version:" , version_var, 100, 50,"If the autodetected version is wrong, you can change it here.\nLeave as 0 for default.") + ctk.CTkButton(hardware_tab , text = "Run Benchmark", command = guibench ).grid(row=110,column=0, stick="se", padx= 0, pady=2) + runopts_var.trace('w', changerunmode) changerunmode(1,1,1) @@ -2174,7 +2184,6 @@ def togglehorde(a,b,c): makelabelentry(images_tab, "Image threads:" , sd_threads_var, 6, 50,"How many threads to use during image generation.\nIf left blank, uses same value as threads.") makecheckbox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 8,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.") - # launch def guilaunch(): if model_var.get() == "" and sd_model_var.get() == "": @@ -3218,6 +3227,11 @@ def onready_subprocess(): else: # Flush stdout for previous win32 issue so the client can see output. print(f"Server was not started, main function complete. Idling.", flush=True) + global using_gui_launcher + if using_gui_launcher: + print("===") + print("Press a key to exit", flush=True) + input() def run_in_queue(launch_args, input_queue, output_queue): main(launch_args, start_server=False) From 8faa6ed84d94b8bb2f92120d37bed7aea3e2a0b7 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 28 Apr 2024 15:16:02 +0800 Subject: [PATCH 05/35] clarify some descriptions --- klite.embd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/klite.embd b/klite.embd index 6edb9e3ad1c17..bb175f13e3550 100644 --- a/klite.embd +++ b/klite.embd @@ -13386,7 +13386,7 @@ Current version: 136 `; let regextable = document.getElementById("placeholder_replace_table"); - let hardcoded1 = ["{{user}}","{{char}}","\\n{{[INPUT]}}\\n","\\n{{[OUTPUT]}}\\n"]; + let hardcoded1 = ["{{user}}","{{char}}","{{[INPUT]}}","{{[OUTPUT]}}"]; let hardcoded2 = [localsettings.chatname,localsettings.chatopponent,localsettings.instruct_starttag,localsettings.instruct_endtag]; for(let i=0;i
-
Token Bans ?Token Filter ?Outright removal of ANY tokens containing a specific substring from the model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||
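The Token Filter description above amounts to a substring match over the model vocabulary. The following is a minimal sketch of that behaviour under stated assumptions, not the actual KoboldCpp implementation: the helper names and the toy vocab are made up, the filter text is split on the ||$|| delimiter, and any token whose text contains one of the resulting substrings is banned.

    def parse_token_filter(filter_text):
        # split the user-supplied string on the ||$|| delimiter, dropping empty entries
        return [s for s in filter_text.split("||$||") if s]

    def banned_token_ids(vocab, substrings):
        # vocab: dict mapping token id -> token text;
        # a token is banned if its text contains ANY of the substrings
        return [tid for tid, text in vocab.items()
                if any(sub in text for sub in substrings)]

    # toy vocabulary, purely illustrative
    vocab = {0: "http", 1: "hello", 2: " http://", 3: "world"}
    print(banned_token_ids(vocab, parse_token_filter("http||$||www.")))  # -> [0, 2]
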