Unlocked

- Unlock CTX - Set fragmentation cache to zero - Improve Auto-Rope - bench max ctx 200000 - Add an install.bat batch to install the requirements
Nexesenex · Feb 24, 2024 · 87e6975 · 87e6975
1 parent 53dbb2b
commit 87e6975
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 12 deletions.
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
@@ -97,7 +97,7 @@ static std::string concat_output = "";
 static std::string concat_output_reader_copy = "";
 static std::vector<logit_bias> logit_biases;
 
-const int extra_context_handle_fragmentation = 80;
+const int extra_context_handle_fragmentation = 0;
 
 inline bool IsNanCheck(float f)
 {
@@ -758,9 +758,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
                 float factor = file_format_meta.n_ctx_train/2048;
                 effectivenctx = effectivenctx/factor;
             }
-            rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f :
-            (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f))))))));
-
+			rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 2176 ? 10000.0f : (effectivenctx <= 2304 ? 11000.0f : (effectivenctx <= 2432 ? 12000.0f : (effectivenctx <= 2560 ? 13000.0f : (effectivenctx <= 2688 ? 14000.0f : (effectivenctx <= 2816 ? 15000.0f : (effectivenctx <= 2944 ? 16000.0f : (effectivenctx <= 3072 ? 17000.0f : (effectivenctx <= 3200 ? 18000.0f : (effectivenctx <= 3328 ? 19000.0f : (effectivenctx <= 3456 ? 20000.0f : (effectivenctx <= 3584 ? 21000.0f : (effectivenctx <= 3712 ? 22000.0f : (effectivenctx <= 3840 ? 23000.0f : (effectivenctx <= 3968 ? 24000.0f : (effectivenctx <= 4096 ? 25000.0f : (effectivenctx <= 4224 ? 26000.0f : (effectivenctx <= 4352 ? 27000.0f : (effectivenctx <= 4480 ? 28500.0f : (effectivenctx <= 4608 ? 30000.0f : (effectivenctx <= 4736 ? 31500.0f : (effectivenctx <= 4864 ? 33000.0f : (effectivenctx <= 4992 ? 34500.0f : (effectivenctx <= 5120 ? 36000.0f : (effectivenctx <= 5248 ? 38000.0f : (effectivenctx <= 5376 ? 40000.0f : (effectivenctx <= 5504 ? 42000.0f : (effectivenctx <= 5632 ? 44000.0f : (effectivenctx <= 5760 ? 46000.0f : (effectivenctx <= 5888 ? 48000.0f : (effectivenctx <= 6016 ? 51000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 6288 ? 57000.0f : (effectivenctx <= 6400 ? 61000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f))))))))))))))))))))))))))))))))))))))));
         }
 
         printf("Using automatic RoPE scaling. If the model has customized RoPE settings, they will be used directly instead!\n");

diff --git a/install.bat b/install.bat
@@ -0,0 +1,2 @@
+cd /d "%~dp0"
+python -m pip install -r requirements.txt
diff --git a/koboldcpp.py b/koboldcpp.py
@@ -1157,7 +1157,7 @@ def show_new_gui():
 
     import customtkinter as ctk
     nextstate = 0 #0=exit, 1=launch
-    windowwidth = 540
+    windowwidth = 920
     windowheight = 500
     ctk.set_appearance_mode("dark")
     root = ctk.CTk()
@@ -1202,7 +1202,7 @@ def show_new_gui():
     # slider data
     blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
     blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
-    contextsize_text = ["256", "512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768", "49152", "65536"]
+    contextsize_text = ["128" ,"256" ,"384" ,"512" ,"640" ,"768" ,"896" ,"1024" ,"1152" ,"1280" ,"1408" ,"1536" ,"1664" ,"1792" ,"1920" ,"2048" ,"2176" ,"2304" ,"2432" ,"2560" ,"2688" ,"2816" ,"2944" ,"3072" ,"3200" ,"3328" ,"3456" ,"3584" ,"3712" ,"3840" ,"3968" ,"4096" ,"4224" ,"4352" ,"4480" ,"4608" ,"4736" ,"4864" ,"4992" ,"5120" ,"5248" ,"5376" ,"5504" ,"5632" ,"5760" ,"5888" ,"6016" ,"6144" ,"6272" ,"6400" ,"6528" ,"6656" ,"6784" ,"6912" ,"7040" ,"7168" ,"7296" ,"7424" ,"7552" ,"7680" ,"7808" ,"7936" ,"8064" ,"8192" ,"8320" ,"8448" ,"8576" ,"8704" ,"8832" ,"8960" ,"9088" ,"9216" ,"9344" ,"9472" ,"9600" ,"9728" ,"9856" ,"9984" ,"10112" ,"10240" ,"10368" ,"10496" ,"10624" ,"10752" ,"10880" ,"11008" ,"11136" ,"11264" ,"11392" ,"11520" ,"11648" ,"11776" ,"11904" ,"12032" ,"12160" ,"12288" ,"12416" ,"12544" ,"12672" ,"12800" ,"12928" ,"13056" ,"13184" ,"13312" ,"13440" ,"13568" ,"13696" ,"13824" ,"13952" ,"14080" ,"14208" ,"14336" ,"14464" ,"14592" ,"14720" ,"14848" ,"14976" ,"15104" ,"15232" ,"15360" ,"15488" ,"15616" ,"15744" ,"15872" ,"16000" ,"16128" ,"16256" ,"16384" ,"16512" ,"16640" ,"16768" ,"16896" ,"17024" ,"17152" ,"17280" ,"17408" ,"17536" ,"17664" ,"17792" ,"17920" ,"18048" ,"18176" ,"18304" ,"18432" ,"18560" ,"18688" ,"18816" ,"18944" ,"19072" ,"19200" ,"19328" ,"19456" ,"19584" ,"19712" ,"19840" ,"19968" ,"20096" ,"20224" ,"20352" ,"20480" ,"20608" ,"20736" ,"20864" ,"20992" ,"21120" ,"21248" ,"21376" ,"21504" ,"21632" ,"21760" ,"21888" ,"22016" ,"22144" ,"22272" ,"22400" ,"22528" ,"22656" ,"22784" ,"22912" ,"23040" ,"23168" ,"23296" ,"23424" ,"23552" ,"23680" ,"23808" ,"23936" ,"24064" ,"24192" ,"24320" ,"24448" ,"24576" ,"24704" ,"24832" ,"24960" ,"25088" ,"25216" ,"25344" ,"25472" ,"25600" ,"25728" ,"25856" ,"25984" ,"26112" ,"26240" ,"26368" ,"26496" ,"26624" ,"26752" ,"26880" ,"27008" ,"27136" ,"27264" ,"27392" ,"27520" ,"27648" ,"27776" ,"27904" ,"28032" ,"28160" ,"28288" ,"28416" ,"28544" ,"28672" ,"28800" ,"28928" ,"29056" ,"29184" ,"29312" 
,"29440" ,"29568" ,"29696" ,"29824" ,"29952" ,"30080" ,"30208" ,"30336" ,"30464" ,"30592" ,"30720" ,"30848" ,"30976" ,"31104" ,"31232" ,"31360" ,"31488" ,"31616" ,"31744" ,"31872" ,"32000" ,"32128" ,"32256" ,"32384" ,"32512" ,"32640" ,"32768" ,"32896" ,"33024" ,"33152" , "33280" ,"33408" ,"33536" ,"33664" ,"33792" ,"33920" ,"34048" ,"34176" ,"34304" ,"34432" ,"34560" ,"34688" ,"34816" ,"34944" ,"35072" ,"35200" ,"35328" ,"35456" ,"35584" ,"35712" ,"35840" ,"35968" ,"36096" ,"36224" ,"36352" ,"36480" ,"36608" ,"36736" ,"36864" ,"36992" ,"37120" ,"37248" ,"37376" ,"37504" ,"37632" ,"37760" ,"37888" ,"38016" ,"38144" ,"38272" ,"38400" ,"38528" ,"38656" ,"38784" ,"38912" ,"39040" ,"39168" ,"39296" ,"39424" ,"39552" ,"39680" ,"39808" ,"39936" ,"40064" ,"40192" ,"40320" ,"40448" ,"40576" ,"40704" ,"40832" ,"40960" ,"41088" ,"41216" ,"41344" ,"41472" ,"41600" ,"41728" ,"41856" ,"41984" ,"42112" ,"42240" ,"42368" ,"42496" ,"42624" ,"42752" ,"42880" ,"43008" ,"43136" ,"43264" ,"43392" ,"43520" ,"43648" ,"43776" ,"43904" ,"44032" ,"44160" ,"44288" ,"44416" ,"44544" ,"44672" ,"44800" ,"44928" ,"45056" ,"45184" ,"45312" ,"45440" ,"45568" ,"45696" ,"45824" ,"45952" ,"46080" ,"46208" ,"46336" ,"46464" ,"46592" ,"46720" ,"46848" ,"46976" ,"47104" ,"47232" ,"47360" ,"47488" ,"47616" ,"47744" ,"47872" ,"48000" ,"48128" ,"48256" ,"48384" ,"48512" ,"48640" ,"48768" ,"48896" ,"49024" ,"49152" ,"49280" ,"49408" ,"49536" ,"49664" ,"49792" ,"49920" ,"50048" ,"50176" ,"50304" ,"50432" ,"50560" ,"50688" ,"50816" ,"50944" ,"51072" ,"51200" ,"51328" ,"51456" ,"51584" ,"51712" ,"51840" ,"51968" ,"52096" ,"52224" ,"52352" ,"52480" ,"52608" ,"52736" ,"52864" ,"52992" ,"53120" ,"53248" ,"53376" ,"53504" ,"53632" ,"53760" ,"53888" ,"54016" ,"54144" ,"54272" ,"54400" ,"54528" ,"54656" ,"54784" ,"54912" ,"55040" ,"55168" ,"55296" ,"55424" ,"55552" ,"55680" ,"55808" ,"55936" ,"56064" ,"56192" ,"56320" ,"56448" ,"56576" ,"56704" ,"56832" ,"56960" ,"57088" ,"57216" ,"57344" ,"57472" ,"57600" ,"57728" 
,"57856" ,"57984" ,"58112" ,"58240" ,"58368" ,"58496" ,"58624" ,"58752" ,"58880" ,"59008" ,"59136" ,"59264" ,"59392" ,"59520" ,"59648" ,"59776" ,"59904" ,"60032" ,"60160" ,"60288" ,"60416" ,"60544" ,"60672" ,"60800" ,"60928" ,"61056" ,"61184" ,"61312" ,"61440" ,"61568" ,"61696" ,"61824" ,"61952" ,"62080" ,"62208" ,"62336" ,"62464" ,"62592" ,"62720" ,"62848" ,"62976" ,"63104" ,"63232" ,"63360" ,"63488" ,"63616" ,"63744" ,"63872" ,"64000" ,"64128" ,"64256" ,"64384" ,"64512" ,"64640" ,"64768" ,"64896" ,"65024" ,"65152" ,"65280" ,"65408" , "65536"]
     runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
     antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
 
@@ -1303,7 +1303,7 @@ def makelabel(parent, text, row, column=0, tooltiptxt=""):
             temp.bind("<Leave>", hide_tooltip)
         return temp
 
-    def makeslider(parent, label, options, var, from_ , to,  row=0, width=160, height=10, set=0, tooltip=""):
+    def makeslider(parent, label, options, var, from_ , to,  row=0, width=512, height=10, set=0, tooltip=""):
         sliderLabel = makelabel(parent, options[set], row + 1, 1)
         makelabel(parent, label, row,0,tooltip)
 
@@ -1625,7 +1625,7 @@ def changerunmode(a,b,c):
     for idx, name, in enumerate(quick_boxes):
         makecheckbox(quick_tab, name, quick_boxes[name], int(idx/2) +20, idx%2,tooltiptxt=quick_boxes_desc[name])
     # context size
-    makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, set=3,tooltip="What is the maximum context size to support. Model specific. You cannot exceed it.\nLarger contexts require more memory, and not all models support it.")
+    makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, set=15,tooltip="What is the maximum context size to support. Model specific. You cannot exceed it.\nLarger contexts require more memory, and not all models support it.")
 
     # load model
     makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170, onchoosefile=on_picked_model_file,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
@@ -1690,7 +1690,8 @@ def changerunmode(a,b,c):
     togglectxshift(1,1,1)
 
     # context size
-    makeslider(tokens_tab, "Context Size:",contextsize_text, context_var, 0, len(contextsize_text)-1, 20, set=3,tooltip="What is the maximum context size to support. Model specific. You cannot exceed it.\nLarger contexts require more memory, and not all models support it.")
+    makeslider(tokens_tab, "Context Size:",contextsize_text, context_var, 0, len(contextsize_text)-1, 20, set=15,tooltip="What is the maximum context size to support. Model specific. You cannot exceed it.\nLarger contexts require more memory, and not all models support it.")
+
 
 
     customrope_scale_entry, customrope_scale_label = makelabelentry(tokens_tab, "RoPE Scale:", customrope_scale,tooltip="For Linear RoPE scaling. RoPE frequency scale.")
@@ -2575,7 +2576,7 @@ def onready_subprocess():
         global libname
         start_server = False
         save_to_file = (args.benchmark!="stdout" and args.benchmark!="")
-        benchmaxctx =  (2048 if maxctx>2048 else maxctx)
+        benchmaxctx =  (200000 if maxctx>200000 else maxctx)
         benchlen = 100
         benchmodel = sanitize_string(os.path.splitext(os.path.basename(modelname))[0])
         if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
@@ -2673,8 +2674,8 @@ def start_in_seperate_process(launch_args):
     parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
     parser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
     parser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
-    parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[256, 512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536], default=2048)
-    parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
+    parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int, default=2048)
+    parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int, default=512)
     parser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
     parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
     parser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true')