Skip to content

Commit

Permalink
tidy up the new sse stream code
Browse files (browse the repository at this point in the history)
  • Loading branch information
LostRuins committed Sep 7, 2023
1 parent 4908937 commit a8673be
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions koboldcpp.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -434,11 +434,12 @@ async def handle_sse_stream(self):

incomplete_token_buffer = bytearray()
while not handle.has_finished():
while current_token < handle.get_stream_count():
streamcount = handle.get_stream_count()
while current_token < streamcount:
token = handle.new_token(current_token)

if token is None: # Token isnt ready yet, received nullpointer
continue
break

current_token += 1

Expand All @@ -451,7 +452,7 @@ async def handle_sse_stream(self):
event_str = json.dumps(event_data)
await self.send_sse_event("message", event_str)

await asyncio.sleep(0.1)
await asyncio.sleep(0.02) #this should keep things responsive

# flush buffers, sleep a bit to make sure all data sent, and then force close the connection
self.wfile.flush()
Expand Down Expand Up @@ -1803,4 +1804,4 @@ def main(launch_args,start_server=True):
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')

main(parser.parse_args(),start_server=True)
main(parser.parse_args(),start_server=True)

0 comments on commit a8673be

Please sign in to comment.