import os
import json
import sys
from datetime import datetime
import openai
import streamlit as st
from streamlit_chat import message
from streamlit_shortcuts import add_keyboard_shortcuts
# Sometimes we might want a UI. Streamlit is pretty lightweight and easy to use
# so we'll make one.
# Run like this from the command line for 3 different conversations:
# streamlit run --browser.gatherUsageStats false --server.port 8501 --theme.base dark python/sl_TermChat.py conversation1
# streamlit run --browser.gatherUsageStats false --server.port 8502 --theme.base dark python/sl_TermChat.py conversation2
# streamlit run --browser.gatherUsageStats false --server.port 8503 --theme.base dark python/sl_TermChat.py conversation3
chat_title = "TermiChat"
if len(sys.argv) > 1:
    chat_title = sys.argv[1]
# Set the page title and header, and allow wide mode (so you can widen your
# browser to see the chat history better).
st.set_page_config(page_title=chat_title, layout="wide", page_icon=":robot_face:")
st.header(f"Termi-chat 🔥: {chat_title}")
# Get this from .streamlit/secrets.toml
# OPENAI_API_KEY = "..."
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Initialise session state variables
DEFAULT_SYSTEM_CONTEXT = "You are a helpful assistant."
if 'output_mode' not in st.session_state:
    # Tracks the output mode (e.g., chat, code, etc.)
    st.session_state['output_mode'] = "old"
if 'assistant' not in st.session_state:
    # Tracks the bot responses
    st.session_state['assistant'] = []
if 'user' not in st.session_state:
    # Tracks the user inputs
    st.session_state['user'] = []
if 'system_context' not in st.session_state:
    # Tracks the system context
    st.session_state['system_context'] = DEFAULT_SYSTEM_CONTEXT
if 'messages' not in st.session_state:
    # Tracks what we send to the model (including past messages and the current user input)
    st.session_state['messages'] = [
        {"role": "system", "content": st.session_state['system_context']}
    ]
if 'model_name' not in st.session_state:
    # Keep model name equal to the actual model name for simplicity when adding a new one
    st.session_state['model_name'] = []
if 'uploaded_file' not in st.session_state:
    st.session_state['uploaded_file'] = None
# Hacky but works: we need to track the key for the file uploader so we can clear it
# when a new file is uploaded.
if 'uploaded_file_key' not in st.session_state:
    st.session_state['uploaded_file_key'] = 1
if 'file_name' not in st.session_state:
    st.session_state['file_name'] = "termi-chat1.json"
if 'timestamp' not in st.session_state:
    st.session_state['timestamp'] = []
# These 3 track cost for paid models to help the user know what they're spending
if 'cost' not in st.session_state:
    st.session_state['cost'] = []
if 'total_tokens' not in st.session_state:
    st.session_state['total_tokens'] = []
if 'total_cost' not in st.session_state:
    st.session_state['total_cost'] = 0.0
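# Note: Streamlit reruns this whole script from the top on every interaction,
# so anything that must survive a rerun (history, costs, file names, etc.)
# has to live in st.session_state, which is why it's all initialized here.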
# Use a reasonable start value for max_tokens; the user can change it
# using the slider.
max_tokens = 512
# We use a map for the model name and the API type. This is so we can
# use the correct API for the model. We have 4 types of models:
# 1. ollama - a locally running model
# 2. openai - a model from OpenAI
# 3. openrouter - a model from OpenRouter
# 4. deepseek - a model from DeepSeek
# The "test" model is for error testing
# For paid models, we track the cost of the conversation, so add the per-token
# costs below along with a comment linking to the vendor's pricing page.
# See https://openai.com/pricing#language-models
# See https://openrouter.ai/models (cost is different for each model)
# For other models (like ollama-based ones), we don't update cost.
model_map = {
    "llama3:8b": {"vendor": "ollama", "context_size": 4096, "input_token_cost": 0.1, "output_token_cost": 0.1},
    "llama3.1:8b": {"vendor": "ollama", "context_size": 4096, "input_token_cost": 0.1, "output_token_cost": 0.1},
    "deepseek-coder:6.7b": {"vendor": "ollama", "context_size": 4096, "input_token_cost": 0, "output_token_cost": 0},
    "llama2-uncensored:7b": {"vendor": "ollama", "context_size": 4096, "input_token_cost": 0, "output_token_cost": 0},
    "wizard-vicuna-uncensored:13b": {"vendor": "ollama", "context_size": 4096, "input_token_cost": 0, "output_token_cost": 0},
    "dolphin-mixtral:8x7b-v2.7-q4_K_M": {"vendor": "ollama", "context_size": 4096, "input_token_cost": 0, "output_token_cost": 0},
    "codellama:13b-python-q4_K_M": {"vendor": "ollama", "context_size": 4096, "input_token_cost": 0, "output_token_cost": 0},
    "llama3-gradient:8b": {"vendor": "ollama", "context_size": 4096, "input_token_cost": 0, "output_token_cost": 0},
    # Menu separator; give it a full entry (vendor "none") so selecting it by
    # accident doesn't crash the cost and vendor lookups below.
    "--- Below spends Money ---": {"vendor": "none", "context_size": 4096, "input_token_cost": 0, "output_token_cost": 0},
    "gpt-3.5-turbo-0125": {"vendor": "openai", "context_size": 16384, "input_token_cost": 0.50, "output_token_cost": 1.50},
    "gpt-4o": {"vendor": "openai", "context_size": 128000, "input_token_cost": 5.00, "output_token_cost": 15.00},
    "gpt-4-turbo-2024-04-09": {"vendor": "openai", "context_size": 128000, "input_token_cost": 10.00, "output_token_cost": 30.00},
    "mistralai/mixtral-8x7b-instruct": {"vendor": "openrouter", "context_size": 32768, "input_token_cost": 0.24, "output_token_cost": 0.24},
    "openai/gpt-3.5-turbo-0125": {"vendor": "openrouter", "context_size": 16384, "input_token_cost": 0.50, "output_token_cost": 1.50},
    "anthropic/claude-3-haiku": {"vendor": "openrouter", "context_size": 200000, "input_token_cost": 0.25, "output_token_cost": 1.25},
    "deepseek-chat": {"vendor": "deepseek", "context_size": 32768, "input_token_cost": 0.14, "output_token_cost": 0.28},
    "deepseek-coder": {"vendor": "deepseek", "context_size": 16384, "input_token_cost": 0.14, "output_token_cost": 0.28},
}
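# To add a model, add one entry keyed by the exact model name the vendor's API
# expects, with per-million-token costs. A hypothetical example (made-up name
# and pricing; check the vendor's pricing page for real numbers):
# "my-new-model": {"vendor": "openrouter", "context_size": 32768,
#                  "input_token_cost": 0.20, "output_token_cost": 0.40},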
def calculate_cost(prompt_tokens, completion_tokens, model_name):
    cost = 0.0
    if model_name in model_map:
        input_token_cost = model_map[model_name].get("input_token_cost", 0)
        output_token_cost = model_map[model_name].get("output_token_cost", 0)
        # Costs in model_map are dollars per million tokens.
        cost = (prompt_tokens * input_token_cost + completion_tokens * output_token_cost) / 1000000
    else:
        # Return 0.0 rather than an error string so callers can keep summing costs.
        print(f"Model {model_name} not found in our model_map cost list")
    return cost
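# A quick sanity check of the formula above: with the gpt-4o rates in the
# table ($5.00 in, $15.00 out per million tokens), 1000 prompt tokens and
# 500 completion tokens cost (1000*5.00 + 500*15.00) / 1e6 = $0.0125, i.e.
# calculate_cost(1000, 500, "gpt-4o") == 0.0125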
# Sidebar - used to show the conversation title and model selection
with st.sidebar:
    st.title(chat_title)
    # Create the model selector based on the keys of model_map.
    model_menu_items = list(model_map.keys())
    selected_model_name = st.radio("Choose a model:", model_menu_items)
    if model_map[selected_model_name]['input_token_cost'] > 0 or model_map[selected_model_name]['output_token_cost'] > 0:
        max_tokens = st.slider("Max tokens", min_value=20,
                               max_value=model_map[selected_model_name]['context_size'],
                               value=512, step=1)
    temperature = st.slider("temperature", min_value=0.0, max_value=2.0, value=0.8, step=.1)
    counter_placeholder = st.empty()
    tmp_input_cost = model_map[selected_model_name].get("input_token_cost", 0)
    tmp_output_cost = model_map[selected_model_name].get("output_token_cost", 0)
    counter_placeholder.write(f"Total cost (model: \${tmp_input_cost:.2f}, \${tmp_output_cost:.2f}): \${st.session_state['total_cost']:.2f}")
    uploaded_file = st.file_uploader("Load Conversation", type="json", key=f"load{st.session_state['uploaded_file_key']}", help="Load a conversation from a JSON file")
    file_name = st.text_input('File to export to:', 'termi-chat1.json')
    users_name = st.text_input('User name:', 'Dennis')
    system_context = st.text_area('System context:', st.session_state['system_context'], height=100)
    # Create columns for buttons; this gets them side by side
    col1, col2 = st.columns(2, gap='small')
    clear_button = col1.button("Clear", key="clear", help="Clear the conversation history")
    save_button = col1.button("Save", key="save", help="Save the conversation to a JSON file")
    dump_button = col2.button("Dump", key="dump", help="Dump the conversation history to the console")
    refresh_button = col2.button("Update", key="refresh", help="Refresh the page")
    switch_mode = col2.button("Style", key="switch", help="Switch conversation output style")
if system_context != st.session_state['system_context']:
    # The system prompt has changed, so apply it to our saved messages.
    st.session_state['system_context'] = system_context
    new_messages = st.session_state['messages'][1:]
    st.session_state['messages'] = []
    st.session_state['messages'].append({"role": "system", "content": st.session_state['system_context']})
    for msg in new_messages:
        st.session_state['messages'].append(msg)
if switch_mode:
    if st.session_state['output_mode'] == "old":
        st.session_state['output_mode'] = "new"
    else:
        st.session_state['output_mode'] = "old"
if save_button:
    # Serialize the conversation to a JSON string
    conversation_json = json.dumps(st.session_state['messages'], indent=4)
    # Create a download button for the JSON string
    st.sidebar.download_button(label="Download Conversation",
                               data=conversation_json,
                               file_name=file_name,
                               mime="application/json")
#print(f"\nUploaded file: {uploaded_file}")
#print(f"\nSession file: {st.session_state['uploaded_file']}")
if uploaded_file != None and uploaded_file != st.session_state['uploaded_file']:
string_data = uploaded_file.getvalue().decode("utf-8")
data = json.loads(string_data)
# Update the session state with the loaded conversation
if len(data) > 0 and data[0]['role'] == "system":
system_context = data[0]['content']
new_messages = data[1:]
else:
system_context = DEFAULT_SYSTEM_CONTEXT
new_messages = data
st.session_state['system_context'] = system_context
st.session_state['messages'] = []
st.session_state['messages'].append({"role": "system", "content": system_context})
for msg in new_messages:
st.session_state['messages'].append(msg)
if msg['role'] == "assistant":
# Protect against missing fields in the JSON file
st.session_state['model_name'].append(msg.get('model', "None"))
st.session_state['total_tokens'].append(msg.get('total_tokens', 0.0))
st.session_state['cost'].append(msg.get('cost', 0.0))
st.session_state['timestamp'].append(msg.get('timestamp', "2024-09-09-09:09"))
st.session_state['system_context'] = system_context
print(f"Loaded conversation: {uploaded_file}")
# Fill in the user and assistant lists
st.session_state['user'] = []
st.session_state['assistant'] = []
for msg in new_messages:
if msg['role'] == "user":
st.session_state['user'].append(msg['content'])
elif msg['role'] == "assistant":
st.session_state['assistant'].append(msg['content'])
if len(st.session_state['assistant']) != len(st.session_state['user']):
print("Error loading: assistant and user lists are not the same length")
st.session_state['uploaded_file'] = uploaded_file
# Increment the key and refresh so the file uploader will clear
st.session_state['uploaded_file_key'] += 1
st.rerun()
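# For reference, a saved conversation file looks roughly like this (the extra
# assistant fields are optional, hence the .get() defaults above):
# [
#     {"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": "Hello!"},
#     {"role": "assistant", "content": "Hi there!",
#      "timestamp": "2024-09-09-09:09", "model": "gpt-4o", "cost": 0.0001}
# ]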
#print("running")
if refresh_button:
print("")
if dump_button:
json.dump(st.session_state['messages'], sys.stdout, indent=4)
print("")
# flush the output buffer
sys.stdout.flush()
# Reset all variables including conversation history
if clear_button:
    st.session_state['assistant'] = []
    st.session_state['user'] = []
    st.session_state['messages'] = [
        {"role": "system", "content": system_context}
    ]
    st.session_state['timestamp'] = []
    st.session_state['model_name'] = []
    st.session_state['cost'] = []
    st.session_state['total_cost'] = 0.0
    st.session_state['total_tokens'] = []
    #counter_placeholder.write(f"Total cost of this conversation: ${st.session_state['total_cost']:.5f}")
# Take a list of messages and strip anything but role and content
# because the api calls only want role and content.
def extract_role_and_content(input_messages):
    messages = []
    for msg in input_messages:
        messages.append({"role": msg["role"], "content": msg["content"]})
    return messages
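# For example, a stored assistant message like
#   {"role": "assistant", "content": "Hi", "model": "gpt-4o", "cost": 0.0001}
# comes back as just {"role": "assistant", "content": "Hi"}.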
# Get responses from ollama models. Cost = $0
# Sadly, the ollama API doesn't support max_tokens (its closest option is
# num_predict), so we don't pass it.
# https://github.com/ollama/ollama/blob/4ec7445a6f678b6efc773bb9fa886d7c9b075577/docs/modelfile.md#valid-parameters-and-values
# default temperature = 0.8
def ollama_generate_response(model, max_tokens, messages, temperature):
    from ollama import Client
    client = Client(host='http://localhost:11434')
    try:
        completion = client.chat(
            model=model,
            messages=messages,
            options={
                "temperature": temperature
            }
        )
        response = completion['message']['content'].strip()
    except Exception as e:
        error_text = f"Error in ollama server: Error: {str(e)}"
        response = error_text
        return response, 0, 0, 0
    # In the ollama response, prompt_eval_count is the prompt token count and
    # eval_count is the completion token count.
    prompt_tokens = completion['prompt_eval_count']
    completion_tokens = completion['eval_count']
    total_tokens = prompt_tokens + completion_tokens
    return response, total_tokens, prompt_tokens, completion_tokens
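# Example use (assumes an ollama server is running locally with the model pulled):
# reply, total, prompt, completion = ollama_generate_response(
#     "llama3:8b", 512, [{"role": "user", "content": "Hi"}], 0.8)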
# Get responses from chatgpt; cost is based on tokens and model type
# See ./.streamlit/secrets.toml for environment variables visible to
# streamlit.
def generate_response(model, max_tokens, messages, temperature):
    from openai import OpenAI
    from os import getenv
    if model_map[model]['vendor'] == "openrouter":
        # Openrouter can use the OpenAI API but we need their base URL and API key
        base_url = "https://openrouter.ai/api/v1"
        api_key = getenv("OPENROUTER_API_KEY")
        client = OpenAI(base_url=base_url, api_key=api_key)
    elif model_map[model]['vendor'] == "deepseek":
        # Deepseek models can use the OpenAI API but we need their base URL and API key
        base_url = "https://api.deepseek.com/"
        api_key = getenv("DEEPSEEK_API_KEY")
        client = OpenAI(base_url=base_url, api_key=api_key)
    elif model_map[model]['vendor'] == "openai":
        api_key = getenv("OPENAI_API_KEY")
        client = OpenAI(api_key=api_key)
    else:
        response = f"Error: model {model} not found in our model_map list"
        return response, 0, 0, 0
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature
        )
        response = completion.choices[0].message.content.strip('\n')
    except Exception as e:
        error_text = f"Error in {model_map[model]['vendor']} server: Error: {str(e)}"
        response = error_text
        return response, 0, 0, 0
    total_tokens = completion.usage.total_tokens
    prompt_tokens = completion.usage.prompt_tokens
    completion_tokens = completion.usage.completion_tokens
    return response, total_tokens, prompt_tokens, completion_tokens
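# Example use (assumes OPENAI_API_KEY is set in the environment):
# reply, total, prompt, completion = generate_response(
#     "gpt-4o", 512, [{"role": "user", "content": "Hi"}], 0.8)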
# container for chat history
response_container = st.container()
# container for text box
prompt_container = st.container()
with prompt_container:
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_area(f"{users_name}:", key='input', height=200)
        submit_button = st.form_submit_button(label='Send')
        add_keyboard_shortcuts({'Shift+Enter': submit_button})
if submit_button and user_input:
    # Pass in a copy of the messages in case something goes wrong. This protects
    # against having a user message without a matching assistant message. We also
    # need a version of the messages list with just role and content for the API.
    tmp_messages = extract_role_and_content(st.session_state['messages'])
    tmp_messages.append({"role": "user", "content": user_input})
    # During inference, the user can click buttons which will abort the inference.
    with st.spinner("Thinking..."):
        if model_map[selected_model_name]['vendor'] == "openai" or \
           model_map[selected_model_name]['vendor'] == "openrouter" or \
           model_map[selected_model_name]['vendor'] == "deepseek":
            output, total_tokens, prompt_tokens, completion_tokens = generate_response(selected_model_name, max_tokens, tmp_messages, temperature)
        elif model_map[selected_model_name]['vendor'] == "ollama":
            output, total_tokens, prompt_tokens, completion_tokens = ollama_generate_response(selected_model_name, max_tokens, tmp_messages, temperature)
        else:
            output = f"Error: model {selected_model_name} not found in our list"
            total_tokens = prompt_tokens = completion_tokens = 0
    st.session_state['user'].append(user_input)
    st.session_state['assistant'].append(output)
    st.session_state['model_name'].append(selected_model_name)
    st.session_state['total_tokens'].append(total_tokens)
    cost = calculate_cost(prompt_tokens, completion_tokens, selected_model_name)
    # Only after we successfully get a response do we update the messages with
    # both user and assistant messages.
    st.session_state['messages'].append({"role": "user", "content": user_input})
    st.session_state['messages'].append({"role": "assistant", "content": output,
                                         "timestamp": datetime.now().strftime("%Y-%m-%d-%H:%M"),
                                         "model": selected_model_name,
                                         "cost": cost})
    st.session_state['cost'].append(cost)
    st.session_state['total_cost'] += cost
if st.session_state['assistant']:
    with response_container:
        # Draw the interleaved conversation.
        for i in range(len(st.session_state['assistant'])):
            # with st.chat_message('user', avatar='🧑‍💻'):
            # with st.chat_message('user'):
            #     st.write(st.session_state['user'][i])
            message(st.session_state['user'][i], is_user=True, key=str(i) + '_user')
            if st.session_state['output_mode'] == "old":
                message(st.session_state['assistant'][i].replace('\t', ' '), key=str(i), avatar_style='adventurer')
            else:
                # Alternative avatars: 👱‍♀️, 🧑🏻‍🦰
                with st.chat_message('assistant', avatar='https://raw.githubusercontent.com/dataprofessor/streamlit-chat-avatar/master/bot-icon.png'):
                    st.write(st.session_state['assistant'][i].replace('\t', ' '))
            st.write(f"Model: {st.session_state['model_name'][i]}; Tokens: {st.session_state['total_tokens'][i]}; Cost: ${st.session_state['cost'][i]:.5f}")
        counter_placeholder.write(f"Total cost of conversation: ${st.session_state['total_cost']:.5f}")