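"""Async batched chat-completion helpers for the OpenAI and Anthropic APIs.

Conversations are dispatched concurrently with asyncio, rate-limit errors are
retried with exponential backoff, and token usage is tallied for a few OpenAI
models so gpt_usage() can report an approximate cost.
"""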
import openai
import asyncio
from openai import AsyncOpenAI
from typing import List, Dict, Union
from tqdm.asyncio import tqdm_asyncio
import anthropic
import backoff
from dotenv import load_dotenv
load_dotenv()
completion_tokens = {"gpt-4": 0,
                     "gpt-4-1106-preview": 0,
                     "gpt-3.5-turbo": 0,
                     "gpt-3.5-turbo-16k": 0}
prompt_tokens = {"gpt-4": 0,
                 "gpt-4-1106-preview": 0,
                 "gpt-3.5-turbo": 0,
                 "gpt-3.5-turbo-16k": 0}
async def generate_from_openai_chat_completion(
    messages_list: List[List[Dict[str, str]]],
    model: str,
    temperature: float,
    max_tokens: int,
    top_p: float,
    stop: Union[str, List[str]],
    requests_per_minute: int = 300,
    verbose=False,
    **kwargs,
) -> list:
    client = AsyncOpenAI()
    # NOTE: requests_per_minute is accepted for interface compatibility but is
    # not enforced here; rate-limit errors are handled by backoff-and-retry.
    async_responses = []
    for message in messages_list:
        task = asyncio.create_task(
            generate_answer(message, client, model, temperature, max_tokens, top_p, stop))
        async_responses.append(task)
    responses = await tqdm_asyncio.gather(*async_responses, disable=not verbose)
    return responses
@backoff.on_exception(backoff.expo, openai.RateLimitError)
async def generate_answer(prompt, client, model, temperature, max_tokens, top_p, stop):
    """
    Send one conversation to the OpenAI chat API and return the raw response.
    :param prompt: the list of chat messages to send.
    :return: the raw ChatCompletion response.
    """
    response = await client.chat.completions.create(
        model=model,
        messages=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stop=stop,
    )
    return response
async def generate_from_anthropic_chat_completion(
    messages_list: List[List[Dict[str, str]]],
    model: str,
    temperature: float,
    max_tokens: int,
    top_p: float,
    stop: Union[str, List[str]],
    requests_per_minute: int = 300,
    system_prompt: str = None,
    verbose=False,
    **kwargs,
) -> list:
    client = anthropic.AsyncAnthropic()
    # top_p, stop, and requests_per_minute are accepted for interface parity
    # with the OpenAI helper but are not forwarded here.
    async_responses = []
    for message in messages_list:
        task = asyncio.create_task(
            generate_answer_anthropic(message, client, model, max_tokens, temperature, system_prompt))
        async_responses.append(task)
    responses = await tqdm_asyncio.gather(*async_responses, disable=not verbose)
    return responses
@backoff.on_exception(backoff.expo, anthropic.RateLimitError)
async def generate_answer_anthropic(message, client, model, max_tokens, temperature, system_prompt=None):
    """
    Send one conversation to the Anthropic messages API and return the raw response.
    :param message: the list of chat messages to send.
    :return: the raw Message response.
    """
    # Anthropic takes the system prompt as a top-level parameter, not as a
    # message with role "system".
    extra = {"system": system_prompt} if system_prompt is not None else {}
    response = await client.messages.create(
        model=model,
        max_tokens=max_tokens,
        messages=message,
        temperature=temperature,
        **extra,
    )
    return response
def gpt(prompt, model="gpt-4", temperature=0.7, max_tokens=2000, n=1, stop=None, **kwargs) -> str:
    # Convenience wrapper: samples n completions for one prompt and returns the first.
    return gpts([prompt] * n, model=model, temperature=temperature, max_tokens=max_tokens, stop=stop, **kwargs)[0]
def gpts(prompts, model="gpt-4", temperature=0.7, max_tokens=2000, stop=None,
         system_prompt: str = None,
         **kwargs) -> list:
    '''
    system_prompt: string added as a special system message at the beginning of the conversation
    '''
    if system_prompt is not None:
        messages_list = [[{'role': 'system', 'content': system_prompt},
                          {"role": "user", "content": prompt}] for prompt in prompts]
    else:
        messages_list = [[{"role": "user", "content": prompt}] for prompt in prompts]
    return chatgpts(messages_list, model=model, temperature=temperature, max_tokens=max_tokens, stop=stop, **kwargs)
def chatgpt(messages, model="gpt-4", temperature=0.7, max_tokens=2000, n=1, stop=None, **kwargs) -> str:
    # Samples n completions for one conversation and returns the first.
    return chatgpts([messages] * n, model=model, temperature=temperature, max_tokens=max_tokens, stop=stop, **kwargs)[0]
def chatgpt_raw(messages, model="gpt-4", temperature=0.7, max_tokens=2000, n=1, stop=None, **kwargs):
    # Same as chatgpt, but returns the raw response message object.
    return chatgpts_raw([messages] * n, model=model, temperature=temperature, max_tokens=max_tokens, stop=stop, **kwargs)[0]
def chatgpts(messages_list, model="gpt-4", temperature=0.7, max_tokens=2000, stop=None, max_messages=400, **kwargs) -> list:
    texts = []
    for i in range(0, len(messages_list), max_messages):
        responses = asyncio.run(generate_from_openai_chat_completion(model=model, messages_list=messages_list[i: i + max_messages], temperature=temperature, max_tokens=max_tokens, top_p=1, stop=stop, **kwargs))
        texts.extend([x.choices[0].message.content for x in responses])
        # Tally usage so gpt_usage() can report a cost; the 1.x OpenAI client
        # returns attribute-style objects, not dicts.
        if model in completion_tokens:
            completion_tokens[model] += sum(x.usage.completion_tokens for x in responses if x.usage)
            prompt_tokens[model] += sum(x.usage.prompt_tokens for x in responses if x.usage)
    return texts
def chatgpts_raw(messages_list, model="gpt-4", temperature=0.7, max_tokens=2000, stop=None, max_messages=400, **kwargs) -> list:
    '''
    Returns raw response messages, not just the text content.
    '''
    responses_all = []
    for i in range(0, len(messages_list), max_messages):
        responses = asyncio.run(generate_from_openai_chat_completion(model=model, messages_list=messages_list[i: i + max_messages], temperature=temperature, max_tokens=max_tokens, top_p=1, stop=stop, **kwargs))
        # Attribute access, not dict indexing, on the 1.x client's responses.
        responses_all.extend([x.choices[0].message for x in responses])
        if model in completion_tokens:
            completion_tokens[model] += sum(x.usage.completion_tokens for x in responses if x.usage)
            prompt_tokens[model] += sum(x.usage.prompt_tokens for x in responses if x.usage)
    return responses_all
def claude(prompts, model="claude-3-sonnet-20240229", temperature=0.7, max_tokens=3000, stop=None, max_messages=400, system_prompt=None, **kwargs) -> list:
    texts = []
    # Anthropic rejects a "system" role inside messages, so the system prompt
    # is threaded through as a top-level parameter instead.
    messages_list = [[{"role": "user", "content": prompt}] for prompt in prompts]
    for i in range(0, len(messages_list), max_messages):
        responses = asyncio.run(generate_from_anthropic_chat_completion(model=model, messages_list=messages_list[i: i + max_messages], temperature=temperature, max_tokens=max_tokens, top_p=1, stop=stop, system_prompt=system_prompt, **kwargs))
        texts.extend([x.content[0].text for x in responses])
    return texts
def gpt_usage():
    # Approximate cost in USD at per-1K-token rates for each tracked model.
    cost = completion_tokens["gpt-4"] / 1000 * 0.06 + prompt_tokens["gpt-4"] / 1000 * 0.03
    cost += completion_tokens["gpt-4-1106-preview"] / 1000 * 0.03 + prompt_tokens["gpt-4-1106-preview"] / 1000 * 0.01
    cost += completion_tokens["gpt-3.5-turbo"] / 1000 * 0.002 + prompt_tokens["gpt-3.5-turbo"] / 1000 * 0.0015
    cost += completion_tokens["gpt-3.5-turbo-16k"] / 1000 * 0.004 + prompt_tokens["gpt-3.5-turbo-16k"] / 1000 * 0.003
    return {"completion_tokens": completion_tokens, "prompt_tokens": prompt_tokens, "cost": cost}