
Commit 3145542
chore: autopublish 2024-04-06T16:52:20Z
github-actions[bot] committed Apr 6, 2024
1 parent 210e786 commit 3145542
Showing 2 changed files with 66 additions and 27 deletions.
notebooks/llm_inference_experiments.ipynb: 64 changes (43 additions & 21 deletions)
@@ -377,6 +377,7 @@
 "source": [
 "import requests\n",
 "\n",
+"\n",
 "class PromptPreparator:\n",
 "    def __init__(self, api_key, models_config):\n",
 "        self.api_key = api_key\n",
@@ -386,18 +387,20 @@
 "        self.output_token_price = 0\n",
 "\n",
 "    def set_model(self, model_name):\n",
-"        for group in self.models_config['models']:\n",
-"            for variant in group['variants']:\n",
-"                if variant['model'] == model_name:\n",
+"        for group in self.models_config[\"models\"]:\n",
+"            for variant in group[\"variants\"]:\n",
+"                if variant[\"model\"] == model_name:\n",
 "                    self.model = model_name\n",
-"                    self.input_token_price = variant.get('input_price_per_token', 0)\n",
-"                    self.output_token_price = variant.get('output_price_per_token', 0)\n",
+"                    self.input_token_price = variant.get(\"input_price_per_token\", 0)\n",
+"                    self.output_token_price = variant.get(\"output_price_per_token\", 0)\n",
 "                    return\n",
 "        raise ValueError(f\"Model {model_name} not found in configuration.\")\n",
 "\n",
 "    def query_openai(self, prompt_text, max_completion_tokens=100, temperature=0.7):\n",
 "        if not self.model:\n",
-"            raise ValueError(\"Model not set. Please use set_model() to set a model before querying.\")\n",
+"            raise ValueError(\n",
+"                \"Model not set. Please use set_model() to set a model before querying.\"\n",
+"            )\n",
 "        url = \"https://api.openai.com/v1/chat/completions\"\n",
 "        headers = {\"Authorization\": f\"Bearer {self.api_key}\"}\n",
 "        payload = {\n",
@@ -415,36 +418,49 @@
 "                usage = data[\"usage\"]\n",
 "                return content, usage\n",
 "            else:\n",
-"                return f\"HTTP Error {response.status_code}: {response.json().get('error', {}).get('message', 'An unspecified error occurred')}\", None\n",
+"                return (\n",
+"                    f\"HTTP Error {response.status_code}: {response.json().get('error', {}).get('message', 'An unspecified error occurred')}\",\n",
+"                    None,\n",
+"                )\n",
 "        except requests.RequestException as e:\n",
 "            return f\"Connection error: {e}\", None\n",
 "\n",
 "    def calculate_inference_price(self, usage):\n",
 "        if usage:\n",
-"            total_price = (usage[\"prompt_tokens\"] * self.input_token_price) + (usage[\"completion_tokens\"] * self.output_token_price)\n",
+"            total_price = (usage[\"prompt_tokens\"] * self.input_token_price) + (\n",
+"                usage[\"completion_tokens\"] * self.output_token_price\n",
+"            )\n",
 "            return total_price\n",
 "        else:\n",
 "            return None\n",
-"    \n",
-"    def determine_expertise_area(self, user_question, max_completion_tokens=150, temperature=0.2):\n",
+"\n",
+"    def determine_expertise_area(\n",
+"        self, user_question, max_completion_tokens=150, temperature=0.2\n",
+"    ):\n",
 "        prompt_text = f\"\"\"Based on the question provided, identify the relevant expertise area(s). Return your answer in the format: \n",
 "        'expertise1, expertise2, ...'. Provide only the expertise areas as a comma-separated list, no additional explanations are needed.\n",
 "        Here is the user Question:\n",
 "        {user_question}\n",
 "        \"\"\"\n",
-"        response, usage = self.query_openai(prompt_text, max_completion_tokens, temperature)\n",
-"        return response.strip(), usage if response else \"Error determining expertise area.\"\n",
+"        response, usage = self.query_openai(\n",
+"            prompt_text, max_completion_tokens, temperature\n",
+"        )\n",
+"        return response.strip(), (\n",
+"            usage if response else \"Error determining expertise area.\"\n",
+"        )\n",
 "\n",
 "    def prepare_prompt_for_llm(self, expertise_area, user_question, context_documents):\n",
-"        prompt = (f\"You are an expert in '{expertise_area}'. A user has asked for help with the following question: \"\n",
-"                  f\"'{user_question}'. Please provide insights using only the information from the provided documents. \"\n",
-"                  \"If certain aspects are ambiguous or the documents do not fully address the question, please make educated inferences based on your expertise.\\n\\n\"\n",
-"                  \"Here are the documents provided:\\n\\n\")\n",
+"        prompt = (\n",
+"            f\"You are an expert in '{expertise_area}'. A user has asked for help with the following question: \"\n",
+"            f\"'{user_question}'. Please provide insights using only the information from the provided documents. \"\n",
+"            \"If certain aspects are ambiguous or the documents do not fully address the question, please make educated inferences based on your expertise.\\n\\n\"\n",
+"            \"Here are the documents provided:\\n\\n\"\n",
+"        )\n",
 "        for i, document in enumerate(context_documents, start=1):\n",
 "            prompt += f'Document {i}:\\n\"\"\"\\n{document}\\n\"\"\"\\n\\n'\n",
 "        prompt += \"Given your expertise and the information provided in these documents, synthesize the key insights to craft a detailed and relevant response to the above question.\\n\\n\"\n",
 "        prompt += \"Start your response below:\\n\\n\"\n",
-"        return prompt "
+"        return prompt"
 ]
 },
 {
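For reference, `set_model` walks a nested config: a top-level `models` list whose entries each hold a `variants` list, and each variant carries `model`, `input_price_per_token`, and `output_price_per_token`. A minimal sketch of that shape, with hypothetical model names and prices (the structure is inferred from the lookup above; the values are purely illustrative):

# Hypothetical models_config; shape inferred from set_model's lookup, values illustrative.
models_config = {
    "models": [
        {
            "variants": [
                {
                    "model": "gpt-3.5-turbo",
                    "input_price_per_token": 0.0000005,
                    "output_price_per_token": 0.0000015,
                }
            ]
        }
    ]
}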
@@ -524,17 +540,19 @@
 "source": [
 "import yaml\n",
 "\n",
+"\n",
 "def load_models_config(file_path):\n",
-"    with open(file_path, 'r') as stream:\n",
+"    with open(file_path, \"r\") as stream:\n",
 "        try:\n",
 "            models_config = yaml.safe_load(stream)\n",
 "            return models_config\n",
 "        except yaml.YAMLError as exc:\n",
 "            print(exc)\n",
 "            return None\n",
 "\n",
+"\n",
 "# Example usage\n",
-"file_path = '../config/models_config.yml'\n",
+"file_path = \"../config/models_config.yml\"\n",
 "models_config = load_models_config(file_path)\n",
 "models_config"
 ]
@@ -655,7 +673,9 @@
 "    \"Document 1 content about RAG...\",\n",
 "    \"Document 2 content about LLMs & RAGs...\",\n",
 "]\n",
-"prompt = inference.prepare_prompt_for_llm(expertise_area, user_question, context_documents)\n",
+"prompt = inference.prepare_prompt_for_llm(\n",
+"    expertise_area, user_question, context_documents\n",
+")\n",
 "print(prompt)"
 ]
 },
@@ -688,7 +708,9 @@
 }
 ],
 "source": [
-"final_response, final_usage = inference.query_openai(prompt, max_completion_tokens=1500, temperature=0.7)\n",
+"final_response, final_usage = inference.query_openai(\n",
+"    prompt, max_completion_tokens=1500, temperature=0.7\n",
+")\n",
 "print(final_response)"
 ]
 },
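`calculate_inference_price` is defined above but never exercised in this diff. Pairing it with the usage dict returned by the final call would look roughly like this (a sketch, not a cell from the notebook):

# Hypothetical follow-up: price the final call from its token usage.
if final_usage is not None:
    cost = inference.calculate_inference_price(final_usage)
    print(f"Estimated cost: ${cost:.6f}")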
notebooks/vectorization_experiments.ipynb: 29 changes (23 additions & 6 deletions)
@@ -491,8 +491,13 @@
 "import re\n",
 "from tqdm.auto import tqdm\n",
 "\n",
+"\n",
 "class OpenAIEmbeddings:\n",
-"    def __init__(self, credentials_path=\"../secrets/credentials.yml\", embedding_model=\"text-embedding-ada-002\"):\n",
+"    def __init__(\n",
+"        self,\n",
+"        credentials_path=\"../secrets/credentials.yml\",\n",
+"        embedding_model=\"text-embedding-ada-002\",\n",
+"    ):\n",
 "        self.credentials_path = credentials_path\n",
 "        self.embedding_model = embedding_model\n",
 "        self.api_key = self.load_credentials()\n",
@@ -509,7 +514,9 @@
 "        with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
 "            text = file.read()\n",
 "        sections = re.split(r\"\\n(#{1,3} .*)\\n\", text)\n",
-"        self.texts = [sections[0]] + [sections[i] + sections[i + 1] for i in range(1, len(sections), 2)]\n",
+"        self.texts = [sections[0]] + [\n",
+"            sections[i] + sections[i + 1] for i in range(1, len(sections), 2)\n",
+"        ]\n",
 "        return self.texts\n",
 "\n",
 "    def query_openai_embedding(self, text):\n",
@@ -528,7 +535,13 @@
 "            return None\n",
 "\n",
 "    def generate_embeddings(self):\n",
-"        self.embeddings = np.array([self.query_openai_embedding(text) for text in tqdm(self.texts) if text is not None])\n",
+"        self.embeddings = np.array(\n",
+"            [\n",
+"                self.query_openai_embedding(text)\n",
+"                for text in tqdm(self.texts)\n",
+"                if text is not None\n",
+"            ]\n",
+"        )\n",
 "\n",
 "    def create_faiss_index(self):\n",
 "        dimension = self.embeddings.shape[1]\n",
@@ -537,7 +550,9 @@
 "\n",
 "    def search_similar_sections(self, query_text, num_results=2):\n",
 "        query_embedding = self.query_openai_embedding(query_text)\n",
-"        distances, indices = self.faiss_index.search(np.array([query_embedding], dtype=\"float32\"), num_results)\n",
+"        distances, indices = self.faiss_index.search(\n",
+"            np.array([query_embedding], dtype=\"float32\"), num_results\n",
+"        )\n",
 "        return [{\"index\": idx, \"text\": self.texts[idx]} for idx in indices[0]]\n",
 "\n",
 "    def save_faiss_index(self, index_path=\"../faiss_index.bin\"):\n",
@@ -555,8 +570,10 @@
 "outputs": [],
 "source": [
 "# Initialize the class\n",
-"embedder = OpenAIEmbeddings(credentials_path=\"../secrets/credentials.yml\", \n",
-"                            embedding_model=\"text-embedding-ada-002\")"
+"embedder = OpenAIEmbeddings(\n",
+"    credentials_path=\"../secrets/credentials.yml\",\n",
+"    embedding_model=\"text-embedding-ada-002\",\n",
+")"
 ]
 },
 {
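Taken together, the class sketches a load, embed, index, and search pipeline. A minimal end-to-end sketch follows; the loader's method name is an assumption (its `def` line falls outside the hunks above), and the file path and query are illustrative:

# Hypothetical usage of OpenAIEmbeddings. The name load_and_process_text is assumed,
# since its def line is not visible in this diff; the path and query are illustrative.
embedder.load_and_process_text("../data/example.md")  # read the file, split on markdown headings
embedder.generate_embeddings()  # one OpenAI embedding per section, collected into a numpy array
embedder.create_faiss_index()  # build a FAISS index over the embedding matrix
results = embedder.search_similar_sections("What are embeddings?", num_results=2)
for hit in results:
    print(hit["index"], hit["text"][:80])
embedder.save_faiss_index("../faiss_index.bin")  # default index_path from the signature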
