chore: autopublish 2024-04-06T23:24:17Z
github-actions[bot] committed Apr 6, 2024
1 parent a90f906 commit 2fffe51
Showing 2 changed files with 27 additions and 12 deletions.
notebooks/llm_inference_experiments.ipynb (4 additions, 2 deletions)
@@ -377,6 +377,7 @@
 "source": [
 "import requests\n",
 "\n",
+"\n",
 "class ModelInferenceManager:\n",
 " def __init__(self, api_key, models_config):\n",
 " self.api_key = api_key\n",
@@ -390,8 +391,8 @@
 " for variant in group[\"variants\"]:\n",
 " if variant[\"model\"] == model_name:\n",
 " self.model = model_name\n",
-" self.input_token_price = variant['input_price_per_token']\n",
-" self.output_token_price = variant['output_price_per_token']\n",
+" self.input_token_price = variant[\"input_price_per_token\"]\n",
+" self.output_token_price = variant[\"output_price_per_token\"]\n",
 " return\n",
 " raise ValueError(f\"Model {model_name} not found in configuration.\")\n",
 "\n",
@@ -539,6 +540,7 @@
 "source": [
 "import yaml\n",
 "\n",
+"\n",
 "def load_models_config(config_file_path):\n",
 " with open(config_file_path, \"r\") as config_file:\n",
 " try:\n",
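The hunk is cut off inside the `try:` block, so the function body is not visible here. A plausible completion, assuming the function simply parses the file with yaml.safe_load; the body and the exception handling are assumptions, not the notebook's actual code:

import yaml


def load_models_config(config_file_path):
    with open(config_file_path, "r") as config_file:
        try:
            return yaml.safe_load(config_file)  # assumed body
        except yaml.YAMLError as exc:  # assumed handler
            print(f"Error parsing YAML config: {exc}")
            return None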
notebooks/vectorization_experiments.ipynb (23 additions, 10 deletions)
@@ -488,6 +488,7 @@
 "from tqdm.auto import tqdm\n",
 "import yaml\n",
 "\n",
+"\n",
 "class SemanticVectorizer:\n",
 " def __init__(self, api_key, models_config):\n",
 " self.api_key = api_key\n",
@@ -504,27 +505,28 @@
 " for variant in group[\"variants\"]:\n",
 " if variant[\"model\"] == model_name:\n",
 " self.model = model_name\n",
-" self.usage_price_per_token = variant.get('usage_price_per_token', 0)\n",
+" self.usage_price_per_token = variant.get(\"usage_price_per_token\", 0)\n",
 " found = True\n",
 " break\n",
 " if found:\n",
 " break\n",
 " if not found:\n",
 " raise ValueError(f\"Model {model_name} not found in configuration.\")\n",
-" \n",
+"\n",
 " def preprocess_text(self, text):\n",
 " \"\"\"\n",
 " Preprocesses the text before embedding.\n",
 " \"\"\"\n",
 " text = text.lower()\n",
 " return text.replace(\"\\n\", \" \").strip()\n",
-" \n",
+"\n",
 " def read_and_process_markdown(self, file_path):\n",
 " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
 " text = file.read()\n",
 " sections = re.split(r\"\\n(#{1,3} .*)\\n\", text)\n",
 " self.texts = [self.preprocess_text(sections[0])] + [\n",
-" self.preprocess_text(f\"{sections[i]}\\n{sections[i + 1]}\") for i in range(1, len(sections), 2)\n",
+" self.preprocess_text(f\"{sections[i]}\\n{sections[i + 1]}\")\n",
+" for i in range(1, len(sections), 2)\n",
 " ]\n",
 " return self.texts\n",
 "\n",
@@ -542,7 +544,9 @@
 " embedding = np.array(data[\"data\"][0][\"embedding\"], dtype=\"float32\")\n",
 " return embedding, data.get(\"usage\", {})\n",
 " else:\n",
-" print(f\"Failed to generate embedding: Status code {response.status_code}, Response: {response.text}\")\n",
+" print(\n",
+" f\"Failed to generate embedding: Status code {response.status_code}, Response: {response.text}\"\n",
+" )\n",
 " return None, None\n",
 "\n",
 " def generate_embeddings(self):\n",
@@ -567,11 +571,19 @@
 " def search_similar_sections(self, query_text, num_results):\n",
 " query_embedding, _ = self.query_openai_embedding(query_text)\n",
 " if self.faiss_index is None:\n",
-" raise ValueError(\"FAISS index is not initialized. Please create the index before searching.\")\n",
+" raise ValueError(\n",
+" \"FAISS index is not initialized. Please create the index before searching.\"\n",
+" )\n",
 " if query_embedding is None:\n",
 " return []\n",
-" distances, indices = self.faiss_index.search(np.array([query_embedding], dtype=\"float32\"), num_results)\n",
-" return [{\"index\": idx, \"text\": self.texts[idx]} for idx in indices[0] if idx < len(self.texts)]\n",
+" distances, indices = self.faiss_index.search(\n",
+" np.array([query_embedding], dtype=\"float32\"), num_results\n",
+" )\n",
+" return [\n",
+" {\"index\": idx, \"text\": self.texts[idx]}\n",
+" for idx in indices[0]\n",
+" if idx < len(self.texts)\n",
+" ]\n",
 "\n",
 " def save_faiss_index(self, index_path):\n",
 " if self.faiss_index:\n",
@@ -581,7 +593,7 @@
 "\n",
 " def load_faiss_index(self, index_path):\n",
 " self.faiss_index = faiss.read_index(index_path)\n",
-" \n",
+"\n",
 " def calculate_cost(self, usage):\n",
 " total_tokens = usage.get(\"total_tokens\", 0)\n",
 " total_price = total_tokens * self.usage_price_per_token\n",
@@ -665,6 +677,7 @@
 "source": [
 "import yaml\n",
 "\n",
+"\n",
 "def load_models_config(config_file_path):\n",
 " with open(config_file_path, \"r\") as config_file:\n",
 " try:\n",
@@ -847,7 +860,7 @@
 "# Query text for searching in the FAISS index.\n",
 "query_text = \"Health\"\n",
 "\n",
-"# Similarity search in the FAISS index \n",
+"# Similarity search in the FAISS index\n",
 "results = new_embedder.search_similar_sections(query_text, num_results=2)\n",
 "\n",
 "# Print the search results.\n",
