chore: autopublish 2024-04-06T23:24:17Z
github-actions[bot] committed Apr 6, 2024
1 parent a90f906 commit 2fffe51
Showing 2 changed files with 27 additions and 12 deletions.
notebooks/llm_inference_experiments.ipynb (4 additions, 2 deletions)
@@ -377,6 +377,7 @@
 "source": [
 "import requests\n",
 "\n",
+"\n",
 "class ModelInferenceManager:\n",
 " def __init__(self, api_key, models_config):\n",
 " self.api_key = api_key\n",
@@ -390,8 +391,8 @@
 " for variant in group[\"variants\"]:\n",
 " if variant[\"model\"] == model_name:\n",
 " self.model = model_name\n",
-" self.input_token_price = variant['input_price_per_token']\n",
-" self.output_token_price = variant['output_price_per_token']\n",
+" self.input_token_price = variant[\"input_price_per_token\"]\n",
+" self.output_token_price = variant[\"output_price_per_token\"]\n",
 " return\n",
 " raise ValueError(f\"Model {model_name} not found in configuration.\")\n",
 "\n",
@@ -539,6 +540,7 @@
 "source": [
 "import yaml\n",
 "\n",
+"\n",
 "def load_models_config(config_file_path):\n",
 " with open(config_file_path, \"r\") as config_file:\n",
 " try:\n",
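The hunk is cut off inside the `try:` block, so the function body is not visible here. A plausible completion, assuming the function simply parses the file with yaml.safe_load; the body and the exception handling are assumptions, not the notebook's actual code:

import yaml


def load_models_config(config_file_path):
    with open(config_file_path, "r") as config_file:
        try:
            return yaml.safe_load(config_file)  # assumed body
        except yaml.YAMLError as exc:  # assumed handler
            print(f"Error parsing YAML config: {exc}")
            return None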
notebooks/vectorization_experiments.ipynb (23 additions, 10 deletions)
@@ -488,6 +488,7 @@
 "from tqdm.auto import tqdm\n",
 "import yaml\n",
 "\n",
+"\n",
 "class SemanticVectorizer:\n",
 " def __init__(self, api_key, models_config):\n",
 " self.api_key = api_key\n",
@@ -504,27 +505,28 @@
 " for variant in group[\"variants\"]:\n",
 " if variant[\"model\"] == model_name:\n",
 " self.model = model_name\n",
-" self.usage_price_per_token = variant.get('usage_price_per_token', 0)\n",
+" self.usage_price_per_token = variant.get(\"usage_price_per_token\", 0)\n",
 " found = True\n",
 " break\n",
 " if found:\n",
 " break\n",
 " if not found:\n",
 " raise ValueError(f\"Model {model_name} not found in configuration.\")\n",
-" \n",
+"\n",
 " def preprocess_text(self, text):\n",
 " \"\"\"\n",
 " Preprocesses the text before embedding.\n",
 " \"\"\"\n",
 " text = text.lower()\n",
 " return text.replace(\"\\n\", \" \").strip()\n",
-" \n",
+"\n",
 " def read_and_process_markdown(self, file_path):\n",
 " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
 " text = file.read()\n",
 " sections = re.split(r\"\\n(#{1,3} .*)\\n\", text)\n",
 " self.texts = [self.preprocess_text(sections[0])] + [\n",
-" self.preprocess_text(f\"{sections[i]}\\n{sections[i + 1]}\") for i in range(1, len(sections), 2)\n",
+" self.preprocess_text(f\"{sections[i]}\\n{sections[i + 1]}\")\n",
+" for i in range(1, len(sections), 2)\n",
 " ]\n",
 " return self.texts\n",
 "\n",
@@ -542,7 +544,9 @@
 " embedding = np.array(data[\"data\"][0][\"embedding\"], dtype=\"float32\")\n",
 " return embedding, data.get(\"usage\", {})\n",
 " else:\n",
-" print(f\"Failed to generate embedding: Status code {response.status_code}, Response: {response.text}\")\n",
+" print(\n",
+" f\"Failed to generate embedding: Status code {response.status_code}, Response: {response.text}\"\n",
+" )\n",
 " return None, None\n",
 "\n",
 " def generate_embeddings(self):\n",
@@ -567,11 +571,19 @@
 " def search_similar_sections(self, query_text, num_results):\n",
 " query_embedding, _ = self.query_openai_embedding(query_text)\n",
 " if self.faiss_index is None:\n",
-" raise ValueError(\"FAISS index is not initialized. Please create the index before searching.\")\n",
+" raise ValueError(\n",
+" \"FAISS index is not initialized. Please create the index before searching.\"\n",
+" )\n",
 " if query_embedding is None:\n",
 " return []\n",
-" distances, indices = self.faiss_index.search(np.array([query_embedding], dtype=\"float32\"), num_results)\n",
-" return [{\"index\": idx, \"text\": self.texts[idx]} for idx in indices[0] if idx < len(self.texts)]\n",
+" distances, indices = self.faiss_index.search(\n",
+" np.array([query_embedding], dtype=\"float32\"), num_results\n",
+" )\n",
+" return [\n",
+" {\"index\": idx, \"text\": self.texts[idx]}\n",
+" for idx in indices[0]\n",
+" if idx < len(self.texts)\n",
+" ]\n",
 "\n",
 " def save_faiss_index(self, index_path):\n",
 " if self.faiss_index:\n",
@@ -581,7 +593,7 @@
 "\n",
 " def load_faiss_index(self, index_path):\n",
 " self.faiss_index = faiss.read_index(index_path)\n",
-" \n",
+"\n",
 " def calculate_cost(self, usage):\n",
 " total_tokens = usage.get(\"total_tokens\", 0)\n",
 " total_price = total_tokens * self.usage_price_per_token\n",
@@ -665,6 +677,7 @@
 "source": [
 "import yaml\n",
 "\n",
+"\n",
 "def load_models_config(config_file_path):\n",
 " with open(config_file_path, \"r\") as config_file:\n",
 " try:\n",
@@ -847,7 +860,7 @@
 "# Query text for searching in the FAISS index.\n",
 "query_text = \"Health\"\n",
 "\n",
-"# Similarity search in the FAISS index \n",
+"# Similarity search in the FAISS index\n",
 "results = new_embedder.search_similar_sections(query_text, num_results=2)\n",
 "\n",
 "# Print the search results.\n",
