Skip to content

Commit

Permalink
chore: autopublish 2024-04-06T12:50:50Z
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed Apr 6, 2024
1 parent 3d517cf commit d0da39b
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions notebooks/vectorization_experiments.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@
"path_to_markdown_file = \"../data/raw/mock_markdown.md\"\n",
"\n",
"# Read Markdown File\n",
"with open(path_to_markdown_file, 'r', encoding='utf-8') as file:\n",
"with open(path_to_markdown_file, \"r\", encoding=\"utf-8\") as file:\n",
" markdown_text = file.read()\n",
"\n",
"# Split into sections by headers\n",
"sections = re.split(r\"\\n(#{1,3} .*)\\n\", markdown_text)\n",
"processed_sections = [sections[0]]\n",
"for i in range(1, len(sections), 2):\n",
" processed_sections.append(sections[i] + sections[i+1])\n",
" processed_sections.append(sections[i] + sections[i + 1])\n",
"\n",
"print(processed_sections[0])\n",
"print(processed_sections[-1])"
Expand Down Expand Up @@ -126,7 +126,7 @@
" response = requests.post(url, headers=headers, json=payload)\n",
" if response.status_code == 200:\n",
" data = response.json()\n",
" embedding = np.array(data['data'][0]['embedding'], dtype='float32')\n",
" embedding = np.array(data[\"data\"][0][\"embedding\"], dtype=\"float32\")\n",
" return embedding\n",
" else:\n",
" error_message = f\"HTTP Error {response.status_code}\"\n",
Expand Down Expand Up @@ -248,15 +248,15 @@
"# Query\n",
"query_text = \"Artificial Intelligence\"\n",
"query_embedding = query_openai_embedding(OPENAI_CREDENTIALS, query_text)\n",
"query_embedding = np.array(query_embedding, dtype='float32')\n",
"query_embedding = np.array(query_embedding, dtype=\"float32\")\n",
"\n",
"# Search the FAISS index\n",
"distances, indices = index.search(query_embedding.reshape(1, -1), 2) \n",
"distances, indices = index.search(query_embedding.reshape(1, -1), 2)\n",
"\n",
"# Display Top Similar Sections\n",
"print(\"Top similar sections to the query:\")\n",
"for idx in indices[0]:\n",
" print(f\"\\nSection {idx+1}: {processed_sections[idx][:150]}...\") "
" print(f\"\\nSection {idx+1}: {processed_sections[idx][:150]}...\")"
]
},
{
Expand Down

0 comments on commit d0da39b

Please sign in to comment.