-
Notifications
You must be signed in to change notification settings - Fork 0
/
load-and-search-only.py
97 lines (80 loc) · 3.27 KB
/
load-and-search-only.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import weaviate
import os
import requests
import json
import weaviate.classes
# Connect to a Weaviate instance.
# This script uses `connect_to_embedded`; swap in `weaviate.connect_to_local(...)`
# to talk to an already-running local server instead.
# Both API keys are read from the environment; a missing variable raises KeyError here.
client = weaviate.connect_to_embedded(
    headers={
        "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"],
        "X-Cohere-Api-Key": os.environ["COHERE_API_KEY"],
    }
)

# Stop immediately when the instance does not report ready.
if not client.is_ready():
    # Raising SystemExit with a message prints it to stderr and exits with
    # status 1. The bare `exit()` helper used previously is injected by the
    # `site` module and, called without arguments, reported success (status 0).
    raise SystemExit("Failed to connect to Weaviate instance.")
print("Connected to Weaviate instance:", client)
# Function to load data from a URL
def load_data(path, timeout=30):
    """Download ``path`` with an HTTP GET and return its body parsed as JSON.

    Args:
        path: URL of the JSON document to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot hang the script indefinitely.

    Returns:
        The decoded JSON value.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    response = requests.get(path, timeout=timeout)
    # Fail on HTTP errors here instead of handing an error page to the JSON parser.
    response.raise_for_status()
    return json.loads(response.text)
# URLs for the sample data.
# These point at raw.githubusercontent.com: the github.com ".../blob/..." form
# serves the HTML file viewer rather than the JSON file, which makes
# json.loads() fail.
sample_10_url = "https://raw.githubusercontent.com/glauberss2007/AI-multimodal-weaviate/main/data-samples/jeopardy_tiny.json"
sample_1k_url = "https://raw.githubusercontent.com/glauberss2007/AI-multimodal-weaviate/main/data-samples/jeopardy_1k.json"

# Load the sample data (data_1k is only used by the optional insert further down)
data_10 = load_data(sample_10_url)
data_1k = load_data(sample_1k_url)
# Start from a clean slate: drop the "Questions" collection when it already exists
if client.collections.exists("Questions"):
    client.collections.delete("Questions")

# Recreate "Questions" with the Cohere text vectorizer and OpenAI "gpt-4"
# as the generative model
client.collections.create(
    name="Questions",
    vectorizer_config=weaviate.classes.Configure.Vectorizer.text2vec_cohere(),
    generative_config=weaviate.classes.Configure.Generative.openai(model="gpt-4"),
)

# Import the small sample into the collection
questions_collection = client.collections.get("Questions")
questions_collection.data.insert_many(data_10)
# Uncomment to insert more data
# questions_collection.data.insert_many(data_1k)

# Fetch up to five objects and print the properties of the first one
sample_response = questions_collection.query.fetch_objects(limit=5)
first_object = sample_response.objects[0]
print(first_object.properties)
# Perform a vector search
def vector_search(collection, query, limit=5):
    """Run a near-text query on ``collection`` and print each hit's properties.

    Args:
        collection: Object exposing ``query.near_text(query=..., limit=...)``.
        query: Text passed to ``near_text`` as ``query``.
        limit: Value passed to ``near_text`` as ``limit``.

    Returns:
        None; results are written to stdout, one ``properties`` value per line.
    """
    query_response = collection.query.near_text(query=query, limit=limit)
    # The hits live on the response's `objects` attribute (the same way the
    # fetch_objects result is read in this script); the response object itself
    # is not iterated.
    for item in query_response.objects:
        print(item.properties)
# Demo: near-text search for "pigments" with the default limit
vector_search(collection=questions_collection, query="pigments")
# Perform a search with filters
def search_with_filters(collection, query, filter_value, limit=5):
    """Run a filtered near-text query and print each hit's properties.

    Only objects whose ``value`` property is greater than ``filter_value``
    are requested.

    Args:
        collection: Object exposing ``query.near_text(query=..., limit=..., filters=...)``.
        query: Text passed to ``near_text`` as ``query``.
        filter_value: Lower bound (exclusive) applied to the ``value`` property.
        limit: Value passed to ``near_text`` as ``limit``.

    Returns:
        None; results are written to stdout, one ``properties`` value per line.
    """
    # NOTE(review): `weaviate.classes.Filter("value")` is kept as written;
    # newer client releases may expect `Filter.by_property("value")` instead --
    # confirm against the installed weaviate-client version.
    value_filter = weaviate.classes.Filter("value").greater_than(filter_value)
    query_response = collection.query.near_text(
        query=query,
        limit=limit,
        filters=value_filter,
    )
    # Iterate the `objects` attribute of the response, not the response itself.
    for item in query_response.objects:
        print(item.properties)
# Demo: "musical instruments" restricted to objects whose value exceeds 500
search_with_filters(
    collection=questions_collection,
    query="musical instruments",
    filter_value=500,
)
# Perform a hybrid search
def hybrid_search(collection, query, alpha, limit=5):
    """Run a hybrid query on ``collection`` and print each hit's properties.

    Args:
        collection: Object exposing ``query.hybrid(query=..., alpha=..., limit=...)``.
        query: Text passed to ``hybrid`` as ``query``.
        alpha: Value passed to ``hybrid`` as ``alpha``.
            NOTE(review): presumably the keyword/vector weighting -- confirm
            the exact meaning in the client documentation.
        limit: Value passed to ``hybrid`` as ``limit``.

    Returns:
        None; results are written to stdout, one ``properties`` value per line.
    """
    query_response = collection.query.hybrid(query=query, alpha=alpha, limit=limit)
    # Iterate the `objects` attribute of the response, not the response itself.
    for item in query_response.objects:
        print(item.properties)
# Demo: hybrid search for "musical instruments" with alpha 0.7
hybrid_search(collection=questions_collection, query="musical instruments", alpha=0.7)
# Vector search, then generate content with the collection's generative model
def group_task_generate_and_tweet(collection, query, limit=4):
    """Run a generative near-text query and print each hit with its generated text.

    Despite the function name, the request uses ``single_prompt`` rather than a
    grouped task. The prompt is built from ``query`` only and does not
    reference any object property.
    NOTE(review): presumably this yields one generation per returned object --
    confirm against the client documentation.

    Args:
        collection: Object exposing
            ``generate.near_text(query=..., limit=..., single_prompt=...)``.
        query: Text used both as the search query and inside the prompt.
        limit: Value passed to ``near_text`` as ``limit``.

    Returns:
        None; for every hit, its ``properties`` and then its ``generated``
        value are written to stdout.
    """
    response = collection.generate.near_text(
        query=query,
        limit=limit,
        single_prompt=f"Write a short tweet about: {query}",
    )
    # Iterate the `objects` attribute of the response, not the response itself.
    for item in response.objects:
        print(item.properties)
        print(item.generated)
# Demo: generate short tweets for the "musical instruments" search (default limit)
group_task_generate_and_tweet(
    collection=questions_collection,
    query="musical instruments",
)