-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
198 lines (168 loc) · 7.8 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import os
from time import sleep
import base64
import numpy as np
def find_image_pairs(folder_path):
    """Pair the folder's query image with every candidate image.

    The query is the first directory entry whose lower-cased name equals
    ``query.png``. Candidates are the entries whose names start with ``Top``
    and end with ``.png`` (both checks are case-sensitive).

    Args:
        folder_path: Directory to scan.

    Returns:
        A list of ``(query_path, candidate_path)`` tuples in the order
        ``os.listdir`` yields the candidates, or an empty list when the
        folder holds no query image.
    """
    # First entry named "query.png" (any letter case), or None if absent.
    query_path = next(
        (
            os.path.join(folder_path, entry)
            for entry in os.listdir(folder_path)
            if entry.lower() == "query.png"
        ),
        None,
    )
    if not query_path:
        return []

    # The same query path is reused as the first element of every pair.
    return [
        (query_path, os.path.join(folder_path, entry))
        for entry in os.listdir(folder_path)
        if entry.startswith("Top") and entry.endswith(".png")
    ]
def encode_image(image_path):
    """Return the contents of a file as a base64 text string.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The base64 encoding of the file's raw bytes, as a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    # base64 output is pure ASCII, so decoding as UTF-8 cannot fail.
    return str(encoded, "utf-8")
def generate_reranking(image_pairs, client, temperature=0.2,
                       model="gpt-4-vision-preview", delay=10):
    """Ask a vision chat model to compare each query/candidate image pair.

    For every pair, both images are base64-encoded with ``encode_image`` and
    sent together with a fixed comparison prompt. The model's reply is
    appended to ``text.txt`` in the query image's folder, in the form
    ``"<candidate label>:\\n<reply>\\n\\n"``. The label is the candidate's
    file name with ``.png``, ``_True`` and ``_False`` removed.

    Args:
        image_pairs: Iterable of ``(query_path, candidate_path)`` items; only
            indices 0 and 1 of each item are used.
        client: Object exposing ``chat.completions.create(...)``
            (presumably an OpenAI client -- confirm against the caller).
        temperature: Sampling temperature forwarded to the API.
        model: Model name forwarded to the API. The default keeps the
            previously hard-coded value.
            NOTE(review): preview model names may be retired by the provider;
            confirm the default is still served.
        delay: Seconds to sleep before each request (previously a hard-coded
            10; presumably a rate-limit guard -- confirm).

    Returns:
        Path of the ``text.txt`` file written for the last pair, or ``None``
        when ``image_pairs`` is empty (this case used to raise
        ``UnboundLocalError``).
    """
    prompt = "I have 2 images of outdoor scenes: a query and a candidate. In this task, you must not talk about weather,lighting, vehicles, people, or animals. Refrain from mentioning any elements that are not directly observable or are obscure. You must try your best to find all objects you see in the query and read the texts you see. Then, for every object you see in the query, you should do the following: \n1. You should find all objects you see in the query.\n2. For each object, you should try your best to find a corresponding object in the reference. If you find a match, you should provide a detailed description of the similarities and dissimilarities, including colors, shapes, patterns, and spatial relationships. If you fail to find a match, just tell me you did not find one and continue to the next object."
    # Alternative prompts kept for reference (each label describes the prompt
    # directly above it).
    #prompt = f"I have 2 images of outdoor scenes: a query and a candidate. In this task, you must not talk about weather,lighting, vehicles, people, or animals. Refrain from mentioning any elements that are not directly observable or are obscure. You should do the following: \n1. You should provide a detailed description of the similarities and dissimilarities, including colors, shapes, patterns, and spatial relationships."
    ## this one removes object matching
    #prompt = f"I have 2 images of outdoor scenes: a query and a candidate. You should do the following: \n1. You should provide a detailed description of the similarities and dissimilarities, including colors, shapes, patterns, and spatial relationships."
    ## this one removes all constraints
    system_text = "You are an expert to analyze images. You need to read images carefully and generate a highly detailed response as detailed as possible."

    # Stays None when there is nothing to process, so the final return is
    # always bound.
    result_file = None
    for pair in image_pairs:
        query_path, candidate_path = pair[0], pair[1]
        sleep(delay)
        query_image = encode_image(query_path)
        db_image = encode_image(candidate_path)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": [
                        {
                            "type": "text",
                            "text": system_text,
                        }
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt,
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{query_image}"
                            },
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{db_image}"
                            },
                        },
                    ],
                },
            ],
            max_tokens=4096,
            temperature=temperature,
        )

        # Replies are collected next to the query image.
        folder_path = os.path.split(query_path)[0]
        result_file = os.path.join(folder_path, "text.txt")
        label = (
            os.path.split(candidate_path)[-1]
            .replace(".png", "")
            .replace("_True", "")
            .replace("_False", "")
        )
        # Append mode: repeated runs keep adding to the same file.
        with open(result_file, 'a', encoding='utf-8') as f:
            f.write(label + ":\n" + response.choices[0].message.content + "\n\n")
    return result_file
def rerank(result_file, client, temperature=0.2, model="gpt-4-1106-preview"):
    """Ask a chat model to rank candidates from their text descriptions.

    Reads the pair descriptions stored in ``result_file``, appends them to a
    fixed ranking prompt, sends that to the model, and appends the reply to
    ``rerank.txt`` in the same directory as ``result_file``.

    Args:
        result_file: Path of the UTF-8 text file holding the descriptions.
        client: Object exposing ``chat.completions.create(...)``
            (presumably an OpenAI client -- confirm against the caller).
        temperature: Sampling temperature forwarded to the API.
        model: Model name forwarded to the API. The default keeps the
            previously hard-coded value.
            NOTE(review): preview model names may be retired by the provider;
            confirm the default is still served.

    Returns:
        The model's reply text, or ``None`` when ``result_file`` could not be
        read. In that case a message is printed and no request is sent;
        previously the function printed the message and then crashed on the
        unbound ``content`` variable.
    """
    try:
        with open(result_file, 'r', encoding='utf-8') as file:
            content = file.read()
    except FileNotFoundError:
        print("File not found. Please check the file path.")
        return None
    except (OSError, ValueError, TypeError) as e:
        # OSError: other I/O failures; ValueError: undecodable bytes or an
        # invalid path string; TypeError: a non-path argument such as None.
        print(f"An error occurred: {e}")
        return None

    # First round of conversation
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "You are an expert in text-based place recognition. You need to read texts carefully.",
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "I am using a VPR model to retrieve the most similar images to a given query image. However, it possibly gives me wrong results. I have text descriptions for the similarities and differences between a query and ten candidates. I will provide you the text description of each query-candidate pair, and you should return a ranking of the likelihood that a candidate is at the sample place with the query. Keep in mind that weather, lighting, time, season, and viewpoint does not influence VPR results. Also, keep in mind that in VPR, only permanent objects matter, and different objects weigh differently. Make sure you have detailed and thorough reasoning." + "\n\n" + content,
                    },
                ],
            },
        ],
        max_tokens=4096,
        temperature=temperature,
    )
    first_round_reply = response.choices[0].message.content

    # Keep a copy of the reasoning next to the descriptions (append mode, so
    # repeated runs accumulate).
    directory = os.path.split(result_file)[0]
    reply_file = os.path.join(directory, 'rerank.txt')
    with open(reply_file, 'a', encoding='utf-8') as f:
        f.write(first_round_reply)
    # Short pause after the request (presumably rate limiting -- confirm).
    sleep(0.5)
    return first_round_reply
def from_text_to_rank(first_round_reply, client, temperature=0.2,
                      model="gpt-4-1106-preview"):
    """Condense a free-text ranking into a plain sequence of integers.

    Sends ``first_round_reply`` to the chat model with instructions to answer
    only with integers separated by spaces.

    Args:
        first_round_reply: Text containing the ranking to summarise.
        client: Object exposing ``chat.completions.create(...)``
            (presumably an OpenAI client -- confirm against the caller).
        temperature: Sampling temperature forwarded to the API.
        model: Model name forwarded to the API. The default keeps the
            previously hard-coded value.
            NOTE(review): preview model names may be retired by the provider;
            confirm the default is still served.

    Returns:
        The model's reply text, returned unmodified. It is not validated
        here, so the caller must parse it.
    """
    # The leading newline reproduces the original triple-quoted literal.
    instruction = "\nYou need to summarize the following rankings in a sequence of integers, and return me only the integers seperated by spaces"
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "You only should return integers seperated by spaces.",
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": instruction + "\n\n" + first_round_reply,
                    },
                ],
            },
        ],
        max_tokens=2048,
        temperature=temperature,
    )
    second_round_reply = response.choices[0].message.content
    return second_round_reply
def get_topk(second_round_reply, image_pairs, Top_K, ks=(1, 5, 10)):
    """Update top-k hit counters from a model-produced ranking.

    The reply is parsed into integers. For each cut-off ``k`` in ``ks``, the
    matching counter in ``Top_K`` is incremented when any of the first ``k``
    ranked integers is a ground-truth candidate.

    Ground truth comes from candidate file names. The name (without ``.png``
    and with ``Top`` removed) is split on ``_``; if one part is ``True``, the
    first part is taken as that candidate's integer id, e.g.
    ``Top3_True.png`` -> 3.

    Args:
        second_round_reply: Ranking text: integers separated by commas
            and/or whitespace. Leading, trailing and repeated separators are
            accepted.
        image_pairs: Iterable of ``(query_path, candidate_path)`` items; only
            index 1 of each item is used.
        Top_K: Mutable sequence of counters, one per entry of ``ks``. It is
            updated in place.
        ks: Cut-offs to evaluate, matched to ``Top_K`` by position.

    Returns:
        The same ``Top_K`` object that was passed in.

    Raises:
        ValueError: If the reply contains no integers, or a token that is
            not an integer.
    """
    # One tokenisation handles "1,2,3", "1, 2, 3", "1 2 3" and stray
    # whitespace or newlines, replacing the old split-then-retry parsing.
    tokens = second_round_reply.replace(',', ' ').split()
    if not tokens:
        raise ValueError("ranking reply contains no integers")
    reranked = [int(token) for token in tokens]

    ground_truth = set()
    for pair in image_pairs:
        stem = os.path.split(pair[1])[-1].replace('.png', '').replace('Top', '')
        parts = stem.split('_')
        if 'True' in parts:
            ground_truth.add(int(parts[0]))

    for slot, k in enumerate(ks):
        if any(rank in ground_truth for rank in reranked[:k]):
            Top_K[slot] += 1
    return Top_K
def get_ind(folder_name):
    """Return the entry names of a directory as ascending integers.

    Args:
        folder_name: Directory whose entries are all named with integers.

    Returns:
        A sorted list with ``int(name)`` for every entry of the directory.

    Raises:
        ValueError: If any entry name cannot be converted by ``int``.
    """
    return sorted(int(entry) for entry in os.listdir(folder_name))