""" The dataloader for custom dataset
# SPDX-FileCopyrightText: 2021 Binbin Xu
# SPDX-License-Identifier: BSD-3-Clause
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import sys, os, random
import os.path as osp
import torch.utils.data as data
from imageio import imread
from cv2 import resize, INTER_NEAREST
import os.path
import glob
# import third party
import numpy as np
import random
def get_depth_from_corresponding_rgb(rgb_file_abs):
    """ Map an rgb image path to the path of its corresponding depth image.
    """
    rgb_dir, rgb_file_rel = os.path.split(rgb_file_abs)
    depth_dir = rgb_dir.replace("rgb", "depth")
    depth_file = os.path.join(depth_dir, rgb_file_rel)
    return depth_file
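

# Example of the mapping above (hypothetical paths): the depth map is assumed
# to share the rgb file name and live in a sibling 'depth' directory, e.g.
#   data/scene_01/rgb/0001.png  ->  data/scene_01/depth/0001.png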


class VaryLighting(data.Dataset):
    """
    Dataset class for our varying-lighting dataset
    """

    base = 'data'
    IMAGE_HEIGHT = 480
    IMAGE_WIDTH = 640
    DEPTH_SCALE = 1.0 / 1000.0
    K = [525.0, 525.0, 319.5, 239.5]  # intrinsics: fx, fy, cx, cy

    def __init__(self, root='', category='keyframe',
                 keyframes=[1], data_transform=None, select_traj=None,
                 image_resize=0.25, truncate_depth=True, pair='incremental',
                 ):
        super(VaryLighting, self).__init__()
        assert pair in ['incremental', 'keyframe']
        self.pair = pair

        self.image_seq = []     # list (seq) of list (frame) of string (rgb image path)
        self.timestamp = []     # list (seq) of list (frame) of string (timestamp)
        self.depth_seq = []     # list (seq) of list (frame) of string (depth image path)
        self.invalid_seq = []   # empty
        self.cam_pose_seq = []  # list (seq) of list (frame) of 4 x 4 ndarray
        self.calib = []         # list (seq) of intrinsics (fx, fy, cx, cy)
        self.seq_names = []     # list (seq) of string (seq name)

        self.ids = 0
        self.seq_acc_ids = [0]
        self.keyframes = keyframes
        self.transforms = data_transform

        if category == 'test':
            self.__load_test(root, select_traj)
        elif category in ['train', 'validation']:
            raise NotImplementedError()
        elif category == 'kf':
            # note: __load_kf is not defined in this file
            self.__load_kf(root, select_traj)
        else:
            raise NotImplementedError()

        # downscale the input image by image_resize (a quarter by default)
        self.fx_s = image_resize
        self.fy_s = image_resize
        self.truncate_depth = truncate_depth

        print('VaryLighting dataloader for {:} using keyframe {:}: '
              '{:} valid frames'.format(category, keyframes, self.ids))
    def __load_test(self, root, select_traj=None):
        """ Note:
        The test trajectory is loaded slightly differently from the train/validation trajectory.
        We only select keyframes from the entire trajectory, rather than using every individual frame.
        For a given trajectory of length N, using keyframe 2, the train/validation set will use
        [[1, 3], [2, 4], [3, 5], ..., [N-1, N]],
        while the test set will use the pairs
        [[1, 3], [3, 5], [5, 7], ..., [N-1, N]].
        This difference results in a change in the trajectory length when using different keyframes.
        The benefit of sampling keyframes for the test set is that the output is a more reasonable trajectory,
        while in training/validation we fully leverage every pair of images.
        """
        assert len(self.keyframes) == 1
        kf = self.keyframes[0]
        # after subsampling every kf-th frame, consecutive frames are one keyframe apart
        self.keyframes = [1]

        track_scene = osp.join(root, "*/")
        scene_lists = glob.glob(track_scene, recursive=True)
        self._num_scenes = len(scene_lists)
        if self._num_scenes == 0:
            raise ValueError("No sub-folder data found in the dataset root")

        for scene in scene_lists:
            scene_name = osp.basename(osp.dirname(scene))
            if select_traj is not None:
                if scene_name != select_traj:
                    continue

            rgb_images_glob = os.path.join(scene, "rgb/*.png")
            all_rgb_images_in_scene = sorted(glob.glob(rgb_images_glob))
            total_num = len(all_rgb_images_in_scene)
            self.calib.append(self.K)
            images = [all_rgb_images_in_scene[idx] for idx in range(0, total_num, kf)]
            # fake timestamps derived from the file names
            timestamp = [os.path.splitext(os.path.basename(image))[0] for image in images]
            depths = [get_depth_from_corresponding_rgb(rgb_file) for rgb_file in images]
            extrin = [None] * len(images)  # ground-truth poses unavailable

            self.image_seq.append(images)
            self.timestamp.append(timestamp)
            self.depth_seq.append(depths)
            self.cam_pose_seq.append(extrin)
            self.seq_names.append(scene)
            self.ids += max(0, len(images) - 1)
            self.seq_acc_ids.append(self.ids)
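
    # Example of the bookkeeping above (hypothetical sizes): two scenes with
    # 5 and 4 subsampled frames contribute 4 and 3 pairs respectively, so
    # seq_acc_ids = [0, 4, 7]; a global index of 5 then falls into the second
    # scene with local frame index 5 - 4 = 1 (see __getitem__).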
    def __load_rgb_tensor(self, path):
        """ Load the rgb image and rescale its values to [0, 1]
        """
        image = imread(path)[:, :, :3]
        image = image.astype(np.float32) / 255.0
        image = resize(image, None, fx=self.fx_s, fy=self.fy_s)
        return image
    def __load_depth_tensor(self, path):
        """ Load the depth map:
        The depth images are scaled by a factor of 5000, i.e., a pixel
        value of 5000 in the depth image corresponds to a distance of
        1 meter from the camera, 10000 to a 2 meter distance, etc.
        A pixel value of 0 means missing value/no data.
        """
        depth = imread(path).astype(np.float32) / 5e3
        depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST)
        if self.truncate_depth:
            # clip to the reliable range of Kinect depth
            depth = np.clip(depth, a_min=0.5, a_max=5.0)
        return depth[np.newaxis, :]
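
    # Worked example of the depth convention above: a raw pixel value of 2500
    # maps to 2500 / 5000 = 0.5 m, which is exactly the lower clipping bound
    # applied when truncate_depth is enabled.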
    def __getitem__(self, index):
        # incremental pairs, e.g. [[1, 3], [3, 5], [5, 7], ..., [N-1, N]]
        seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0)
        frame_idx = index - self.seq_acc_ids[seq_idx]
        this_idx = frame_idx
        next_idx = frame_idx + random.choice(self.keyframes)

        color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx])
        color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx])
        depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx])
        depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx])

        if self.transforms:
            color0, color1 = self.transforms([color0, color1])

        # scale the intrinsics to match the resized images
        calib = np.asarray(self.calib[seq_idx], dtype=np.float32)
        calib[0] *= self.fx_s
        calib[1] *= self.fy_s
        calib[2] *= self.fx_s
        calib[3] *= self.fy_s

        # ground-truth relative pose is unavailable for this dataset
        # cam_pose0 = self.cam_pose_seq[seq_idx][this_idx]
        # cam_pose1 = self.cam_pose_seq[seq_idx][next_idx]
        # transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32)
        transform = None

        name = {'seq': self.seq_names[seq_idx],
                'frame0': this_idx,
                'frame1': next_idx}
        camera_info = {"height": color0.shape[0],
                       "width": color0.shape[1],
                       "fx": calib[0],
                       "fy": calib[1],
                       "ux": calib[2],
                       "uy": calib[3]}
        return color0, color1, depth0, depth1, transform, calib, name, camera_info
    def get_keypair(self, index, kf_idx=0):
        # keyframe pairs, e.g. [[1, 3], [1, 5], [1, 7], ..., [1, N]]
        seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0)
        frame_idx = index - self.seq_acc_ids[seq_idx]
        this_idx = kf_idx
        next_idx = frame_idx

        color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx])
        color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx])
        depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx])
        depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx])

        if self.transforms:
            color0, color1 = self.transforms([color0, color1])

        # scale the intrinsics to match the resized images
        calib = np.asarray(self.calib[seq_idx], dtype=np.float32)
        calib[0] *= self.fx_s
        calib[1] *= self.fy_s
        calib[2] *= self.fx_s
        calib[3] *= self.fy_s

        # ground-truth relative pose is unavailable for this dataset
        # cam_pose0 = self.cam_pose_seq[seq_idx][this_idx]
        # cam_pose1 = self.cam_pose_seq[seq_idx][next_idx]
        # transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32)
        transform = None

        name = {'seq': self.seq_names[seq_idx],
                'frame0': this_idx,
                'frame1': next_idx}
        camera_info = {"height": color0.shape[0],
                       "width": color0.shape[1],
                       "fx": calib[0],
                       "fy": calib[1],
                       "ux": calib[2],
                       "uy": calib[3]}
        return color0, color1, depth0, depth1, transform, calib, name, camera_info
    def __len__(self):
        return self.ids
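

# A minimal usage sketch (an assumption, not part of the original file): the
# root path below is hypothetical, and the expected layout is
# root/<scene>/rgb/*.png with matching root/<scene>/depth/*.png.
# The dataset is indexed directly because the returned ground-truth transform
# is None, which the default torch DataLoader collate function cannot batch.
if __name__ == '__main__':
    dataset = VaryLighting(root='data/vary_lighting', category='test',
                           keyframes=[2], image_resize=0.25)
    color0, color1, depth0, depth1, transform, calib, name, camera_info = dataset[0]
    print(name['seq'], color0.shape, depth0.shape, camera_info)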