-
Notifications
You must be signed in to change notification settings - Fork 2
/
icdar_dataset.py
executable file
·83 lines (75 loc) · 3.07 KB
/
icdar_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from __future__ import print_function, division
import math
import numpy
from PIL import Image, ImageDraw
import cv2
import json
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
# Default dataset locations (IC15 test images and the Challenge 4 / Task 1
# ground-truth folder). Both can be overridden via the ICDARDataset constructor.
IMG_DIR = "/data/shudeng/IC15/test_images/"
LABEL_DIR = "/data/shudeng/IC15/Challenge4_Test_Task1_GT/"


class ICDARDataset(Dataset):
    """Dataset pairing each image with a binary mask of its annotated polygons.

    Every entry of ``img_dir`` is treated as one sample. For an image whose
    file name stem is ``<stem>``, the image is read from ``<stem>.jpg`` in
    ``img_dir`` and the annotation from ``gt_<stem>.txt`` in ``label_dir``.
    Each non-empty annotation line starts with comma-separated integer
    coordinates ``x1,y1,x2,y2,...``; only the first eight fields are used and
    anything after them is ignored.
    """

    def __init__(self, img_dir=IMG_DIR, label_dir=LABEL_DIR):
        """Record the directories and list the samples found in ``img_dir``."""
        self.img_dir = img_dir
        self.label_path = label_dir
        # os.listdir() returns entries in arbitrary order; sorting makes a
        # given index refer to the same sample on every run and machine.
        self.images = sorted(os.listdir(img_dir))

    def __len__(self):
        """Return the number of entries listed from the image directory."""
        return len(self.images)

    def resize_image(self, img):
        """Return ``img`` resized to a fixed 512x512 using ``cv2.resize``.

        NOTE: this method used to compute an aspect-preserving target size
        (short side 800, long side rounded up to a multiple of 32) and then
        unconditionally overwrite it with 512x512. The overwritten
        computation was dead code and has been dropped; the output size is
        unchanged.
        """
        new_width, new_height = 512, 512
        return cv2.resize(img, (new_width, new_height))

    def load_image(self, image_path):
        """Read ``image_path`` with OpenCV as a 3-channel float32 array.

        No resizing or normalisation is applied.

        Returns:
            ``(img, original_shape)`` where ``original_shape`` is the
            ``(height, width)`` of the loaded image.

        Raises:
            IOError: if OpenCV could not read the file.
        """
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising,
            # which would otherwise surface as an AttributeError on .astype().
            raise IOError("cannot read image: {}".format(image_path))
        img = img.astype('float32')
        original_shape = img.shape[:2]
        return img, original_shape

    def make_mask(self, width, height, polygons):
        """Rasterise ``polygons`` into a single-channel mask.

        Args:
            width: mask width in pixels.
            height: mask height in pixels.
            polygons: iterable of polygons, each a sequence of ``(x, y)``
                points (only the first two items of every point are used).

        Returns:
            A numpy array built from an 8-bit ('L' mode) image of size
            ``(width, height)``: 1 on and inside each polygon, 0 elsewhere.
        """
        canvas = Image.new('L', (width, height), 0)
        # One drawing context is enough for all polygons.
        draw = ImageDraw.Draw(canvas)
        for polygon in polygons:
            vertices = [(point[0], point[1]) for point in polygon]
            try:
                draw.polygon(vertices, outline=1, fill=1)
            except Exception:
                # Deliberate best-effort: a polygon PIL refuses to draw
                # (e.g. too few vertices) is left out of the mask instead
                # of failing the whole sample.
                continue
        return numpy.array(canvas)

    @staticmethod
    def _read_polygons(label_file):
        """Parse a ground-truth text file into a list of polygons.

        Blank lines are skipped. For every other line the first eight
        comma-separated fields are converted to ``int`` and grouped into
        ``[x, y]`` pairs.

        Raises:
            ValueError: if a field is not an integer or a line yields an odd
                number of coordinates.
        """
        polygons = []
        # The context manager closes the file even when parsing fails.
        with open(label_file, 'r', encoding='utf-8', errors='ignore') as reader:
            for line in reader:
                # Dropping non-ASCII characters also removes a leading
                # byte-order mark, which would otherwise break int().
                line = line.encode('ascii', 'ignore').decode('ascii').strip()
                if not line:
                    # Empty/trailing lines carry no annotation.
                    continue
                points = [int(value) for value in line.split(",")[:8]]
                polygons.append(np.array(points).reshape((-1, 2)).tolist())
        return polygons

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A dict with keys ``'image'`` (float tensor of the loaded image),
            ``'mask'`` (float tensor with the same height and width, 1 inside
            annotated polygons and 0 elsewhere) and ``'filename'`` (path of
            the image file).
        """
        # splitext keeps dots that belong to the stem ("a.b.jpg" -> "a.b"),
        # whereas split('.')[0] would truncate it to "a".
        img_name = os.path.splitext(self.images[idx])[0]
        image_path = os.path.join(self.img_dir, img_name + ".jpg")
        img, original_shape = self.load_image(image_path)

        # os.path.join works whether or not label_path ends with a separator.
        label_file = os.path.join(self.label_path, "gt_" + img_name + ".txt")
        polygons = self._read_polygons(label_file)

        height, width = original_shape
        mask = self.make_mask(width, height, polygons)
        return {
            'image': torch.from_numpy(img).float(),
            'mask': torch.from_numpy(mask).float(),
            'filename': image_path,
        }
if __name__ == "__main__":
    # Manual smoke test: load the sample at index 2 from the default
    # directories and write its image and mask to the working directory.
    sample = ICDARDataset()[2]
    image_array = sample['image'].numpy()
    mask_array = sample['mask'].numpy()
    cv2.imwrite("icdar_img.jpg", image_array)
    # The mask is scaled by 255 before it is written out.
    cv2.imwrite("icdar_mask.jpg", mask_array * 255)
    print(sample)