#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pickle
import numpy as np
f = open('word_dict_3825.pickle', 'rb')  # pickle data should be read in binary mode
word_dict_3825 = pickle.load(f)
f.close()
# #####----- generate hanzi dataset ----------------------
# hanzi = []
# for i in word_dict_3825:
#     hanzi.append(i.rstrip())
# f = open('corpus.txt', 'w')
# for j in hanzi:
#     f.write(j.encode('gbk'))
#     f.write('\n')
# ########## generate the text line images ------------------
import os
import random

import cv2
from PIL import Image, ImageFont, ImageDraw
"""""""""""""""""""""""""""""""""""""""
可设置参数
"""""""""""""""""""""""""""""""""""""""
Image_size=40 #输出每个字图片尺寸
# Word_size=28 #每幅图中字体尺寸
En_Noise=1 #1:噪声使能;0:噪声关闭
Gaussian_size=1 #高斯滤波窗口大小
Gaussian_e=2 #高斯滤波标准差
Min_random_size=5 #字体随机位置下限值
Max_random_size=8 #字体随机位置上限值
Run_times=5 #运行次数,一个字的样本个数=运行次数*字体数量
"""""""""""""""""""""""""""""""""""""""
"""""""""""""""""""""""""""""""""""""""
def rotate(im_cv, angle=3, label=None):
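    """Rotate `im_cv` by `angle` degrees about its center and apply the same
    affine transform to the x split points in `label`; returns the rotated
    image and the transformed x coordinates."""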
    rows, cols = np.array(im_cv).shape[0:2]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1.0)
    img = cv2.warpAffine(im_cv, M, (cols, rows), borderValue=[255, 255, 255])
    # get the rotated split points
    c = np.c_[np.reshape(label, (len(label), 1)), np.zeros((len(label), 1))]
    landmark = np.asarray([(M[0][0] * x + M[0][1] * y + M[0][2],
                            M[1][0] * x + M[1][1] * y + M[1][2]) for (x, y) in c])
    return img, landmark[:, 0]
def random_mask(im_cv):
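    """Randomly drop about half of the ink pixels to simulate degraded
    printing; returns a grayscale image."""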
    im_cv = cv2.cvtColor(im_cv, cv2.COLOR_BGR2GRAY)
    img = 255 - im_cv
    mask = 0.5 * np.ones((img.shape[0], img.shape[1]))
    mask = mask > np.random.rand(img.shape[0], img.shape[1])
    mask = np.asarray(mask, dtype=int)
    res = np.multiply(img, mask)
    res = 255 - res
    # cv2.imwrite("./res.jpg", res)
    return res
def erode(img_cv):
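    """Morphological erosion with a 2x2 cross kernel; on black-on-white text
    this thickens the dark strokes."""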
    arr = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
    kernel = np.uint8(np.zeros((2, 2)))
    for x in range(2):
        kernel[x, 1] = 1
        kernel[1, x] = 1
    # erode the image
    eroded = cv2.erode(arr, kernel)
    return eroded
def dilate(img_cv):
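    """Morphological dilation with a 2x2 cross kernel; on black-on-white text
    this thins the dark strokes."""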
    arr = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
    kernel = np.uint8(np.zeros((2, 2)))
    for x in range(2):
        kernel[x, 1] = 1
        kernel[1, x] = 1
    # dilate the image
    dilated = cv2.dilate(arr, kernel)
    return dilated
# GetFileList walks all font files under a directory and returns their full paths.
def GetFileList(dir, fileList):
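    """Recursively collect every file path under `dir` into `fileList`."""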
    newDir = dir
    if os.path.isfile(dir):
        fileList.append(dir)
    elif os.path.isdir(dir):
        for s in os.listdir(dir):
            # to skip certain sub-folders, use:
            # if s == "xxx":
            #     continue
            newDir = os.path.join(dir, s)
            GetFileList(newDir, fileList)
    return fileList
def Synthesizing_from_seg_data(data_dir=u'./result_word-segmented-from-IDcards-without-expandto500samples/'):
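    """Compose 1024x48 text-line images by pasting pre-segmented character
    crops at random horizontal gaps, recording the [start_x, end_x] split
    points of each character. Returns the dataset rows and the image counter."""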
    image_names = []
    dataset = []
    label = []  # split points of the current line, e.g. [1, 200, ...]
    subfolds = os.listdir(data_dir)
    for subfold in subfolds:
        filenames = os.listdir(os.path.join(data_dir, subfold))
        image_names += [os.path.join(data_dir, subfold, fname) for fname in filenames]
    im = Image.new("RGB", (1024, 48), (255, 255, 255))
    pos_x = 0
    cnt_img = 0
    Times = 2
    for tt in range(Times):
        random.shuffle(image_names)
        for fname in image_names:
            print fname
            offset_x = random.randint(0, 20)
            pos_x = pos_x + offset_x  # use a random gap between characters
            im_a = Image.open(fname)
            im_a = im_a.crop((7, 7, 33, 33))  # (left, upper, right, lower)
            Word_size = np.random.randint(36, 41)
            im_a = im_a.resize((Word_size, Word_size))
            if pos_x + Word_size >= 1024:  # the line is full: save the text line image
                im.save("./gen_imgs/" + str(cnt_img) + ".png")
                print 'save:', str(cnt_img) + '.png'
                dataset.append([str(cnt_img) + '.png'] + label)
                cnt_img += 1
                # re-initialize a blank line
                im = Image.new("RGB", (1024, 48), (255, 255, 255))
                pos_x = 0
                offset_x = random.randint(0, 20)
                pos_x = pos_x + offset_x  # use a random gap between characters
                label = []
            im.paste(im_a, (pos_x, (48 - Word_size) / 2))
            label += [pos_x, pos_x + Word_size]
            pos_x += Word_size
    return dataset, cnt_img
def Synthesizing_from_gen_data(dataset, cnt_img):
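    """Render 1024x48 text lines from the corpus characters with every font
    found under ./get-images/Fonts, save each full line plus its blurred,
    eroded, dilated, masked and +/-1 degree rotated variants, and append the
    labelled rows to `dataset`."""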
    file_object = open("./corpus.txt", "r")
    all_the_text = file_object.read().decode('gbk')
    file_object.close()
    print all_the_text
    # keep only the CJK characters from the corpus
    str_data = []
    for i in range(0, len(all_the_text)):
        temp = ord(all_the_text[i])
        if temp >= 0x4E00 and temp <= 0x9FA5:
            str_data.append(all_the_text[i])
    # generate English letters and digits
    start, end = (0x30, 0x7B)
    for codepoint in range(int(start), int(end)):
        # keep digits 0-9, uppercase A-Z and lowercase a-z (e.g. codepoint 0x58 is 'X')
        if (codepoint >= 0x30 and codepoint < 0x3A) or (codepoint >= 0x41 and codepoint < 0x5B) or (codepoint >= 0x61 and codepoint < 0x7B):
            word = unichr(codepoint)
            str_data.append(word)
    # generate punctuation samples
    str_fuhao = [u'.,:。-']
    for codepoint in range(0, len(str_fuhao[0])):
        word = str_fuhao[0][codepoint]
        str_data.append(word)
    print len(str_data), "characters"
    # collect the filenames of the fonts used for rendering
    str_Font = './get-images/Fonts'
    font_list = GetFileList(str_Font, [])
    print len(font_list), "fonts", ':'
    # each dataset row looks like ['0.png', 1, 200, ...]
    label = []  # split points of the current line
    start, end = (0, len(str_data))
    for e in font_list:  # 11 kinds of fonts
        # draw text on a blank image of size 1024*48
        (filepath, tempfilename) = os.path.split(e)
        (shotname, extension) = os.path.splitext(tempfilename)
        im = Image.new("RGB", (1024, 48), (255, 255, 255))
        dr = ImageDraw.Draw(im)
        pos_x = 0
        for codepoint in range(int(start), int(end)):
            word = str_data[codepoint]
            print codepoint, ':', word
            Word_size = np.random.randint(26, 40)
            font = ImageFont.truetype(e, Word_size, encoding="Unicode")  # e is the full font path
            offset_x = random.randint(0, 20)
            pos_x = pos_x + offset_x  # use a random gap between characters
            if pos_x + Word_size >= 1024:  # the line is full: save the text line image
                im = np.array(im)
                if En_Noise:
                    im = cv2.GaussianBlur(im, (Gaussian_size, Gaussian_size), Gaussian_e)
                # note: IMWRITE_JPEG_QUALITY has no effect on .png output
                cv2.imwrite("./gen_imgs/" + str(cnt_img) + ".png", im, [int(cv2.IMWRITE_JPEG_QUALITY), 5])
                dataset.append([str(cnt_img) + '.png'] + label)
                cnt_img += 1
                # erode
                e_im = erode(im)
                cv2.imwrite("./gen_imgs/" + str(cnt_img) + ".png", e_im, [int(cv2.IMWRITE_JPEG_QUALITY), 5])
                dataset.append([str(cnt_img) + '.png'] + label)
                cnt_img += 1
                # dilate
                d_im = dilate(im)
                cv2.imwrite("./gen_imgs/" + str(cnt_img) + ".png", d_im, [int(cv2.IMWRITE_JPEG_QUALITY), 5])
                dataset.append([str(cnt_img) + '.png'] + label)
                cnt_img += 1
                # mask
                m_im = random_mask(im)
                cv2.imwrite("./gen_imgs/" + str(cnt_img) + ".png", m_im, [int(cv2.IMWRITE_JPEG_QUALITY), 5])
                dataset.append([str(cnt_img) + '.png'] + label)
                cnt_img += 1
                # rotate by -1 and +1 degree, transforming the split points as well
                r_im, label_rot = rotate(im, -1, label)
                label_rot = [int(round(t)) for t in label_rot]
                cv2.imwrite("./gen_imgs/" + str(cnt_img) + ".png", r_im, [int(cv2.IMWRITE_JPEG_QUALITY), 5])
                dataset.append([str(cnt_img) + '.png'] + label_rot)
                cnt_img += 1
                r_im, label_rot = rotate(im, 1, label)
                label_rot = [int(round(t)) for t in label_rot]
                cv2.imwrite("./gen_imgs/" + str(cnt_img) + ".png", r_im, [int(cv2.IMWRITE_JPEG_QUALITY), 5])
                dataset.append([str(cnt_img) + '.png'] + label_rot)
                cnt_img += 1
                # re-initialize a blank line
                im = Image.new("RGB", (1024, 48), (255, 255, 255))
                dr = ImageDraw.Draw(im)
                pos_x = 0
                offset_x = random.randint(0, 20)
                pos_x = pos_x + offset_x  # use a random gap between characters
                label = []
            dr.text((pos_x, (48 - Word_size) / 2), word, font=font, fill="#000000")
            label += [pos_x, pos_x + Word_size]
            pos_x += Word_size
    return dataset
# #### main ####
# data, cnt_img = Synthesizing_from_seg_data()
data, cnt_img = [], 0  # fall back to an empty dataset when the segmented-data step is skipped
dataset = Synthesizing_from_gen_data(data, cnt_img)
np.savetxt('dataset.txt', dataset, fmt='%s')