-
Notifications
You must be signed in to change notification settings - Fork 226
/
dataset.py
executable file
·109 lines (81 loc) · 3.44 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import glob
import cv2
import numpy as np
import scipy.io as sio
class __AbstractDataset(object):
    """Common interface that every concrete dataset wrapper implements.

    Each subclass encapsulates how one particular dataset parses its
    images and its annotation files; this base class only declares the
    two entry points and provides no behaviour of its own.
    """

    def load_img(self, path):
        """Load the image stored at `path`; subclasses must override."""
        raise NotImplementedError

    def load_ann(self, path, with_type=False):
        """Load the annotation stored at `path`; subclasses must override.

        `with_type` asks for the per-pixel type map in addition to the
        instance map, for datasets that provide one.
        """
        raise NotImplementedError
####
class __Kumar(__AbstractDataset):
    """Loader for the Kumar dataset, originally introduced in:

    Kumar, Neeraj, Ruchika Verma, Sanuj Sharma, Surabhi Bhargava, Abhishek Vahadane,
    and Amit Sethi. "A dataset and a technique for generalized nuclear segmentation for
    computational pathology." IEEE transactions on medical imaging 36, no. 7 (2017): 1550-1560.
    """

    def load_img(self, path):
        """Read the image at `path` with OpenCV and convert it from BGR to RGB."""
        bgr = cv2.imread(path)
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    def load_ann(self, path, with_type=False):
        """Return the instance map stored in the .mat file at `path`.

        The "inst_map" entry is cast to int32 and given a trailing
        singleton axis (HxW -> HxWx1, assuming the stored map is HxW).
        This dataset has no type annotations, so `with_type` must be
        left False.
        """
        assert not with_type, "Not support"
        inst_map = sio.loadmat(path)["inst_map"]
        return np.expand_dims(inst_map.astype("int32"), axis=-1)
####
class __CPM17(__AbstractDataset):
    """Loader for the CPM 2017 dataset, originally introduced in:

    Vu, Quoc Dang, Simon Graham, Tahsin Kurc, Minh Nguyen Nhat To, Muhammad Shaban,
    Talha Qaiser, Navid Alemi Koohbanani et al. "Methods for segmentation and classification
    of digital microscopy tissue images." Frontiers in bioengineering and biotechnology 7 (2019).
    """

    def load_img(self, path):
        """Read the image at `path` with OpenCV and convert it from BGR to RGB."""
        raw_bgr = cv2.imread(path)
        rgb = cv2.cvtColor(raw_bgr, cv2.COLOR_BGR2RGB)
        return rgb

    def load_ann(self, path, with_type=False):
        """Return the instance map stored in the .mat file at `path`.

        Only instance annotations are supported here, so `with_type`
        must be False. The "inst_map" entry is cast to int32 and a
        trailing channel axis is appended (HxW -> HxWx1, assuming the
        stored map is HxW).
        """
        assert not with_type, "Not support"
        mat_contents = sio.loadmat(path)
        instances = mat_contents["inst_map"].astype("int32")
        # Indexing with newaxis appends the same trailing axis as expand_dims(-1).
        return instances[..., np.newaxis]
####
class __CoNSeP(__AbstractDataset):
    """Defines the CoNSeP dataset as originally introduced in:

    Graham, Simon, Quoc Dang Vu, Shan E. Ahmed Raza, Ayesha Azam, Yee Wah Tsang, Jin Tae Kwak,
    and Nasir Rajpoot. "Hover-Net: Simultaneous segmentation and classification of nuclei in
    multi-tissue histology images." Medical Image Analysis 58 (2019): 101563
    """

    def load_img(self, path):
        """Read the image at `path` with OpenCV and convert it from BGR to RGB."""
        return cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)

    def load_ann(self, path, with_type=False):
        """Load the annotation stored in the .mat file at `path`.

        Args:
            path: location of a .mat file holding an "inst_map" entry and,
                when `with_type` is True, a "type_map" entry.
            with_type: if True, stack the (merged) type map behind the
                instance map; otherwise return the instance map alone.

        Returns:
            An int32 array. Assuming the stored maps are HxW, the result is
            HxWx1 without types and HxWx2 (instance, type) with types.
        """
        # Parse the file once and reuse the result for both entries; the
        # .mat file does not need to be read a second time for the type map.
        mat = sio.loadmat(path)
        ann_inst = mat["inst_map"]

        if not with_type:
            return np.expand_dims(ann_inst, -1).astype("int32")

        ann_type = mat["type_map"]
        # Merge classes for CoNSeP (in the paper only 3 nuclei classes and
        # background are utilised): labels 3 and 4 collapse to 3, then
        # labels 5, 6 and 7 collapse to 4. The order matters, because 4 is
        # reused as a target only after the original 4s have been remapped.
        # If an own dataset is used, this merging may need to be modified.
        ann_type[(ann_type == 3) | (ann_type == 4)] = 3
        ann_type[(ann_type == 5) | (ann_type == 6) | (ann_type == 7)] = 4

        return np.dstack([ann_inst, ann_type]).astype("int32")
####
def get_dataset(name):
    """Return a pre-defined dataset object associated with `name`.

    Args:
        name: dataset identifier, matched case-insensitively against
            "kumar", "cpm17" and "consep".

    Returns:
        A freshly constructed dataset object for the requested dataset.

    Raises:
        AssertionError: if `name` does not match any known dataset.
    """
    # Lambdas defer class lookup until the chosen entry is actually built.
    name_dict = {
        "kumar": lambda: __Kumar(),
        "cpm17": lambda: __CPM17(),
        "consep": lambda: __CoNSeP(),
    }

    # Use the same lower-cased key for both the membership test and the
    # lookup, so that e.g. "Kumar" or "CoNSeP" resolve instead of passing
    # the check and then failing on the dictionary access.
    key = name.lower()
    if key not in name_dict:
        # Raised explicitly (rather than `assert False`) so the check is not
        # stripped under `python -O`; the exception type is kept unchanged.
        raise AssertionError("Unknown dataset `%s`" % name)
    return name_dict[key]()