-
Notifications
You must be signed in to change notification settings - Fork 0
/
models.py
115 lines (90 loc) · 4.04 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
biclustlib: A Python library of biclustering algorithms and evaluation measures.
Copyright (C) 2017 Victor Alexandre Padilha
This file is part of biclustlib.
biclustlib is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
biclustlib is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import numpy as np
class Bicluster:
    """Model a bicluster as a set of row indices and a set of column indices.

    Parameters
    ----------
    rows : numpy.ndarray
        Rows of the bicluster, either as an integer array of row indices
        (row indexing starts at 0) or as a boolean mask over the rows.
    cols : numpy.ndarray
        Columns of the bicluster, either as an integer array of column
        indices (column indexing starts at 0) or as a boolean mask over the
        columns. Must be of the same kind (mask or indices) as ``rows``.
    data : numpy.ndarray, optional
        Values of the bicluster. Its shape must be
        ``(len(rows), len(cols))`` after masks are converted to indices; an
        array whose first dimension is empty is also accepted when the
        bicluster has no rows.

    Attributes
    ----------
    rows : numpy.ndarray
        Integer row indices of the bicluster.
    cols : numpy.ndarray
        Integer column indices of the bicluster.
    data : numpy.ndarray or None
        The array passed as ``data``, or None when no data was given.

    Raises
    ------
    ValueError
        If ``rows`` and ``cols`` are not both boolean or both integer
        numpy arrays, or if ``data`` is not a numpy array of a compatible
        shape.
    """

    def __init__(self, rows, cols, data=None):
        # Both arguments must be arrays before their dtypes can be inspected;
        # anything else is reported through the documented ValueError.
        if not (isinstance(rows, np.ndarray) and isinstance(cols, np.ndarray)):
            raise ValueError("rows and cols must be bool or int numpy.arrays")

        # np.issubdtype is used instead of comparing against the removed
        # np.bool / np.int aliases, so every bool/integer width is accepted.
        both_masks = (np.issubdtype(rows.dtype, np.bool_)
                      and np.issubdtype(cols.dtype, np.bool_))
        both_indices = (np.issubdtype(rows.dtype, np.integer)
                        and np.issubdtype(cols.dtype, np.integer))

        if both_masks:
            # Boolean masks are stored as the indices of their True entries.
            self.rows = np.nonzero(rows)[0]
            self.cols = np.nonzero(cols)[0]
        elif both_indices:
            self.rows = rows
            self.cols = cols
        else:
            raise ValueError("rows and cols must be bool or int numpy.arrays")

        # Always define the attribute so that reading it never fails.
        self.data = None

        if data is not None:
            n, m = len(self.rows), len(self.cols)
            if not isinstance(data, np.ndarray):
                raise ValueError("data must be a numpy.ndarray")
            # The ndim guard keeps len() from failing on 0-d arrays.
            empty_match = n == 0 and data.ndim > 0 and len(data) == 0
            if data.shape != (n, m) and not empty_match:
                raise ValueError(
                    "data must have shape ({0}, {1}), got {2}".format(n, m, data.shape))
            self.data = data

    def intersection(self, other):
        """Returns a bicluster that represents the area of overlap between two biclusters."""
        rows_intersec = np.intersect1d(self.rows, other.rows)
        cols_intersec = np.intersect1d(self.cols, other.cols)
        return Bicluster(rows_intersec, cols_intersec)

    def union(self, other):
        """Returns a bicluster whose rows and columns are the unions of those of two biclusters."""
        rows_union = np.union1d(self.rows, other.rows)
        cols_union = np.union1d(self.cols, other.cols)
        return Bicluster(rows_union, cols_union)

    def overlap(self, other):
        """Returns the area of the intersection divided by the area of the smaller bicluster.

        When either bicluster is empty (area 0) the result is 0.0, since
        there is nothing to overlap with and the ratio would be undefined.
        """
        min_area = min(self.area, other.area)
        if min_area == 0:
            return 0.0
        return self.intersection(other).area / min_area

    @property
    def area(self):
        """Calculates the number of matrix elements of the bicluster."""
        return len(self.rows) * len(self.cols)

    def sort(self):
        """Sorts the array of row and the array of column indices of the bicluster."""
        # In-place sort of the stored index arrays.
        self.rows.sort()
        self.cols.sort()

    def __str__(self):
        return 'Bicluster(rows={0}, cols={1})'.format(self.rows, self.cols)
class Biclustering:
    """Container for the biclusters that make up one biclustering solution.

    Parameters
    ----------
    biclusters : list
        Instances of the Bicluster class. The object is stored as given in
        the ``biclusters`` attribute.

    Raises
    ------
    ValueError
        If any element of ``biclusters`` is not a Bicluster instance.
    """

    def __init__(self, biclusters):
        # Reject the whole collection as soon as one foreign element is found.
        for candidate in biclusters:
            if not isinstance(candidate, Bicluster):
                raise ValueError("biclusters list contains an element that is not a Bicluster instance")
        self.biclusters = biclusters

    def __str__(self):
        # One line per bicluster, in stored order.
        rendered = [str(bicluster) for bicluster in self.biclusters]
        return '\n'.join(rendered)