-
Notifications
You must be signed in to change notification settings - Fork 2
/
fkmer.py
51 lines (45 loc) · 1.35 KB
/
fkmer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import utils
import itertools
import numpy as np
import save
def generate(X, seqType, args):
    '''
    Build k-mer occurrence-count features for every sequence and save them.

    Each sequence in X is first truncated with ``x[:args.terminusLength]``.
    For the truncated sequence, one count is produced for every k-mer of
    length 1..args.kTuple over the alphabet returned by
    ``utils.sequenceElements(seqType)`` (shorter k-mers first, each length in
    ``itertools.product`` order). The per-sequence vectors are stacked into a
    single array and handed to ``save.datasetSave`` with the tag 'fkmer'.

    # Reference-1: (http://rosalind.info/glossary/k-mer-composition/) # It is also called "k-mer composition".
    # Reference-2: iRecSpot-EF: https://www.sciencedirect.com/science/article/abs/pii/S0010482518302981

    :param X: iterable of sequences; each item must support slicing and
        ``.count(substring)`` (presumably plain strings -- confirm with caller).
    :param seqType: 'DNA', 'RNA' or 'PROT'. Any other value makes the
        reported feature total 0.
    :param args: object exposing ``kTuple`` (maximum k-mer length) and
        ``terminusLength`` (slice bound applied to every sequence).
    :return: None. The feature matrix is passed to ``save.datasetSave``.
    '''
    elements = utils.sequenceElements(seqType)

    # The k-mer vocabulary depends only on the alphabet and kTuple, so build
    # it once instead of re-enumerating it for every sequence.
    kmers = [
        ''.join(combo)
        for size in range(1, args.kTuple + 1)
        for combo in itertools.product(elements, repeat=size)
    ]

    rows = []
    for x in X:
        prefix = x[:args.terminusLength]
        # NOTE(review): str.count() counts NON-overlapping matches, e.g.
        # 'AAAA'.count('AA') == 2, whereas the k-mer composition cited in the
        # docstring counts overlapping windows (3 here). Left unchanged so
        # existing feature values stay the same -- confirm this is intended.
        rows.append(np.array([prefix.count(kmer) for kmer in kmers]))
    #end-for
    T = np.array(rows)

    # Expected number of features: sum of alphabetSize**k for k = 1..kTuple,
    # using the fixed alphabet sizes 4 (DNA/RNA) and 20 (PROT).
    alphabetSize = {'DNA': 4, 'RNA': 4, 'PROT': 20}.get(seqType)
    if alphabetSize is None:
        # Unrecognised sequence type: keep the original fallback of 0.
        totalFeature = 0
    else:
        totalFeature = np.sum([alphabetSize ** k for k in range(1, args.kTuple + 1)])
    #end-if

    save.datasetSave(T, totalFeature, 'fkmer')
#end-def