-
Notifications
You must be signed in to change notification settings - Fork 43
/
myutils.py
146 lines (127 loc) · 4.47 KB
/
myutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import sys
import re
import numpy as np
import argparse
import random
# from keras import backend as K
def tokenize(sent):
'''
data_reader.tokenize('a#b')
['a', '#', 'b']
'''
return [x.strip().lower() for x in re.split('(\W+)?', sent) if x.strip()]
def map_to_idx(x, vocab):
'''
x is a sequence of tokens
'''
# 0 is for UNK
return [ vocab[w] if w in vocab else 0 for w in x ]
def map_to_txt(x,vocab):
textify=map_to_idx(x,inverse_map(vocab))
return ' '.join(textify)
def inverse_map(vocab):
return {v: k for k, v in vocab.items()}
def inverse_ids2txt(X_inp,Y_inp,vocabx,vocaby,outp=None):
'''
takes x,y int seqs and maps them back to strings
'''
inv_map_x = inverse_map(vocabx)
inv_map_y = inverse_map(vocaby)
if outp:
for x,y,z in zip(X_inp,Y_inp,outp):
print(' '.join(map_to_idx(x,inv_map_x)))
print(' '.join(map_to_idx(y,inv_map_y)))
print(z)
else:
for x,y in zip(X_inp,Y_inp):
print(' '.join(map_to_idx(x,inv_map_x)))
print(' '.join(map_to_idx(y,inv_map_y)))
def create_train_examples(X, Y, yspace, num=-1, balanced=True):
'''
:param X: X seq
:param Y: Y seq
:param yspace: from which to sample
:param num: how many negs, -1 means all of it
:return: x,y,z such that if x,y in X,Y then z=1 else 0
'''
X_inp = []
Y_inp = []
outp = []
for x, y in zip(X, Y):
neg_samples=yspace[:] # copy
neg_samples.remove(y)
if num == -1:
pass
else:
neg_samples=[i for i in random.sample(neg_samples,num)]
if not balanced:
X_inp.append(x)
Y_inp.append(y)
outp.append([1.0, 0.0])
for yn in neg_samples:
if balanced:
X_inp.append(x)
Y_inp.append(y)
outp.append([1.0, 0.0])
X_inp.append(x)
Y_inp.append(yn)
outp.append([0.0, 1.0])
return X_inp, Y_inp, outp
# def load_word2vec_embeddings(vocab_dim,index_dict,word_vectors,output_dim):
# vocab_dim = 300 # dimensionality of your word vectors
# n_symbols = len(index_dict) + 1 # adding 1 to account for 0th index (for masking)
# embedding_weights = np.zeros((n_symbols+1,vocab_dim))
# for word,index in index_dict.items():
# embedding_weights[index,:] = word_vectors[word]
#
# return Embedding(output_dim=output_dim, input_dim=n_symbols + 1, mask_zero=True, weights=[embedding_weights]) # note you have to put embedding weights in a list by convention
def check_layer_output_shape(layer, input_data):
ndim = len(input_data.shape)
layer.input = K.placeholder(ndim=ndim)
layer.set_input_shape(input_data.shape)
expected_output_shape = layer.output_shape[1:]
function = K.function([layer.input], [layer.get_output()])
output = function([input_data])[0]
print(output.shape,expected_output_shape)
assert output.shape[1:] == expected_output_shape
def mytest():
input_data = np.random.random((10, 142, 200))
Y = np.random.random((10, 142, 200))
alpha = np.random.random((10, 142))
print(input_data.shape)
# layer = Reshape(dims=(2, 3))
# layer = Lambda(get_H_n, output_shape=(200,))
# Y = Layer()
# alpha= Layer()
# Y.set_input_shape((None,142,200))
# alpha.set_input_shape((None,142))
# ll=Merge([Y,alpha],mode='join')
# layer=Lambda(get_R, output_shape=(200,1))
# layer.set_previous(ll)
# print(layer.input)
# func = K.function([layer.input], [layer.get_output()])
# layer=Lambda(get_Y, output_shape=(110, 200))
# check_layer_output_shape(layer, input_data)
sys.exit(0)
def show_weights(model,node_name,indices=[0]):
Wr=model.nodes[node_name].get_weights()
for i in indices:
print(Wr[i][0:5])
def show_output(model,node_name,input_data_dict):
lout= K.function([model.inputs[i].input for i in model.input_order],
[model.nodes[node_name].get_output(train=False)])
output= lout([input_data_dict[i] for i in model.input_order])[0]
print('input', input_data_dict['input'][0][0:10])
print(node_name, output[0][0:5])
print('input', input_data_dict['input'][1][0:10])
print(node_name, output[1][0:5])
def categorize(ll):
new_y_train = []
for y in ll:
if y == 1:
new_y_train += [[0, 1]]
else:
new_y_train += [[1, 0]]
return np.asarray(new_y_train)
if __name__=="__main__":
pass