inference.py (forked from Amelie-Schreiber/GET)
#!/usr/bin/python
# -*- coding:utf-8 -*-
import argparse
import json
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from train import create_dataset
from data.pdb_utils import VOCAB
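
# NOTE: `train` and `data.pdb_utils` are repo-local modules, so this script is
# presumably meant to be run from the repository root.
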
def parse():
    parser = argparse.ArgumentParser(description='inference dG')
    parser.add_argument('--test_set', type=str, required=True, help='Path to the test set')
    parser.add_argument('--task', type=str, default=None,
                        choices=['PPA', 'PLA', 'LEP', 'PDBBind', 'NL', 'PLA_PS', 'LEP_PS'],
                        help='PPA: protein-protein affinity, ' +
                             'PLA: protein-ligand affinity (small molecules), ' +
                             'LEP: ligand efficacy prediction')
    parser.add_argument('--fragment', type=str, default=None, help='Fragmentation method for small molecules')
    parser.add_argument('--ckpt', type=str, required=True, help='Path to the checkpoint')
    parser.add_argument('--save_path', type=str, default=None, help='Path to save the results')
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
    parser.add_argument('--num_workers', type=int, default=4, help='Number of workers to use')
    parser.add_argument('--gpu', type=int, default=-1, help='GPU to use, -1 for CPU')
    return parser.parse_args()

def main(args):
    # set up the residue/fragment tokenizer before building the dataset
    VOCAB.load_tokenizer(args.fragment)

    # load model (the checkpoint stores the full pickled model, not just a state dict)
    model = torch.load(args.ckpt, map_location='cpu')
    device = torch.device('cpu' if args.gpu == -1 else f'cuda:{args.gpu}')
    model.to(device)
    model.eval()

    # load data
    test_set = create_dataset(args.task, args.test_set, fragment=args.fragment)
    test_loader = DataLoader(test_set, batch_size=args.batch_size,
                             num_workers=args.num_workers,
                             collate_fn=test_set.collate_fn)
    items = test_set.indexes

    # save path: default to <checkpoint name>_results.jsonl next to the checkpoint
    if args.save_path is None:
        save_path = '.'.join(args.ckpt.split('.')[:-1]) + '_results.jsonl'
    else:
        save_path = args.save_path
    fout = open(save_path, 'w')
    idx = 0
    # batch_id = 0
    for batch in tqdm(test_loader):
        with torch.no_grad():
            # move data to the target device
            for k in batch:
                if hasattr(batch[k], 'to'):
                    batch[k] = batch[k].to(device)
            del batch['label']  # labels are not used during inference
            # for attention visualization
            # model.encoder.encoder.prefix = str(batch_id)
            results = model.infer(batch)
            if isinstance(results, tuple):
                # multi-output models: convert each tensor to a list, then
                # transpose (out1s, out2s, ...) into one tuple per sample
                results = (res.tolist() for res in results)
                results = (res for res in zip(*results))
            else:
                results = results.tolist()
        for pred_label in results:
            item_id = items[idx]['id']
            gt = items[idx]['label'] if 'label' in items[idx] else items[idx]['affinity']['neglog_aff']
            out_dict = {
                'id': item_id,
                'label': pred_label,
                'task': args.task,
                'gt': gt
            }
            fout.write(json.dumps(out_dict) + '\n')
            idx += 1
        # batch_id += 1
    fout.close()

if __name__ == '__main__':
main(parse())
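
# Example invocation (all paths below are hypothetical):
#   python inference.py --task PPA --test_set /path/to/test_set \
#       --ckpt /path/to/checkpoint.ckpt --gpu 0
# Each line of the resulting .jsonl file is one record of the form
#   {"id": ..., "label": <prediction>, "task": "PPA", "gt": <ground truth>}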