-
Notifications
You must be signed in to change notification settings - Fork 29
/
train.py
122 lines (96 loc) · 4.98 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
'''
This code is based on https://github.com/okankop/Efficient-3DCNNs
'''
import torch
from torch.autograd import Variable
import time
from utils import AverageMeter, calculate_accuracy
@torch.no_grad()
def _apply_modality_mask(audio_inputs, visual_inputs, targets, mask):
    """Enlarge a batch with copies in which one modality is degraded.

    Supported modes:

    * ``'noise'`` -- the batch is tripled: the untouched pair, a copy whose
      audio is replaced by standard-normal noise, and a copy whose visual
      input is replaced by standard-normal noise.
    * ``'softhard'`` -- the batch is quadrupled: the untouched pair, a copy
      where audio is scaled by a per-sample coefficient ``c`` drawn from
      {0.00, 0.01, ..., 0.99} and the visual input by ``1 - c``, a copy with
      zeroed audio, and a copy with zeroed visual input.

    Any other value leaves the batch untouched. In both supported modes the
    targets are repeated to match and all three tensors are shuffled with
    one shared permutation along dim 0.

    Args:
        audio_inputs: audio batch tensor (batch on dim 0).
        visual_inputs: visual batch tensor (batch on dim 0).
        targets: label tensor (batch on dim 0).
        mask: masking mode string.

    Returns:
        Tuple ``(audio_inputs, visual_inputs, targets)``.
    """
    if mask == 'noise':
        # *_like keeps the dtype/device of the real inputs, so the
        # concatenation below cannot hit a dtype or device mismatch.
        audio_parts = (audio_inputs,
                       torch.randn_like(audio_inputs),
                       audio_inputs)
        visual_parts = (visual_inputs,
                        visual_inputs,
                        torch.randn_like(visual_inputs))
    elif mask == 'softhard':
        # One coefficient per sample; integer draw / 100 gives a float in
        # [0, 0.99].
        ratio = torch.randint(low=0, high=100, size=(audio_inputs.size(0),),
                              device=audio_inputs.device) / 100
        # Shape the coefficients as (batch, 1, ..., 1) so they broadcast
        # over every non-batch dimension instead of materialising a
        # full-size repeated tensor.
        audio_scale = ratio.reshape(-1, *[1] * (audio_inputs.dim() - 1))
        visual_scale = (1 - ratio).reshape(
            -1, *[1] * (visual_inputs.dim() - 1)).to(visual_inputs.device)
        audio_parts = (audio_inputs,
                       audio_inputs * audio_scale,
                       torch.zeros_like(audio_inputs),
                       audio_inputs)
        visual_parts = (visual_inputs,
                        visual_inputs * visual_scale,
                        visual_inputs,
                        torch.zeros_like(visual_inputs))
    else:
        # Unrecognised modes are a no-op.
        return audio_inputs, visual_inputs, targets

    audio_inputs = torch.cat(audio_parts, dim=0)
    visual_inputs = torch.cat(visual_parts, dim=0)
    targets = torch.cat((targets,) * len(audio_parts), dim=0)

    # A single permutation keeps audio, visual and label rows aligned.
    order = torch.randperm(audio_inputs.size(0))
    return audio_inputs[order], visual_inputs[order], targets[order]


def train_epoch_multimodal(epoch, data_loader, model, criterion, optimizer, opt,
                           epoch_logger, batch_logger):
    """Run one training epoch of an audio-visual model.

    For every batch: optionally augment it with modality masking, fold
    dim 2 of the visual input into the batch dimension, run the forward and
    backward pass, step the optimizer and log loss / top-1 / top-5 accuracy.
    A progress line is printed every 10 batches and an epoch summary is
    logged at the end.

    Args:
        epoch: epoch number; used for logging and for the global iteration
            counter ``(epoch - 1) * len(data_loader) + batch``.
        data_loader: sized iterable yielding
            ``(audio_inputs, visual_inputs, targets)`` tensors. The visual
            input must be 5-D.
        model: module called as ``model(audio_inputs, visual_inputs)``.
        criterion: loss callable ``criterion(outputs, targets)``.
        optimizer: optimizer; the learning rate of its first parameter
            group is logged.
        opt: options object; ``opt.device`` and ``opt.mask`` are read.
        epoch_logger: object with ``log(dict)`` receiving per-epoch values.
        batch_logger: object with ``log(dict)`` receiving per-batch values.
    """
    print('train at epoch {}'.format(epoch))

    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end_time = time.time()
    for i, (audio_inputs, visual_inputs, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)

        # Only the targets are moved explicitly.
        # NOTE(review): presumably the model (or a wrapper around it) places
        # the inputs on the right device -- confirm.
        targets = targets.to(opt.device)

        if opt.mask is not None:
            audio_inputs, visual_inputs, targets = _apply_modality_mask(
                audio_inputs, visual_inputs, targets, opt.mask)

        # Swap dims 1 and 2, then merge the new dim 1 into the batch dim so
        # the visual tensor becomes 4-D.
        # NOTE(review): assumes a (batch, channels, frames, H, W) layout --
        # confirm against the dataset.
        visual_inputs = visual_inputs.permute(0, 2, 1, 3, 4)
        visual_inputs = visual_inputs.reshape(
            visual_inputs.shape[0] * visual_inputs.shape[1],
            *visual_inputs.shape[2:])

        outputs = model(audio_inputs, visual_inputs)
        loss = criterion(outputs, targets)

        # Detached tensors are enough for bookkeeping; the meters must not
        # keep the autograd graph alive.
        batch_size = audio_inputs.size(0)
        losses.update(loss.detach(), batch_size)
        prec1, prec5 = calculate_accuracy(outputs.detach(), targets,
                                          topk=(1, 5))
        top1.update(prec1, batch_size)
        top5.update(prec5, batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        lr = optimizer.param_groups[0]['lr']
        batch_logger.log({
            'epoch': epoch,
            'batch': i + 1,
            'iter': (epoch - 1) * len(data_loader) + (i + 1),
            'loss': losses.val.item(),
            'prec1': top1.val.item(),
            'prec5': top5.val.item(),
            'lr': lr
        })
        if i % 10 == 0:
            print('Epoch: [{0}][{1}/{2}]\t lr: {lr:.5f}\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.5f} ({top1.avg:.5f})\t'
                  'Prec@5 {top5.val:.5f} ({top5.avg:.5f})'.format(
                      epoch,
                      i,
                      len(data_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5,
                      lr=lr))

    # NOTE(review): with an empty data_loader the meters are never updated;
    # whether `.avg.item()` works then depends on AverageMeter -- confirm.
    epoch_logger.log({
        'epoch': epoch,
        'loss': losses.avg.item(),
        'prec1': top1.avg.item(),
        'prec5': top5.avg.item(),
        'lr': optimizer.param_groups[0]['lr']
    })
def train_epoch(epoch, data_loader, model, criterion, optimizer, opt,
                epoch_logger, batch_logger):
    """Dispatch one training epoch according to ``opt.model``.

    Prints the epoch banner, then hands all arguments unchanged to
    ``train_epoch_multimodal`` when ``opt.model`` is ``'multimodalcnn'``.
    For any other model name nothing further happens. Always returns None.
    """
    banner = 'train at epoch {}'.format(epoch)
    print(banner)

    if opt.model != 'multimodalcnn':
        # Only the multimodal CNN has a training routine wired up here.
        return

    train_epoch_multimodal(
        epoch, data_loader, model, criterion, optimizer, opt,
        epoch_logger, batch_logger,
    )