-
Notifications
You must be signed in to change notification settings - Fork 5
/
config.yml
53 lines (44 loc) · 1.32 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
---
# Model architecture to use.
# Listed options: "quartznet5x5", "quartznet10x5", "quartznet15x5"
model: quartznet5x5
# Datasets
# Each split is its own mapping so that `data_list` / `data_dirs` are not
# duplicate top-level keys (most loaders silently keep only the last one).
train:
  data_list: assets/train_data.txt
  data_dirs: ["LibriTTS/train-clean-360"]
  # NOTE(review): presumably these switch on the augmentations configured
  # under "# Augmentation" for the training split - confirm in the loader.
  apply_speed_perturbation: true
  apply_masking: true
val:
  data_list: assets/val_data.txt
  data_dirs: ["LibriTTS/dev-clean"]
test:
  # NOTE(review): presumably the checkpoint loaded for evaluation - confirm
  # against the test script.
  weights: checkpoints/model_best.pt
  data_list: assets/test_data.txt
  data_dirs: ["LibriTTS/test-clean"]
# Training
# Maximum file length in seconds. Can be used to avoid OOM (out-of-memory)
# errors.
max_length: 12
epochs: 100
batch_size: 32
# Constant learning rate value. Omitted if using OneCycleLR.
# Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `5e-4` as the string "5e-4", not as a float.
learning_rate: 5.0e-4
weight_decay: 0.0001
checkpoint_dir: checkpoints/
log_dir: logs/
use_onecyclelr: true
normalize: true
num_workers: 2  # dataloader's worker count
# OneCycleLR parameters
# https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.OneCycleLR.html#torch.optim.lr_scheduler.OneCycleLR
# Meanings below are taken from the PyTorch documentation linked above.
# Upper learning rate boundary of the cycle. Written with a decimal point so
# YAML 1.1 loaders (e.g. PyYAML) parse a float rather than the string "1e-3".
max_lr: 1.0e-3
# Initial learning rate is max_lr / div_factor.
div_factor: 25.0
# Fraction of the cycle spent increasing the learning rate.
pct_start: 0.3
# Augmentation
# NOTE(review): presumably the relative range of the speed change - confirm
# against the augmentation code.
speed_perturbation: 0.1
# Masking options are nested so that `masking` is a mapping rather than a
# null value followed by unrelated top-level keys.
masking:
  # Size of each spectrogram segment (time axis) to apply augmentation to.
  # Set to -1 to apply PyTorch's original masking.
  chunk_size: 60
  freq_masking: 10
  time_masking: 10
# NOTE(review): presumably the mel-spectrogram extraction settings (`sr` looks
# like the sample rate in Hz) - confirm against the feature-extraction code.
spec_params:
  sr: 22050
  n_mels: 64
  n_fft: 320
  win_length: 320
  hop_length: 160
# File with the mean and std of the data.
# NOTE(review): kept as a top-level key; if the consumer reads it from
# `spec_params`, indent it under that mapping instead.
stats: assets/stats_64.npy