# 3_compute_spk_dvecs_no_flatten.py
#
# Compute a speaker embedding for every .wav file found in each sub-folder of
# --in_dir and save each embedding as a .npy file in a single output folder.
import os, sys
from speaker_encoder.voice_encoder import SpeakerEncoder
from speaker_encoder.audio import preprocess_wav
from pathlib import Path
import numpy as np
from os.path import join, basename, split
from tqdm import tqdm
from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor
from functools import partial
import glob
import argparse
def build_from_path(in_dir, out_dir, weights_fpath, num_workers=1):
    """Compute and save a speaker embedding for each ``.wav`` file in ``in_dir``.

    Only files directly inside ``in_dir`` are considered (no recursion). One
    job per file is submitted to a process pool, and the results are collected
    in sorted-path order while a progress bar is shown.

    Args:
        in_dir: Directory searched for ``*.wav`` files.
        out_dir: Directory the ``.npy`` embedding files are written to.
        weights_fpath: Path of the speaker-encoder checkpoint, forwarded
            unchanged to every job.
        num_workers: Number of worker processes in the pool.

    Returns:
        A list holding the value returned by ``_compute_spkEmbed`` for each
        wav file, in sorted wav-path order.
    """
    # The pattern has to be relative: with a leading "/" os.path.join would
    # discard ``in_dir`` and the glob would search the filesystem root.
    wavfile_paths = sorted(glob.glob(os.path.join(in_dir, '*.wav')))

    # The context manager shuts the pool down even when a job raises, so
    # worker processes are not leaked between successive calls.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = [
            executor.submit(_compute_spkEmbed, out_dir, wav_path, weights_fpath)
            for wav_path in wavfile_paths
        ]
        return [future.result() for future in tqdm(futures)]
def _compute_spkEmbed(out_dir, wav_path, weights_fpath):
    """Embed a single wav file and save the embedding as ``<utt_id>.npy``.

    Args:
        out_dir: Directory the ``.npy`` file is written to.
        wav_path: Path of the wav file to embed.
        weights_fpath: Path of the speaker-encoder checkpoint to load.

    Returns:
        The base name of the ``.npy`` file that was written.
    """
    # splitext removes only the extension. str.rstrip(".wav") would strip any
    # trailing run of the characters '.', 'w', 'a', 'v' and corrupt ids such
    # as "raw.wav" (-> "r").
    utt_id = os.path.splitext(os.path.basename(wav_path))[0]

    wav = preprocess_wav(Path(wav_path))

    # NOTE(review): the encoder is rebuilt from the checkpoint on every call;
    # caching it once per worker process would likely be faster -- confirm
    # that SpeakerEncoder is safe to reuse before changing this.
    encoder = SpeakerEncoder(weights_fpath)
    embed = encoder.embed_utterance(wav)

    fname_save = os.path.join(out_dir, f"{utt_id}.npy")
    np.save(fname_save, embed, allow_pickle=False)
    return os.path.basename(fname_save)
def preprocess(in_dir, out_dir, weights_fpath, num_workers):
    """Make sure ``out_dir`` exists, then run ``build_from_path`` on ``in_dir``.

    The list returned by ``build_from_path`` is not used; this function
    returns ``None``.
    """
    os.makedirs(out_dir, exist_ok=True)
    build_from_path(
        in_dir=in_dir,
        out_dir=out_dir,
        weights_fpath=weights_fpath,
        num_workers=num_workers,
    )
if __name__ == "__main__":
    # Command-line entry point: embed the wav files of every sub-folder of
    # --in_dir into one shared output folder named after the checkpoint step.
    parser = argparse.ArgumentParser()
    parser.add_argument('--in_dir', type=str,
                        default='/home/v-jiewang/data/VCTK-corpus/VCTK-Corpus/wav16_downsample_trim_topdb30',
                        help='directory whose sub-folders contain the wav files')
    parser.add_argument('--num_workers', type=int, default=20,
                        help='number of worker processes')
    parser.add_argument('--out_dir_root', type=str,
                        default='/home/v-jiewang/data/VCTK-corpus/VCTK-Corpus/wav16_downsample_trim_topdb30_d_vector',
                        help='root directory under which the embedding folder is created')
    parser.add_argument('--spk_encoder_ckpt', type=str,
                        default='speaker_encoder/ckpt/pretrained_bak_5805000.pt',
                        help='path of the speaker-encoder checkpoint')
    args = parser.parse_args()

    # Only sub-directories are processed; stray regular files in --in_dir
    # (e.g. a README or metadata file) are skipped.
    split_list = sorted(
        entry for entry in os.listdir(args.in_dir)
        if os.path.isdir(os.path.join(args.in_dir, entry))
    )

    # With the current default of 20 this fallback never triggers; it only
    # matters if the --num_workers default is changed to None.
    args.num_workers = args.num_workers if args.num_workers is not None else cpu_count()
    print("Number of workers: ", args.num_workers)

    # Last "_"-separated token of the checkpoint file stem, e.g.
    # "pretrained_bak_5805000.pt" -> "5805000".
    ckpt_step = os.path.basename(args.spk_encoder_ckpt).split('.')[0].split('_')[-1]
    spk_embed_out_dir = os.path.join(args.out_dir_root, f"GE2E_spkEmbed_step_{ckpt_step}")
    print("[INFO] spk_embed_out_dir: ", spk_embed_out_dir)
    os.makedirs(spk_embed_out_dir, exist_ok=True)

    # Every sub-folder writes into the same output directory.
    for data_split in split_list:
        print("data_split", data_split)
        in_dir = os.path.join(args.in_dir, data_split)
        preprocess(in_dir, spk_embed_out_dir, args.spk_encoder_ckpt, args.num_workers)

    print("DONE!")
    sys.exit(0)