-
Notifications
You must be signed in to change notification settings - Fork 0
/
sanitycheck.py
46 lines (35 loc) · 1.55 KB
/
sanitycheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os
def count_lines_in_csv(file_path):
    """Return the number of non-blank lines in the text file at *file_path*.

    Lines that are empty or contain only whitespace are skipped, so stray
    blank lines (for example extra newlines at the end of the file) are not
    counted as entries.

    The file is decoded as UTF-8 instead of the platform default encoding,
    and undecodable bytes are replaced rather than raising: only the number
    of lines matters here, not their exact text.

    Args:
        file_path: Path of the file to read.

    Returns:
        The count of non-blank lines, including any header line.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        # str.strip() yields '' for whitespace-only lines, which is falsy.
        return sum(1 for line in file if line.strip())
def count_wav_files(directory):
    """Count the entries of *directory* whose names end with ``.wav``.

    Only the directory's immediate entries are examined (no recursion) and
    the suffix comparison is case-sensitive.

    Args:
        directory: Path of the directory to list.

    Returns:
        The number of entry names ending in ``.wav``.
    """
    total = 0
    for entry_name in os.listdir(directory):
        if entry_name.endswith('.wav'):
            total += 1
    return total
def main():
    """Check that the number of .wav files matches the metadata entries.

    Looks for ``metadata.csv`` and a ``wavs`` directory relative to the
    current working directory. One line of the metadata file is treated as
    a format header, so the expected .wav count is the value returned by
    ``count_lines_in_csv`` minus one. The outcome is printed to stdout with
    ANSI colour codes; nothing is returned.
    """
    report_hint = "Please report this issue at: https://github.com/DominicTWHV/LJSpeech_Dataset_Generator/issues"
    csv_path = 'metadata.csv'
    clips_dir = 'wavs'

    # Without the metadata file there is nothing to compare against.
    if not os.path.isfile(csv_path):
        print("\033[91mError: metadata.csv file not found!\033[0m")
        print(report_hint)
        return

    # The format header line is not an entry, hence the minus one.
    entries_expected = count_lines_in_csv(csv_path) - 1

    # The audio directory must exist before its contents can be counted.
    if not os.path.isdir(clips_dir):
        print("\033[91mError: wavs/ directory not found!\033[0m")
        print(report_hint)
        return

    clips_found = count_wav_files(clips_dir)

    # Report a mismatch first so the success message is the fall-through.
    if entries_expected != clips_found:
        print("\033[91mCritical Error: Mismatch! Expected {}, but found {} .wav files.\033[0m".format(entries_expected, clips_found))
        print(report_hint)
        return

    print("\033[92mSanity check success: Number of .wav files matches the number of metadata entries.\033[0m")
# Run the sanity check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()