-
Notifications
You must be signed in to change notification settings - Fork 0
/
segment_WAV_by_SRT.py
142 lines (120 loc) · 5.81 KB
/
segment_WAV_by_SRT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import subprocess
from pydub import AudioSegment
import pysrt
import argparse
from tqdm import tqdm
import re
import multiprocessing
def process_wav_srt_pair(wav_file, srt_file, output_dir):
# Extract the base filename without extension for creating the output folder
base_filename = os.path.splitext(os.path.basename(wav_file))[0]
# Create the output directory based on the WAV file name
output_dir = os.path.join(output_dir, f"out_{base_filename}")
os.makedirs(output_dir, exist_ok=True)
# Check if the temporary WAV file exists, if not, perform the conversion
temp_wav_file = "TEMP_WAV.wav"
if not os.path.isfile(temp_wav_file):
print("Converting WAV file...")
ffmpeg_command = [
"ffmpeg",
"-i", wav_file,
"-acodec", "pcm_s16le", # 16-bit PCM codec
"-ar", "44100", # Sample rate (adjust if needed)
"-y", # Overwrite output file if it exists
temp_wav_file
]
# Use subprocess.Popen to capture ffmpeg progress
process = subprocess.Popen(ffmpeg_command, stderr=subprocess.PIPE, universal_newlines=True)
duration_pattern = re.compile(r"Duration: (\d+:\d+:\d+\.\d+),")
for line in process.stderr:
match = duration_pattern.search(line)
if match:
duration_str = match.group(1)
total_duration = sum(float(x) * 60 ** i for i, x in enumerate(reversed(duration_str.split(":"))))
if "time=" in line:
time_str = re.search(r"time=(\d+:\d+:\d+\.\d+)", line).group(1)
current_time = sum(float(x) * 60 ** i for i, x in enumerate(reversed(time_str.split(":"))))
progress = current_time / total_duration * 100
print(f"Conversion Progress: {progress:.2f}%\r", end="")
print("\nConversion complete.")
# Load the temporary WAV file
audio = AudioSegment.from_file(temp_wav_file)
def extract_segments(speaker, segments):
speaker_dir = os.path.join(output_dir, speaker)
os.makedirs(speaker_dir, exist_ok=True)
for i, segment in enumerate(tqdm(segments, desc=f"Processing {speaker}")):
start_time = segment.start
end_time = segment.end
start_ms = (start_time.hours * 3600 + start_time.minutes * 60 + start_time.seconds) * 1000 + start_time.milliseconds
end_ms = (end_time.hours * 3600 + end_time.minutes * 60 + end_time.seconds) * 1000 + end_time.milliseconds
segment_audio = audio[start_ms:end_ms]
segment_audio.export(os.path.join(speaker_dir, f"segment_{i}.wav"), format="wav")
# Remove the speaker label from the text
text = segment.text.split("]:", 1)[1].strip()
with open(os.path.join(speaker_dir, f"segment_{i}.txt"), "w") as text_file:
text_file.write(text)
# Parse the SRT file
subs = pysrt.open(srt_file)
# Extract segments for each speaker
speakers = set()
for sub in subs:
speaker = sub.text.split(":")[0].strip("[]")
speakers.add(speaker)
for speaker in speakers:
segments = [sub for sub in subs if sub.text.startswith(f"[{speaker}]:")]
extract_segments(speaker, segments)
# Delete the temporary WAV file
os.remove(temp_wav_file)
# Delete empty subdirectories within the output directory
for root, dirs, files in os.walk(output_dir, topdown=False):
for directory in dirs:
dir_path = os.path.join(root, directory)
if not os.listdir(dir_path):
os.rmdir(dir_path)
print("Extraction complete. Segments saved in the output directory.")
def process_all_files_in_bulk():
# Create a multiprocessing pool
pool = multiprocessing.Pool()
# Create a list to store the paths of all the output directories
output_dirs = []
# Process all WAV and SRT files in the current directory in parallel
for dirpath, dirnames, filenames in os.walk('.'):
for filename in filenames:
if filename.endswith('.wav'):
wav_file = os.path.join(dirpath, filename)
srt_file = os.path.join(dirpath, filename.replace('.wav', '.srt'))
if os.path.exists(srt_file):
output_dir = os.path.join("bulk_output", os.path.splitext(os.path.basename(wav_file))[0])
output_dirs.append(output_dir)
pool.apply_async(process_wav_srt_pair, args=(wav_file, srt_file, output_dir))
# Close the pool and wait for all processes to complete
pool.close()
pool.join()
# Delete empty subdirectories within the output directories
for output_dir in output_dirs:
for root, dirs, files in os.walk(output_dir, topdown=False):
for directory in dirs:
dir_path = os.path.join(root, directory)
if not os.listdir(dir_path):
os.rmdir(dir_path)
def main():
# Create argument parser
parser = argparse.ArgumentParser(description='Segment a WAV file by SRT subtitles.')
parser.add_argument('--all', action='store_true',
help='Process all WAV and SRT files in the current directory')
# Parse command line arguments
args, remaining_args = parser.parse_known_args()
if args.all:
# Process all files in parallel when --all is used
process_all_files_in_bulk()
else:
# Check for regular WAV and SRT file arguments
if len(remaining_args) == 2:
wav_file, srt_file = remaining_args
# Process a single pair of WAV and SRT files
process_wav_srt_pair(wav_file, srt_file, output_dir="output")
else:
print("Please provide either --all to process all files or specify WAV and SRT files.")
if __name__ == "__main__":
main()