-
-
Notifications
You must be signed in to change notification settings - Fork 22
/
example.py
218 lines (189 loc) · 7.77 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""Test WebSocket connection to SEPIA STT-Server"""
import sys
import argparse
import asyncio
import logging
from pynput.keyboard import Key, Listener as KeyListener
from microphone import MicrophoneStream
from socket_client import (
SepiaSttSocketClient, SepiaSttSocketConnectionError, SepiaSttSocketMessageError)
# Logger
# Install a root handler explicitly. Without one, INFO records of the "test"
# logger only reach logging's last-resort handler (WARNING and above) and are
# dropped. The module-level logging.info() call used before only "fixed" this
# because it calls basicConfig() implicitly when the root logger has no handler.
logging.basicConfig()
# Keep the library loggers quiet unless something goes wrong
logging.getLogger("sepia.stt.client").setLevel(level=logging.WARNING)
logging.getLogger("sepia.stt.microphone").setLevel(level=logging.WARNING)
# Logger of this example script
logger = logging.getLogger("test")
logger.setLevel(level=logging.INFO)
logger.info("\nWelcome to the SEPIA STT-Server Python client example")
# Parse arguments
argv = sys.argv[1:]
ap = argparse.ArgumentParser()
# All options are plain string values: (short flag, long flag, help text, default)
for _short, _long, _help, _default in (
    ("-s", "--server", "Server URL", "http://localhost:20741"),
    ("-l", "--language", "Language code, e.g.: 'de' or 'en-US'", "en-US"),
    ("-t", "--task", "Model task as seen in welcome message", ""),
    ("-m", "--model",
     "Model name as seen in welcome message (overwrites lang. and task)", ""),
):
    ap.add_argument(_short, _long, action="store", help=_help, default=_default)
args = ap.parse_args(argv)
# Server and user defaults
# argparse already supplies a default URL, so the fallback only matters if the
# parsed value is None. The check must test the value itself: the previous
# 'if not None' was a constant True and made the fallback unreachable.
SERVER_URL = args.server if args.server is not None else "http://localhost:20741"
CLIENT_ID = "any"               # default STT server user
ACCESS_TOKEN = "test1234"       # default token for user
CONTINUOUS_STT = False          # continue after (a) final result or close
# Options sent to the STT engine, filled from the command line
ENGINE_OPTIONS = {
    "language": args.language,  # use short or long code (de, en-US)
    "task": args.task,
    "model": args.model,        # NOTE: overwrites language and task
    # ... there are many more options, see docs or web-demo ...
    "continuous": CONTINUOUS_STT
}
# Client behavior
DISCONNECT_AFTER_RES_IF_MIC_OFF = True  # abort 'continuous' mode if res is quasi-final and mic off
USE_KEYBOARD_CTRL = True    # hold 'r' to start recording, release it to stop
REC_TIME = 8.0              # if keyboard control is off use timer
CHUNK_SIZE = 2048           # microphone chunk size (2048 ~= 128ms at 16khz)
# One shared loop is required to sync mic, key and coroutine threads.
# Create and register it explicitly: asyncio.get_event_loop() without a running
# loop is deprecated and raises RuntimeError on recent Python versions.
event_loop = asyncio.new_event_loop()
asyncio.set_event_loop(event_loop)
# Created after the loop is registered so that older Python versions, which
# bind the queue to the current loop at construction, pick up 'event_loop'.
audio_queue = asyncio.Queue()
# Set up microphone:
async def audio_queue_handler():
    """Forward audio chunks from the queue to the STT server.

    Waits for the next item in ``audio_queue`` and sends it to the server.
    After each chunk it sends the audio-end signal as well if
    ``should_send_audio_end()`` says so. The loop ends on the first
    connection, message or type error, which is printed.
    """
    handled_errors = (
        SepiaSttSocketConnectionError, SepiaSttSocketMessageError, TypeError)
    keep_running = True
    while keep_running:
        try:
            logger.debug("Waiting for data in audio queue ...")
            chunk = await audio_queue.get()
            await sepia_stt_client.send_bytes(chunk)
            # The end signal is optional, but it requests the final result
            if not should_send_audio_end():
                continue
            logger.info("Send audio end signal")
            await sepia_stt_client.send_audio_end()
        except handled_errors as err:
            print(f'Error in audio queue: {err}')
            keep_running = False
async def timed_mic_control():
    """Stop the microphone once ``REC_TIME`` seconds have passed.

    Does nothing if the microphone is no longer active by then.
    """
    await asyncio.sleep(REC_TIME)
    if not mic_stream.is_active():
        return
    print("RECORDING stop")
    mic_stream.stop()
    # NOTE: the connection is not closed here; 'on_result' disconnects once
    # 'should_auto_disconnect' reports that the stream is done.
    # NOTE(review): unlike the key-release handler, no audio-end signal is
    # scheduled here - confirm the queue handler still gets to send it.
# Microphone stream (with default settings: Int16, 16khz, mono)
# Module-level instance shared by the key-listener, timer and result callbacks.
mic_stream = MicrophoneStream(chunk_size=CHUNK_SIZE)
# The stream gets the shared queue and event loop; presumably it pushes the
# recorded chunks into 'audio_queue' via that loop - confirm in 'microphone'.
mic_stream.open(audio_queue, event_loop)
def load_keyboard_controls_thread():
    """Start a key listener that controls the microphone.

    Pressing 'r' starts recording, releasing it stops recording and schedules
    either a disconnect or the audio-end signal on the event loop. ESC
    schedules a disconnect and stops the listener.
    """
    def _is_record_key(key):
        # Keys without a 'char' attribute (e.g. ESC) never match
        return getattr(key, 'char', None) == "r"

    def _schedule(coroutine):
        # Key callbacks run outside the event loop, so hand the coroutine
        # over in a thread-safe way.
        asyncio.run_coroutine_threadsafe(coroutine, event_loop)

    def on_press(key):
        # start recording
        if not _is_record_key(key):
            return
        logger.debug("Pressed 'r'")
        if mic_stream.is_active():
            return
        print("RECORDING start")
        mic_stream.start()

    def on_release(key):
        # stop recording
        if _is_record_key(key):
            logger.debug("Released 'r'")
            if mic_stream.is_active():
                print("RECORDING stop")
                mic_stream.stop()
                if should_auto_disconnect():
                    _schedule(disconnect())
                elif should_send_audio_end():
                    _schedule(sepia_stt_client.send_audio_end())
        # disconnect
        if key == Key.esc:
            print("Requested ABORT (ESC)")
            _schedule(disconnect())
            # Returning False ends the key listener
            return False
        return None

    # Start key-listener thread
    # NOTE: do we need a join somewhere to clean-up?
    KeyListener(on_press=on_press, on_release=on_release).start()
# Connect to server:
def on_open():
    """Report that the STT server connection has been opened."""
    message = "\nSTT-Server connection is: OPEN"
    print(message)
def on_ready(active_options: dict):
    """Report the ready state and get recording going.

    Prints the active options. With keyboard control enabled it only prints
    the key instructions; otherwise it starts the microphone right away and
    schedules ``timed_mic_control`` to stop it again.
    """
    print("\nSTT-Server connection is: READY")
    print(f"Active options: {active_options}")
    if USE_KEYBOARD_CTRL:
        # Recording is driven by the key listener
        print("\nPress and hold 'r' to start recording, release it to stop. Abort with ESC.")
        return
    # start recording and stop after specific time
    mic_stream.start()
    event_loop.create_task(timed_mic_control())
    print(f"\nRECORDER open for: {REC_TIME}s")
def on_result(result_json: dict):
    """Print a transcription result and disconnect when the stream is done.

    Final results are always printed, partial results only when the
    transcript is not empty. Afterwards a disconnect is scheduled if
    ``should_auto_disconnect()`` is true.
    """
    final = result_json.get("isFinal", False)
    transcript = result_json.get("transcript", "")
    # Pick the label; 'None' means there is nothing worth printing
    if final:
        label = 'FINAL'
    elif len(transcript) > 0:
        label = 'partial'
    else:
        label = None
    if label is not None:
        print(f"STT-Server {label} result: {transcript}\n")
    if should_auto_disconnect():
        asyncio.ensure_future(disconnect())
def on_close():
    """Report that the STT server connection has been closed."""
    message = "\nSTT-Server connection is: CLOSED"
    print(message)
def on_error(err):
    """Report a connection error; ``err`` is printed in its string form."""
    message = f"\nSTT-Server connection ERROR: {err}"
    print(message)
# Maps the client's event names to the callback functions of this script
# (presumably called by the client on the matching event - see 'socket_client')
server_options = {
    "onopen": on_open,
    "onready": on_ready,
    "onresult": on_result,
    "onclose": on_close,
    "onerror": on_error
}
# Single client instance shared by the audio handler, the key listener and
# the connect/disconnect helpers of this script
sepia_stt_client = SepiaSttSocketClient(
    server_url = SERVER_URL,
    client_id = CLIENT_ID,
    access_token = ACCESS_TOKEN,
    engine_options = ENGINE_OPTIONS,
    server_options = server_options
)
async def connect():
    """Check the server and open the SEPIA STT-Server connection.

    Prints the ping result and the server info first, then awaits the
    client's WebSocket connect call.
    """
    # Load some server data and check connection
    ping_result = sepia_stt_client.ping_server()
    print(f"\nPing server: {ping_result}")
    print(f"\nServer info: {sepia_stt_client.load_server_info()}")
    # Establish WebSocket connection
    await sepia_stt_client.connect()
async def disconnect():
    """Close the server connection; does nothing if it is not open."""
    if not sepia_stt_client.is_open():
        return
    logger.info("Closing connection")
    await sepia_stt_client.close_connection()
def should_auto_disconnect():
    """Tell whether the connection should be closed automatically.

    True only if the feature flag is enabled, the last result was
    quasi-final, the microphone is stopped and the audio queue is empty.
    """
    if not DISCONNECT_AFTER_RES_IF_MIC_OFF:
        return False
    if not sepia_stt_client.is_last_result_quasi_final():
        return False
    if not mic_stream.is_stopped():
        return False
    if not audio_queue.empty():
        return False
    return True
def should_send_audio_end():
    """Tell whether the audio-end event should be sent now.

    True only outside continuous mode, with the microphone stopped, the audio
    queue empty and no audio-end event submitted so far.
    """
    if CONTINUOUS_STT:
        return False
    if not mic_stream.is_stopped():
        return False
    if not audio_queue.empty():
        return False
    if sepia_stt_client.was_audio_end_submitted():
        return False
    return True
# MAIN
def main():
    """Run the example session.

    Optionally starts the keyboard listener, schedules the audio queue
    handler and runs the event loop until ``connect()`` returns. The audio
    task and the microphone are cleaned up afterwards, also if connecting
    fails or the run is interrupted.
    """
    # Control microphone via key listeners
    if USE_KEYBOARD_CTRL:
        load_keyboard_controls_thread()
    # Prepare audio queue
    audio_queue_cr = event_loop.create_task(audio_queue_handler())
    try:
        # Connect to server and handle audio
        event_loop.run_until_complete(connect())
    finally:
        audio_queue_cr.cancel()
        try:
            # Give the loop a chance to process the cancellation, otherwise
            # the task stays pending and is reported when it gets destroyed.
            event_loop.run_until_complete(
                asyncio.gather(audio_queue_cr, return_exceptions=True))
        finally:
            # Release the microphone no matter what happened above
            mic_stream.close()
# Run the example only when the file is executed as a script, not on import
if __name__ == '__main__':
    main()