Skip to content

Commit

Permalink
chat: listening wip
Browse files Browse the repository at this point in the history
  • Loading branch information
rmackay9 committed Dec 20, 2023
1 parent bee9eea commit 4a66649
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 9 deletions.
66 changes: 62 additions & 4 deletions MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
'''
AI Chat Module voice-to-text class
Randy Mackay, December 2023
Audio threshold algorithm courtesy of Primusa on StackOverflow: https://stackoverflow.com/questions/18406570/python-record-audio-on-detected-sound
'''

import time
import math
import struct

try:
import pyaudio # install using, "sudo apt-get install python3-pyaudio"
Expand All @@ -19,6 +23,9 @@ def __init__(self):
self.client = None
self.assistant = None

# initialise audio recording
self.p = pyaudio.PyAudio()

# set the OpenAI API key
def set_api_key(self, api_key_str):
self.client = OpenAI(api_key = api_key_str)
Expand All @@ -37,15 +44,44 @@ def check_connection(self):
# return True if connected
return self.client is not None

# listen for noise
# blocks until a frame of microphone audio is louder than the threshold
# returns true if noise is detected, false if pyaudio is not initialised,
# the microphone could not be opened or reading from the microphone failed
def listen_for_noise(self):
    # check pyaudio is initialised
    if self.p is None:
        print("chat: pyaudio not initialised")
        return False

    # open stream (Exception rather than a bare except so that Ctrl-C
    # and interpreter shutdown are not swallowed)
    try:
        stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
    except Exception:
        print("chat: failed to connect to microphone")
        return False

    # read frames until one is loud enough
    # the finally clause guarantees the stream is released even if a read fails
    try:
        while not self.volume_over_threshold(stream.read(1024)):
            pass
    except OSError:
        # NOTE(review): PyAudio is expected to report read errors (e.g. input
        # overflow) as IOError/OSError - confirm against the installed version
        print("chat: failed to read from microphone")
        return False
    finally:
        # stop and close the stream
        stream.stop_stream()
        stream.close()
    return True

# record audio from microphone
# returns filename of recording or None if failed
def record_audio(self):
# Initialize PyAudio
p = pyaudio.PyAudio()
# check pyaudio is initialised
if self.p is None:
print("chat: pyaudio not initialised")
return False

# Open stream
try:
stream = p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
except:
print("chat: failed to connect to microphone")
return None
Expand All @@ -64,7 +100,6 @@ def record_audio(self):
# Stop and close the stream
stream.stop_stream()
stream.close()
p.terminate()

# Save audio file
wf = wave.open("recording.wav", "wb")
Expand All @@ -89,3 +124,26 @@ def convert_audio_to_text(self, audio_filename):
file=audio_file,
response_format="text")
return transcript

# return true if the volume of a frame of audio is above a given threshold
# frame is a bytes object of native-endian signed 16-bit samples
# threshold is the RMS level in raw sample units (0 to 32768); the samples are
# deliberately NOT normalised to +/-1.0 because that would make the default
# threshold of 10000 unreachable
# an empty frame returns False and a trailing partial sample is ignored
@staticmethod
def volume_over_threshold(frame, threshold = 10000):
    # calculate number of whole samples in the frame
    sample_width = 2
    num_samples = len(frame) // sample_width

    # protect against divide by zero
    if num_samples == 0:
        return False

    # unpack only the whole samples so an odd-length frame cannot raise
    shorts = struct.unpack("%dh" % num_samples, frame[:num_samples * sample_width])

    # calculate the RMS volume in raw sample units
    sum_squares = sum(sample * sample for sample in shorts)
    volume_rms = math.sqrt(sum_squares / float(num_samples))

    # return true if volume is above threshold
    return volume_rms > threshold
20 changes: 15 additions & 5 deletions MAVProxy/modules/mavproxy_chat/chat_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ def __init__(self, mpstate):
self.horiz_sizer = wx.BoxSizer(wx.HORIZONTAL)

# add a record button
self.record_button = wx.Button(self.frame, id=-1, label="Rec", size=(75, 25))
self.frame.Bind(wx.EVT_BUTTON, self.record_button_click, self.record_button)
#self.record_button = wx.Button(self.frame, id=-1, label="Rec", size=(75, 25))
self.record_button = wx.ToggleButton(self.frame, id=-1, label="Rec", size=(75, 25))
self.frame.Bind(wx.EVT_TOGGLEBUTTON, self.record_button_click, self.record_button)
self.horiz_sizer.Add(self.record_button, proportion = 0, flag = wx.ALIGN_TOP | wx.ALL, border = 5)

# add an input text box
Expand Down Expand Up @@ -109,12 +110,21 @@ def apikey_close_button_click(self, event):

# record button clicked
def record_button_click(self, event):
# run record_button_click_execute in a new thread
th = Thread(target=self.record_button_click_execute, args=(event,))
th.start()
if self.record_button.GetValue():
# run record_button_click_execute in a new thread
th = Thread(target=self.record_button_click_execute, args=(event,))
th.start()

# record button clicked
def record_button_click_execute(self, event):
print("chat: record button clicked: " + str(self.record_button.GetValue()))

# listen for noise
if not self.chat_voice_to_text.listen_for_noise():
print("chat: failed to listen for noise")
self.set_status_text("Failed to listen for noise")
return

# record audio
rec_filename = self.chat_voice_to_text.record_audio()
if rec_filename is None:
Expand Down

0 comments on commit 4a66649

Please sign in to comment.