chat: listening wip

rmackay9 · Dec 20, 2023 · 4a66649 · 4a66649
1 parent bee9eea
commit 4a66649
Show file tree

Hide file tree

Showing 2 changed files with 77 additions and 9 deletions.
diff --git a/MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py b/MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py
@@ -1,9 +1,13 @@
 '''
 AI Chat Module voice-to-text class
 Randy Mackay, December 2023
+
+Audio threshold algorithm courtesy of Primusa on StackOverflow: https://stackoverflow.com/questions/18406570/python-record-audio-on-detected-sound
 '''
 
 import time
+import math
+import struct
 
 try:
     import pyaudio  # install using, "sudo apt-get install python3-pyaudio"
@@ -19,6 +23,9 @@ def __init__(self):
         self.client = None
         self.assistant = None
 
+        # initialise audio recording
+        self.p = pyaudio.PyAudio()
+
     # set the OpenAI API key
     def set_api_key(self, api_key_str):
         self.client = OpenAI(api_key = api_key_str)
@@ -37,15 +44,44 @@ def check_connection(self):
         # return True if connected
         return self.client is not None
 
+    # listen for noise
+    # blocks until a frame louder than the volume threshold is read from the microphone
+    # returns true if noise is detected, false if the microphone could not be opened or read
+    def listen_for_noise(self):
+        # check pyaudio is initialised
+        if self.p is None:
+            print("chat: pyaudio not initialised")
+            return False
+
+        # Open stream
+        # catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate
+        try:
+            stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
+        except Exception:
+            print("chat: failed to connect to microphone")
+            return False
+
+        # listen for noise, reading one buffer at a time until a loud frame arrives
+        try:
+            while not self.volume_over_threshold(stream.read(1024)):
+                pass
+        except (IOError, OSError):
+            # a failed read is reported to the caller instead of escaping into the worker thread
+            print("chat: failed to read from microphone")
+            return False
+        finally:
+            # Stop and close the stream, even if the read failed
+            stream.stop_stream()
+            stream.close()
+        return True
+
     # record audio from microphone
     # returns filename of recording or None if failed
     def record_audio(self):
-        # Initialize PyAudio
-        p = pyaudio.PyAudio()
+        # check pyaudio is initialised
+        if self.p is None:
+            print("chat: pyaudio not initialised")
+            return False
 
         # Open stream
         try:
-            stream = p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
+            stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
         except:
             print("chat: failed to connect to microphone")
             return None
@@ -64,7 +100,6 @@ def record_audio(self):
         # Stop and close the stream
         stream.stop_stream()
         stream.close()
-        p.terminate()
 
         # Save audio file
         wf = wave.open("recording.wav", "wb")
@@ -89,3 +124,26 @@ def convert_audio_to_text(self, audio_filename):
             file=audio_file, 
             response_format="text")
         return transcript
+
+    # return true if the volume of a frame of audio is above a given threshold
+    # frame: bytes holding signed 16-bit samples in native byte order
+    # threshold: RMS amplitude in raw 16-bit sample units (0 to 32768)
+    @staticmethod
+    def volume_over_threshold(frame, threshold = 10000):
+        # calculate number of whole samples in the frame
+        sample_width = 2
+        num_samples = len(frame) // sample_width
+
+        # protect against divide by zero
+        if num_samples == 0:
+            return False
+
+        # unpack_from reads exactly num_samples shorts and ignores any trailing odd byte
+        shorts = struct.unpack_from("%dh" % num_samples, frame)
+
+        # calculate the RMS volume in raw sample units so it is on the same scale as
+        # the threshold (a value normalised to 0..1 could never exceed the default)
+        sum_squares = sum(sample * sample for sample in shorts)
+        volume_rms = math.sqrt(float(sum_squares) / num_samples)
+
+        # return true if volume is above threshold
+        return volume_rms > threshold
diff --git a/MAVProxy/modules/mavproxy_chat/chat_window.py b/MAVProxy/modules/mavproxy_chat/chat_window.py
@@ -48,8 +48,9 @@ def __init__(self, mpstate):
         self.horiz_sizer = wx.BoxSizer(wx.HORIZONTAL)
 
         # add a record button
-        self.record_button = wx.Button(self.frame, id=-1, label="Rec", size=(75, 25))
-        self.frame.Bind(wx.EVT_BUTTON, self.record_button_click, self.record_button)
+        #self.record_button = wx.Button(self.frame, id=-1, label="Rec", size=(75, 25))
+        self.record_button = wx.ToggleButton(self.frame, id=-1, label="Rec", size=(75, 25))
+        self.frame.Bind(wx.EVT_TOGGLEBUTTON, self.record_button_click, self.record_button)
         self.horiz_sizer.Add(self.record_button, proportion = 0, flag = wx.ALIGN_TOP | wx.ALL, border = 5)
 
         # add an input text box
@@ -109,12 +110,21 @@ def apikey_close_button_click(self, event):
 
     # record button clicked
     def record_button_click(self, event):
-        # run record_button_click_execute in a new thread
-        th = Thread(target=self.record_button_click_execute, args=(event,))
-        th.start()
+        # only start a worker thread when the toggle button's value is True
+        if self.record_button.GetValue():
+            # run record_button_click_execute in a new thread so this handler returns without waiting for it
+            th = Thread(target=self.record_button_click_execute, args=(event,))
+            th.start()
 
     # record button clicked
     def record_button_click_execute(self, event):
+        print("chat: record button clicked: " + str(self.record_button.GetValue()))
+
+        # listen for noise
+        if not self.chat_voice_to_text.listen_for_noise():
+            print("chat: failed to listen for noise")
+            self.set_status_text("Failed to listen for noise")
+            return
+
         # record audio
         rec_filename = self.chat_voice_to_text.record_audio()
         if rec_filename is None: