From 36633b4c72276a49a8ad85be16676dc4ed90bcaf Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Wed, 2 Oct 2024 14:09:08 +0800
Subject: [PATCH 01/11] update

---
 __init__.py         |  15 +++-
 nodes/Audio.py      |  92 ++++++++++++++++++++++++
 nodes/SenseVoice.py |  18 +++--
 nodes/Whisper.py    | 171 ++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt    |   1 +
 5 files changed, 289 insertions(+), 8 deletions(-)
 create mode 100644 nodes/Whisper.py

diff --git a/__init__.py b/__init__.py
index aedea7b4..9007c2a6 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1006,7 +1006,7 @@ def mix_status(request):
 # from .nodes.Vae import VAELoader,VAEDecode
 from .nodes.ScreenShareNode import ScreenShareNode,FloatingVideo
 
-from .nodes.Audio import AudioPlayNode,SpeechRecognition,SpeechSynthesis
+from .nodes.Audio import AudioPlayNode,SpeechRecognition,SpeechSynthesis,AnalyzeAudioNone
 from .nodes.Utils import CreateJsonNode,KeyInput,IncrementingListNode,ListSplit,CreateLoraNames,CreateSampler_names,CreateCkptNames,CreateSeedNode,TESTNODE_,TESTNODE_TOKEN,AppInfo,IntNumber,FloatSlider,TextInput,ColorInput,FontInput,TextToNumber,DynamicDelayProcessor,LimitNumber,SwitchByIndex,MultiplicationNode
 from .nodes.Mask import PreviewMask_,MaskListReplace,MaskListMerge,OutlineMask,FeatheredMask
 
@@ -1103,6 +1103,7 @@ def mix_status(request):
     "SpeechRecognition":SpeechRecognition,
     "SpeechSynthesis":SpeechSynthesis,
     "AudioPlay":AudioPlayNode,
+    "AnalyzeAudio":AnalyzeAudioNone,
 
     # Text
     "TextToNumber":TextToNumber,
@@ -1220,6 +1221,7 @@ def mix_status(request):
     "SpeechSynthesis":"SpeechSynthesis ♾️Mixlab",
     "SpeechRecognition":"SpeechRecognition ♾️Mixlab",
     "AudioPlay":"Preview Audio ♾️Mixlab",
+    "AnalyzeAudio":"Analyze Audio ♾️Mixlab",
 
     # Utils
     "DynamicDelayProcessor":"DynamicDelayByText ♾️Mixlab",
@@ -1433,11 +1435,20 @@ def mix_status(request):
     from .nodes.SenseVoice import SenseVoiceNode
     logging.info('SenseVoice.available')
     NODE_CLASS_MAPPINGS['SenseVoiceNode']=SenseVoiceNode
-    NODE_DISPLAY_NAME_MAPPINGS["SenseVoiceNode"]= "Sense Voice"
+    NODE_DISPLAY_NAME_MAPPINGS["SenseVoiceNode"]= "Sense Voice ♾️Mixlab"
 
 except Exception as e:
     logging.info('SenseVoice.available False' )  
 
+try:  # Whisper nodes are optional: importing .nodes.Whisper may fail on this machine
+    from .nodes.Whisper import LoadWhisperModel,WhisperTranscribe
+    logging.info('Whisper.available')
+    NODE_CLASS_MAPPINGS['LoadWhisperModel_']=LoadWhisperModel
+    NODE_CLASS_MAPPINGS['WhisperTranscribe_']=WhisperTranscribe
+    NODE_DISPLAY_NAME_MAPPINGS["LoadWhisperModel_"]= "Load Whisper Model ♾️Mixlab"
+    NODE_DISPLAY_NAME_MAPPINGS["WhisperTranscribe_"]= "Whisper Transcribe ♾️Mixlab"
 
+except Exception as e:
+    logging.info('Whisper.available False: %s', e)  # still best-effort, but report the cause
 
 logging.info('\033[93m -------------- \033[0m')
diff --git a/nodes/Audio.py b/nodes/Audio.py
index 4db98c62..1e44f34d 100644
--- a/nodes/Audio.py
+++ b/nodes/Audio.py
@@ -3,6 +3,98 @@
 import folder_paths
 import torchaudio
 
+class AnyType(str):
+  """String subclass whose ``!=`` comparison always returns False. Credit to pythongosssss"""
+
+  def __ne__(self, __value: object) -> bool:
+    return False  # never report "not equal", whatever the other operand is
+
+any_type = AnyType("*")
+
+
+def analyze_audio_data(audio_data):  # Summarise a list of segment dicts; returns a dict of totals
+    total_duration = 0  # running sum of (end_time - start_time) over all entries
+    total_gap_duration = 0
+    emotion_counts = {}
+    audio_types = set()
+
+    for i, entry in enumerate(audio_data):
+        # Calculate the duration of each audio segment
+        start_time = entry['start_time']  # required key: a missing one raises KeyError
+        end_time = entry['end_time']
+        duration = end_time - start_time
+        total_duration += duration
+
+        # Count how many entries carry each emotion label (the key is optional)
+        if "emotion" in entry:
+            emotion = entry['emotion']
+            if emotion in emotion_counts:
+                emotion_counts[emotion] += 1
+            else:
+                emotion_counts[emotion] = 1
+
+        # Collect the distinct audio types (the key is optional)
+        if "audio_type" in entry:
+            audio_types.add(entry['audio_type'])
+
+        # Calculate gap duration if not the last entry
+        if i < len(audio_data) - 1:
+            next_start_time = audio_data[i + 1]['start_time']
+            gap_duration = next_start_time - end_time
+            if gap_duration > 0:  # only positive gaps count; zero or negative ones are ignored
+                total_gap_duration += gap_duration
+
+    # Pick the emotion with the highest count; None when no entry had an "emotion" key
+    if len(emotion_counts.keys())>0:
+        most_frequent_emotion = max(emotion_counts, key=emotion_counts.get)
+    else:
+        most_frequent_emotion=None
+
+    # Convert audio_types set to list for better readability
+    audio_types = list(audio_types)
+
+    # Print the results
+    print(f"Total Effective Duration: {total_duration:.2f} seconds")
+    print(f"Total Gap Duration: {total_gap_duration:.2f} seconds")
+    print(f"Emotion Changes: {emotion_counts}")  # despite the label, this is the per-emotion count
+    print(f"Most Frequent Emotion: {most_frequent_emotion}")
+    print(f"Audio Types: {audio_types}")
+
+
+    return {
+        "total_duration": total_duration,
+        "total_gap_duration": total_gap_duration,
+        "emotion_changes": emotion_counts,
+        "most_frequent_emotion": most_frequent_emotion,
+        "audio_types": audio_types
+    }
+
+# Example usage
+audio_data = [{'language': 'zh', 'emotion': 'SAD', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:00,540 --> 00:00:10,550\n嘿，欢迎来到梦幻纷飞探索心灵迷雾的奇境之旅。在这里，你将踏上一段超现实的。\n', 'start_time': 0.54, 'end_time': 10.55, 'text': '嘿，欢迎来到梦幻纷飞探索心灵迷雾的奇境之旅。在这里，你将踏上一段超现实的。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:10,550 --> 00:00:20,559\n有奇幻之旅，想象一下啊，这个悬浮的棋盘和巨大的漂浮眼睛。哇这。\n', 'start_time': 10.55, 'end_time': 20.56, 'text': '有奇幻之旅，想象一下啊，这个悬浮的棋盘和巨大的漂浮眼睛。哇这。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:20,559 --> 00:00:25,960\n意象就会让你对其他世界为独，充满好奇。\n', 'start_time': 20.56, 'end_time': 25.96, 'text': '意象就会让你对其他世界为独，充满好奇。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:26,239 --> 00:00:36,250\n我们的艺术家梦颖就以其独特的杏人型眼睛和飘逸的黑发，穿着流动的礼服，完美。\n', 'start_time': 26.24, 'end_time': 36.25, 'text': '我们的艺术家梦颖就以其独特的杏人型眼睛和飘逸的黑发，穿着流动的礼服，完美。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:36,250 --> 00:00:41,820\n融入这个暗淡的背景啊，形成一个神秘的身影。\n', 'start_time': 36.25, 'end_time': 41.82, 'text': '融入这个暗淡的背景啊，形成一个神秘的身影。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:43,170 --> 00:00:53,179\n在这次展览中，我们将一起揭示人类心灵的秘密与欲望。那个梦影的作品通过细腻的绘画。\n', 'start_time': 43.17, 'end_time': 53.18, 'text': '在这次展览中，我们将一起揭示人类心灵的秘密与欲望。那个梦影的作品通过细腻的绘画。'}, {'language': 'zh', 'emotion': 'SAD', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:53,179 --> 00:01:03,189\n技巧和超越常规的图像构思，带你探索内心深处的情感和欲望。这种感觉真的就是。\n', 'start_time': 53.18, 'end_time': 63.19, 'text': '技巧和超越常规的图像构思，带你探索内心深处的情感和欲望。这种感觉真的就是。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:03,189 --> 00:01:09,019\n是准备好被激发想象力和困惑了吗？哇哦。\n', 'start_time': 63.19, 'end_time': 69.02, 'text': 
'是准备好被激发想象力和困惑了吗？哇哦。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:09,299 --> 00:01:19,310\n通过与艺术家的互动，你将进入一个超越现实的境界，挑战你对现实和逻辑的理解。\n', 'start_time': 69.3, 'end_time': 79.31, 'text': '通过与艺术家的互动，你将进入一个超越现实的境界，挑战你对现实和逻辑的理解。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:20,939 --> 00:01:30,950\n所以快来吧，然后让我们一起投身这场奇境之旅吧，挖掘人类的秘密与欲望。\n', 'start_time': 80.94, 'end_time': 90.95, 'text': '所以快来吧，然后让我们一起投身这场奇境之旅吧，挖掘人类的秘密与欲望。'}, {'language': 'zh', 'emotion': 'SAD', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:31,189 --> 00:01:41,200\n激发你的想象力与困惑，希望这段旅程能让你在艺术与灵魂的碰撞中找到珍贵的。\n', 'start_time': 91.19, 'end_time': 101.2, 'text': '激发你的想象力与困惑，希望这段旅程能让你在艺术与灵魂的碰撞中找到珍贵的。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:41,200 --> 00:01:47,170\n回忆并引发内心的转变与行动。那就是。\n', 'start_time': 101.2, 'end_time': 107.17, 'text': '回忆并引发内心的转变与行动。那就是。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:48,760 --> 00:01:51,790\n嗯，准备好了吗？走吧。\n', 'start_time': 108.76, 'end_time': 111.79, 'text': '嗯，准备好了吗？走吧。'}]
+
+print(analyze_audio_data(audio_data))
+
+# 分析音频数据
+class AnalyzeAudioNone:
+    @classmethod
+    def INPUT_TYPES(s):  # declares one required input, "json", typed with the any_type wildcard
+        return {"required": { 
+                    "json":(any_type,),}, 
+                }
+    
+    RETURN_TYPES = (any_type,)
+    RETURN_NAMES = ("result",)
+
+    FUNCTION = "run"
+
+    CATEGORY = "♾️Mixlab/Audio"
+
+    def run(self,json):  # `json` is handed to analyze_audio_data as-is; nothing is parsed here
+        result=analyze_audio_data(json)
+        return (result,)  # one-element tuple, matching RETURN_TYPES
+
+
+
 class SpeechRecognition:
     @classmethod
     def INPUT_TYPES(s):
diff --git a/nodes/SenseVoice.py b/nodes/SenseVoice.py
index 8c0d4de4..8c532dcf 100644
--- a/nodes/SenseVoice.py
+++ b/nodes/SenseVoice.py
@@ -168,8 +168,9 @@ def INPUT_TYPES(s):
 
     OUTPUT_NODE = True
     FUNCTION = "run" 
-    RETURN_TYPES = (any_type,)
-    RETURN_NAMES = ("result",)
+    
+    RETURN_TYPES = (any_type,"STRING","FLOAT",)
+    RETURN_NAMES = ("result","text","total_seconds",)
 
     def run(self,audio,device,language,num_threads,use_int8,use_itn ):
  
@@ -200,16 +201,21 @@ def run(self,audio,device,language,num_threads,use_int8,use_itn ):
 
         if 'waveform' in audio and 'sample_rate' in audio:
             waveform = audio['waveform']
+            sample_rate = audio['sample_rate']
             # print("Original shape:", waveform.shape)  # 打印原始形状
             if waveform.ndim == 3 and waveform.shape[0] == 1:  # 检查是否为三维且 batch_size 为 1
                 waveform = waveform.squeeze(0)  # 移除 batch_size 维度
-                waveform_numpy = waveform.numpy().transpose(1, 0)  # 转换为 (num_samples, num_channels)
             else:
                 raise ValueError("Unexpected waveform dimensions")
 
-            _sample_rate = audio['sample_rate']
+            print("waveform.shape:", waveform.shape) 
+            total_length_seconds = waveform.shape[1] / sample_rate
+
+            waveform_numpy = waveform.numpy().transpose(1, 0)  # 转换为 (num_samples, num_channels)
 
-        results=self.processor.process_audio(waveform_numpy, _sample_rate, language, use_itn)
+        results=self.processor.process_audio(waveform_numpy, sample_rate, language, use_itn)
 
+        srt_content="\n".join([s['srt_content'] for s in results])
 
-        return (results,)
+        return (results,srt_content,total_length_seconds,)
+ 
\ No newline at end of file
diff --git a/nodes/Whisper.py b/nodes/Whisper.py
new file mode 100644
index 00000000..b3d4f60e
--- /dev/null
+++ b/nodes/Whisper.py
@@ -0,0 +1,171 @@
+import os,re
+import sys,time
+from pathlib import Path
+import torchaudio
+import hashlib
+import torch
+import folder_paths
+import comfy.utils
+
+from faster_whisper import WhisperModel
+
+class AnyType(str):
+  """String subclass whose ``!=`` comparison always returns False. Credit to pythongosssss"""
+
+  def __ne__(self, __value: object) -> bool:
+    return False  # never report "not equal", whatever the other operand is
+
+any_type = AnyType("*")
+
+def get_model_dir(m):  # first folder registered for model type `m`, else <models_dir>/<m>
+    try:
+        return folder_paths.get_folder_paths(m)[0]
+    except Exception:  # was a bare except, which also swallowed KeyboardInterrupt/SystemExit
+        return os.path.join(folder_paths.models_dir, m)
+
+
+
+whisper_model_path=get_model_dir('whisper')
+
+# Every sub-directory of the whisper folder that holds a config.json is a selectable model.
+model_sizes=[
+    entry for entry in os.listdir(whisper_model_path)
+    if os.path.isdir(os.path.join(whisper_model_path, entry))
+    and os.path.isfile(os.path.join(whisper_model_path, entry, "config.json"))]
+
+
+class LoadWhisperModel:
+    """Node that builds a faster-whisper ``WhisperModel`` and caches it on the instance.
+
+    The cached model is rebuilt only when the resolved device, the model size or
+    the compute type differs from the values the cached model was built with.
+    """
+    def __init__(self):
+        self.model = None
+        self.device="cuda" if torch.cuda.is_available() else "cpu"
+        # Guard the lookup: the whisper folder may contain no usable model directory.
+        self.model_size=model_sizes[0] if model_sizes else None
+        self.compute_type='float16'
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "model_size": (model_sizes,),
+            "device": (["auto","cpu"],),
+            "compute_type": (["float16","int8_float16","int8"],),
+                             },
+                }
+
+    RETURN_TYPES = ("WHISPER",)
+    RETURN_NAMES = ("whisper_model",)
+
+    FUNCTION = "run"
+
+    CATEGORY = "♾️Mixlab/Audio/Whisper"
+
+    INPUT_IS_LIST = False
+    OUTPUT_IS_LIST = (False,)
+
+    def run(self,model_size,device,compute_type):
+        """Return the cached WhisperModel as a 1-tuple, rebuilding it only if a setting changed."""
+        # "auto" prefers CUDA when it is available; every other case resolves to CPU.
+        wanted="cuda" if device=="auto" and torch.cuda.is_available() else "cpu"
+
+        # Compare against the cached settings so an unchanged request reuses the model
+        # (the previous check reloaded on every "auto" run on a CPU-only machine).
+        settings=(wanted,model_size,compute_type)
+        if settings!=(self.device,self.model_size,self.compute_type):
+            self.device,self.model_size,self.compute_type=settings
+            self.model=None
+
+        if self.model is None:
+            self.model = WhisperModel(
+                os.path.join(whisper_model_path, self.model_size), 
+                device=self.device,
+                compute_type=self.compute_type
+                )
+
+        return (self.model,)
+    
+
+class WhisperTranscribe:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+                                "whisper_model": ("WHISPER",),
+                                "audio": ("AUDIO",),
+                             },
+                }
+    
+    RETURN_TYPES = (any_type,"STRING","FLOAT",)
+    RETURN_NAMES = ("result","text","total_seconds",)
+
+    FUNCTION = "run"
+
+    CATEGORY = "♾️Mixlab/Audio/Whisper"
+
+    INPUT_IS_LIST = False
+    # OUTPUT_IS_LIST = (False,False,False,)
+    
+    def run(self,whisper_model,audio):
+        """Transcribe `audio` with `whisper_model` and build one SRT entry per segment."""
+        if 'audio_path' in audio and (not 'waveform' in audio):
+            waveform, sample_rate = torchaudio.load(audio['audio_path'])
+        elif 'waveform' in audio and 'sample_rate' in audio:
+            print("Original shape:", audio["waveform"].shape, isinstance(audio["waveform"], torch.Tensor))  # print the original shape
+            waveform = audio["waveform"].squeeze(0)  # Remove the added batch dimension
+            sample_rate = audio["sample_rate"]
+        else:
+            raise ValueError("audio must provide 'audio_path' or 'waveform' with 'sample_rate'")
+
+        # Average over the first axis to get a single mono track.
+        waveform=waveform.mean(0)
+
+        # Measure the duration before resampling changes the number of samples.
+        total_length_seconds = waveform.shape[0] / sample_rate
+
+        # faster-whisper treats a raw array as 16 kHz audio, so resample anything else.
+        if sample_rate != 16000:
+            waveform = torchaudio.functional.resample(
+                waveform, orig_freq=sample_rate, new_freq=16000
+            )
+
+        waveform=waveform.numpy() # older whisper_model.transcribe versions cannot take a tensor directly
+        segments, info = whisper_model.transcribe(waveform, beam_size=5)
+
+        print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
+
+        # Format seconds as an SRT timestamp; round to the millisecond (truncation gave 20.56 -> ,559)
+        def format_time(seconds):
+            total_millis = int(round(seconds * 1000))
+            hours, remainder = divmod(total_millis // 1000, 3600)
+            minutes, secs = divmod(remainder, 60)
+            return f"{hours:02}:{minutes:02}:{secs:02},{total_millis % 1000:03}"
+
+        # Prepare SRT content as a string
+        results = []
+        for i, segment in enumerate(segments):
+            start_time = format_time(segment.start)
+            end_time = format_time(segment.end)
+            srt_content = f"{i + 1}\n"
+            srt_content += f"{start_time} --> {end_time}\n"
+
+            text=segment.text.strip()
+
+            srt_content += f"{text}\n\n"
+
+            start_time=segment.start  # from here on keep the raw float seconds for the result dict
+            end_time=segment.end
+            
+
+            results.append({ 
+                    "srt_content":srt_content,
+                    "start_time":start_time,
+                    "end_time":end_time,
+                    "text":text
+            })
+        
+        srt_content="\n".join([s['srt_content'] for s in results])
+
+        return (results,srt_content,total_length_seconds,)
+
diff --git a/requirements.txt b/requirements.txt
index c0326dc7..37cc568a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -32,3 +32,4 @@ natsort>=8.4.0
 
 git+https://github.com/shadowcz007/SenseVoice-python.git
 
+faster_whisper

From 289f83675b7460e84e451b07ac085638b44eb083 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Wed, 2 Oct 2024 16:41:44 +0800
Subject: [PATCH 02/11] Update SenseVoice.py

---
 nodes/SenseVoice.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/nodes/SenseVoice.py b/nodes/SenseVoice.py
index 8c532dcf..04736b13 100644
--- a/nodes/SenseVoice.py
+++ b/nodes/SenseVoice.py
@@ -169,8 +169,8 @@ def INPUT_TYPES(s):
     OUTPUT_NODE = True
     FUNCTION = "run" 
     
-    RETURN_TYPES = (any_type,"STRING","FLOAT",)
-    RETURN_NAMES = ("result","text","total_seconds",)
+    RETURN_TYPES = (any_type,"STRING","STRING","FLOAT",)
+    RETURN_NAMES = ("result","srt","text","total_seconds",)
 
     def run(self,audio,device,language,num_threads,use_int8,use_itn ):
  
@@ -216,6 +216,7 @@ def run(self,audio,device,language,num_threads,use_int8,use_itn ):
         results=self.processor.process_audio(waveform_numpy, sample_rate, language, use_itn)
 
         srt_content="\n".join([s['srt_content'] for s in results])
+        text="\n".join([s['text'] for s in results])
 
-        return (results,srt_content,total_length_seconds,)
+        return (results,srt_content,text,total_length_seconds,)
  
\ No newline at end of file

From 0f77f28a954773c9d8c211465ee1a4d447ce17e6 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Wed, 2 Oct 2024 16:41:46 +0800
Subject: [PATCH 03/11] Update Whisper.py

---
 nodes/Whisper.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/nodes/Whisper.py b/nodes/Whisper.py
index b3d4f60e..cc669d72 100644
--- a/nodes/Whisper.py
+++ b/nodes/Whisper.py
@@ -97,8 +97,8 @@ def INPUT_TYPES(s):
                              },
                 }
     
-    RETURN_TYPES = (any_type,"STRING","FLOAT",)
-    RETURN_NAMES = ("result","text","total_seconds",)
+    RETURN_TYPES = (any_type,"STRING","STRING","FLOAT",)
+    RETURN_NAMES = ("result","srt","text","total_seconds",)
 
     FUNCTION = "run"
 
@@ -166,6 +166,7 @@ def format_time(seconds):
             })
         
         srt_content="\n".join([s['srt_content'] for s in results])
+        text="\n".join([s['text'] for s in results])
 
-        return (results,srt_content,total_length_seconds,)
+        return (results,srt_content,text,total_length_seconds,)
 

From b72e7dda088954c275bb395e93721f297cd11019 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Wed, 2 Oct 2024 17:05:35 +0800
Subject: [PATCH 04/11] Update Audio.py

---
 nodes/Audio.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/nodes/Audio.py b/nodes/Audio.py
index 1e44f34d..353c4d6b 100644
--- a/nodes/Audio.py
+++ b/nodes/Audio.py
@@ -17,6 +17,7 @@ def analyze_audio_data(audio_data):
     total_gap_duration = 0
     emotion_counts = {}
     audio_types = set()
+    languages = set()
 
     for i, entry in enumerate(audio_data):
         # Calculate the duration of each audio segment
@@ -37,6 +38,9 @@ def analyze_audio_data(audio_data):
         if "audio_type" in entry:
             audio_types.add(entry['audio_type'])
 
+        if "language" in entry:
+            languages.add(entry['language'])
+
         # Calculate gap duration if not the last entry
         if i < len(audio_data) - 1:
             next_start_time = audio_data[i + 1]['start_time']
@@ -53,6 +57,8 @@ def analyze_audio_data(audio_data):
     # Convert audio_types set to list for better readability
     audio_types = list(audio_types)
 
+    languages=list(languages)
+
     # Print the results
     print(f"Total Effective Duration: {total_duration:.2f} seconds")
     print(f"Total Gap Duration: {total_gap_duration:.2f} seconds")
@@ -66,13 +72,10 @@ def analyze_audio_data(audio_data):
         "total_gap_duration": total_gap_duration,
         "emotion_changes": emotion_counts,
         "most_frequent_emotion": most_frequent_emotion,
-        "audio_types": audio_types
+        "audio_types": audio_types,
+        "languages":languages
     }
 
-# Example usage
-audio_data = [{'language': 'zh', 'emotion': 'SAD', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:00,540 --> 00:00:10,550\n嘿，欢迎来到梦幻纷飞探索心灵迷雾的奇境之旅。在这里，你将踏上一段超现实的。\n', 'start_time': 0.54, 'end_time': 10.55, 'text': '嘿，欢迎来到梦幻纷飞探索心灵迷雾的奇境之旅。在这里，你将踏上一段超现实的。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:10,550 --> 00:00:20,559\n有奇幻之旅，想象一下啊，这个悬浮的棋盘和巨大的漂浮眼睛。哇这。\n', 'start_time': 10.55, 'end_time': 20.56, 'text': '有奇幻之旅，想象一下啊，这个悬浮的棋盘和巨大的漂浮眼睛。哇这。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:20,559 --> 00:00:25,960\n意象就会让你对其他世界为独，充满好奇。\n', 'start_time': 20.56, 'end_time': 25.96, 'text': '意象就会让你对其他世界为独，充满好奇。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:26,239 --> 00:00:36,250\n我们的艺术家梦颖就以其独特的杏人型眼睛和飘逸的黑发，穿着流动的礼服，完美。\n', 'start_time': 26.24, 'end_time': 36.25, 'text': '我们的艺术家梦颖就以其独特的杏人型眼睛和飘逸的黑发，穿着流动的礼服，完美。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:36,250 --> 00:00:41,820\n融入这个暗淡的背景啊，形成一个神秘的身影。\n', 'start_time': 36.25, 'end_time': 41.82, 'text': '融入这个暗淡的背景啊，形成一个神秘的身影。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:43,170 --> 00:00:53,179\n在这次展览中，我们将一起揭示人类心灵的秘密与欲望。那个梦影的作品通过细腻的绘画。\n', 'start_time': 43.17, 'end_time': 53.18, 'text': '在这次展览中，我们将一起揭示人类心灵的秘密与欲望。那个梦影的作品通过细腻的绘画。'}, {'language': 'zh', 'emotion': 'SAD', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:00:53,179 --> 00:01:03,189\n技巧和超越常规的图像构思，带你探索内心深处的情感和欲望。这种感觉真的就是。\n', 'start_time': 53.18, 'end_time': 63.19, 'text': '技巧和超越常规的图像构思，带你探索内心深处的情感和欲望。这种感觉真的就是。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:03,189 --> 00:01:09,019\n是准备好被激发想象力和困惑了吗？哇哦。\n', 'start_time': 63.19, 'end_time': 69.02, 'text': 
'是准备好被激发想象力和困惑了吗？哇哦。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:09,299 --> 00:01:19,310\n通过与艺术家的互动，你将进入一个超越现实的境界，挑战你对现实和逻辑的理解。\n', 'start_time': 69.3, 'end_time': 79.31, 'text': '通过与艺术家的互动，你将进入一个超越现实的境界，挑战你对现实和逻辑的理解。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:20,939 --> 00:01:30,950\n所以快来吧，然后让我们一起投身这场奇境之旅吧，挖掘人类的秘密与欲望。\n', 'start_time': 80.94, 'end_time': 90.95, 'text': '所以快来吧，然后让我们一起投身这场奇境之旅吧，挖掘人类的秘密与欲望。'}, {'language': 'zh', 'emotion': 'SAD', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:31,189 --> 00:01:41,200\n激发你的想象力与困惑，希望这段旅程能让你在艺术与灵魂的碰撞中找到珍贵的。\n', 'start_time': 91.19, 'end_time': 101.2, 'text': '激发你的想象力与困惑，希望这段旅程能让你在艺术与灵魂的碰撞中找到珍贵的。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:41,200 --> 00:01:47,170\n回忆并引发内心的转变与行动。那就是。\n', 'start_time': 101.2, 'end_time': 107.17, 'text': '回忆并引发内心的转变与行动。那就是。'}, {'language': 'zh', 'emotion': 'NEUTRAL', 'audio_type': 'Speech', 'itn': 'withitn', 'srt_content': '1\n00:01:48,760 --> 00:01:51,790\n嗯，准备好了吗？走吧。\n', 'start_time': 108.76, 'end_time': 111.79, 'text': '嗯，准备好了吗？走吧。'}]
-
-print(analyze_audio_data(audio_data))
 
 # 分析音频数据
 class AnalyzeAudioNone:

From e32a3675fcbc5219269c178b659c7e53ace58962 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Wed, 2 Oct 2024 17:05:43 +0800
Subject: [PATCH 05/11] Update Whisper.py

---
 nodes/Whisper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nodes/Whisper.py b/nodes/Whisper.py
index cc669d72..62c44e07 100644
--- a/nodes/Whisper.py
+++ b/nodes/Whisper.py
@@ -162,7 +162,8 @@ def format_time(seconds):
                     "srt_content":srt_content,
                     "start_time":start_time,
                     "end_time":end_time,
-                    "text":text
+                    "text":text,
+                    "language":info.language  # plain string: analyze_audio_data adds it to a set, and a list is unhashable
             })
         
         srt_content="\n".join([s['srt_content'] for s in results])

From d3aaa191489327fa55c2eafa8b65a3f0a8062c65 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Wed, 2 Oct 2024 17:07:10 +0800
Subject: [PATCH 06/11] Update ChatGPT.py

---
 nodes/ChatGPT.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nodes/ChatGPT.py b/nodes/ChatGPT.py
index ed797320..7ad339d6 100644
--- a/nodes/ChatGPT.py
+++ b/nodes/ChatGPT.py
@@ -802,6 +802,9 @@ def INPUT_TYPES(s):
 
     def run(self, json_string,key=""):
 
+        if not isinstance(json_string, str):
+            json_string=json.dumps(json_string)
+    
         json_string=extract_json_strings(json_string)
         # print(json_string)
         good_json_string = repair_json(json_string)

From 2fbee59c3ec43d01a9432248bea71989d4905483 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Thu, 3 Oct 2024 11:12:10 +0800
Subject: [PATCH 07/11] fixbug

---
 nodes/SenseVoice.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/nodes/SenseVoice.py b/nodes/SenseVoice.py
index 04736b13..159a5201 100644
--- a/nodes/SenseVoice.py
+++ b/nodes/SenseVoice.py
@@ -48,6 +48,9 @@ def format_time(seconds):
 
     pattern = r"<\|(.+?)\|><\|(.+?)\|><\|(.+?)\|><\|(.+?)\|>(.+)"
     match = re.match(pattern,asr_result)
+    print('#format_to_srt',match,asr_result)
+    if match==None:
+        return None, None, None, None,None,start_time,end_time,text
     lang, emotion, audio_type, itn, text = match.groups()
      # 😊 表示高兴，😡 表示愤怒，😔 表示悲伤。对于音频事件，🎼 表示音乐，😀 表示笑声，👏 表示掌声
     
@@ -115,16 +118,17 @@ def process_audio(self, waveform, _sample_rate, language, use_itn):
                     part[1], 
                     asr_result)
 
-                results.append({
-                    "language":lang, 
-                    "emotion":emotion,
-                    "audio_type":audio_type, 
-                    "itn":itn,
-                    "srt_content":srt_content,
-                    "start_time":start_time,
-                    "end_time":end_time,
-                    "text":text
-                })
+                if lang!=None:
+                    results.append({
+                        "language":lang, 
+                        "emotion":emotion,
+                        "audio_type":audio_type, 
+                        "itn":itn,
+                        "srt_content":srt_content,
+                        "start_time":start_time,
+                        "end_time":end_time,
+                        "text":text
+                    })
 
             self.vad.vad.all_reset_detection()
             pbar.update(1)  # 更新进度条

From 6579ff20b4a1d04bc90415e8caf0ddf0a6cfa5c2 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Thu, 3 Oct 2024 11:12:22 +0800
Subject: [PATCH 08/11] json_string2

---
 nodes/ChatGPT.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/nodes/ChatGPT.py b/nodes/ChatGPT.py
index 7ad339d6..940879bf 100644
--- a/nodes/ChatGPT.py
+++ b/nodes/ChatGPT.py
@@ -786,9 +786,12 @@ class JsonRepair:
     def INPUT_TYPES(s):
         return {
             "required": {
-                 "json_string":("STRING", {"forceInput": True,}), 
-                 "key":("STRING", {"multiline": False,"dynamicPrompts": False,"default": ""}),
-            }
+                "json_string":("STRING", {"forceInput": True,}), 
+                "key":("STRING", {"multiline": False,"dynamicPrompts": False,"default": ""}),
+            },
+            "optional":{
+                "json_string2":("STRING", {"forceInput": True,})
+            },
         }
 
     INPUT_IS_LIST = False
@@ -800,7 +803,7 @@ def INPUT_TYPES(s):
 
     CATEGORY = "♾️Mixlab/GPT"
 
-    def run(self, json_string,key=""):
+    def run(self, json_string,key="",json_string2=None):
 
         if not isinstance(json_string, str):
             json_string=json.dumps(json_string)
@@ -812,6 +815,20 @@ def run(self, json_string,key=""):
         # 将 JSON 字符串解析为 Python 对象
         data = json.loads(good_json_string)
 
+        if json_string2!=None:
+            if not isinstance(json_string2, str):
+                json_string2=json.dumps(json_string2)
+    
+            json_string2=extract_json_strings(json_string2)
+            # print(json_string)
+            good_json_string2 = repair_json(json_string2)
+
+            # 将 JSON 字符串解析为 Python 对象
+            data2 = json.loads(good_json_string2)
+
+            data={**data, **data2}
+
+
         v=""
         if key!="" and (key in data):
             v=data[key]

From b766b8b65dfb8675088e261a8e5f5a5d53699d6f Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Thu, 3 Oct 2024 11:13:14 +0800
Subject: [PATCH 09/11] Update SenseVoice.py

---
 nodes/SenseVoice.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nodes/SenseVoice.py b/nodes/SenseVoice.py
index 159a5201..61f9a866 100644
--- a/nodes/SenseVoice.py
+++ b/nodes/SenseVoice.py
@@ -50,7 +50,7 @@ def format_time(seconds):
     match = re.match(pattern,asr_result)
     print('#format_to_srt',match,asr_result)
     if match==None:
-        return None, None, None, None,None,start_time,end_time,text
+        return None, None, None, None,None,start_time,end_time,None
     lang, emotion, audio_type, itn, text = match.groups()
      # 😊 表示高兴，😡 表示愤怒，😔 表示悲伤。对于音频事件，🎼 表示音乐，😀 表示笑声，👏 表示掌声
     

From 36ef7d25eff09e63f959cea1ba0d9679f5ab42f0 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Sat, 5 Oct 2024 12:31:22 +0800
Subject: [PATCH 10/11] Update ui_mixlab.js

---
 web/javascript/ui_mixlab.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/javascript/ui_mixlab.js b/web/javascript/ui_mixlab.js
index aff861ca..45026d96 100644
--- a/web/javascript/ui_mixlab.js
+++ b/web/javascript/ui_mixlab.js
@@ -544,7 +544,7 @@ async function getCustomnodeMappings () {
     const data = (await get_nodes_map()).data
     window._nodes_maps = data
   }
-  console.log('#getCustomnodeMappings', window._nodes_maps)
+  // console.log('#getCustomnodeMappings', window._nodes_maps)
   for (let url in window._nodes_maps) {
     let n = window._nodes_maps[url]
     for (let node of n[0]) {

From edd7af986d86222f990bbf3e8b724c0d975c2735 Mon Sep 17 00:00:00 2001
From: shadowcz007 <chizhiwei007@163.com>
Date: Sat, 12 Oct 2024 10:44:39 +0800
Subject: [PATCH 11/11] update

---
 pyproject.toml                        | 2 +-
 web/javascript/checkVersion_mixlab.js | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 41924b13..d250c713 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "comfyui-mixlab-nodes"
 description = "3D, ScreenShareNode & FloatingVideoNode, SpeechRecognition & SpeechSynthesis, GPT, LoadImagesFromLocal, Layers, Other Nodes, ..."
-version = "0.43.0"
+version = "0.44.0"
 license = "MIT"
 dependencies = ["numpy", "pyOpenSSL", "watchdog", "opencv-python-headless", "matplotlib", "openai", "simple-lama-inpainting", "clip-interrogator==0.6.0", "transformers>=4.36.0", "lark-parser", "imageio-ffmpeg", "rembg[gpu]", "omegaconf==2.3.0", "Pillow>=9.5.0", "einops==0.7.0", "trimesh>=4.0.5", "huggingface-hub", "scikit-image"]
 
diff --git a/web/javascript/checkVersion_mixlab.js b/web/javascript/checkVersion_mixlab.js
index 111158e7..eaf9aa9c 100644
--- a/web/javascript/checkVersion_mixlab.js
+++ b/web/javascript/checkVersion_mixlab.js
@@ -3,7 +3,7 @@ import { app } from '../../../scripts/app.js'
 const repoOwner = 'shadowcz007' // 替换为仓库的所有者
 const repoName = 'comfyui-mixlab-nodes' // 替换为仓库的名称
 
-const version = 'v0.43.0'
+const version = 'v0.44.0'
 
 fetch(`https://api.github.com/repos/${repoOwner}/${repoName}/releases/latest`)
   .then(response => response.json())