diff --git a/openwillis/__init__.py b/openwillis/__init__.py
index dd36a91..41d7d3b 100644
--- a/openwillis/__init__.py
+++ b/openwillis/__init__.py
@@ -8,12 +8,13 @@
     emotional_expressivity,
     eye_blink_rate,
     vocal_acoustics,
-    speech_transcription,
+    speech_transcription_whisper,
     speech_characteristics,
-    speaker_separation,
-    speaker_separation_cloud,
-    speech_transcription_cloud,
+    speaker_separation_nolabels,
+    speaker_separation_labels,
+    speech_transcription_aws,
+    speech_transcription_vosk,
     to_audio
 )
 
-__all__ = ["facial_expressivity", "vocal_acoustics", "emotional_expressivity", "eye_blink_rate", "speech_transcription", "speech_characteristics", "speaker_separation", "speaker_separation_cloud", "speech_transcription_cloud", "to_audio"]
+__all__ = ["facial_expressivity", "vocal_acoustics", "emotional_expressivity", "eye_blink_rate", "speech_transcription_whisper", "speech_characteristics", "speaker_separation_nolabels", "speaker_separation_labels", "speech_transcription_aws", "speech_transcription_vosk", "to_audio"]
diff --git a/openwillis/measures/api.py b/openwillis/measures/api.py
index ecc0897..38dad3e 100644
--- a/openwillis/measures/api.py
+++ b/openwillis/measures/api.py
@@ -9,10 +9,11 @@
 )
 from openwillis.measures.audio import (
     vocal_acoustics,
-    speech_transcription,
-    speaker_separation,
-    speaker_separation_cloud,
-    speech_transcription_cloud,
+    speech_transcription_whisper,
+    speaker_separation_nolabels,
+    speaker_separation_labels,
+    speech_transcription_aws,
+    speech_transcription_vosk
 )
 from openwillis.measures.text import (
     speech_characteristics
diff --git a/openwillis/measures/audio/__init__.py b/openwillis/measures/audio/__init__.py
index d53a3af..355a3bd 100644
--- a/openwillis/measures/audio/__init__.py
+++ b/openwillis/measures/audio/__init__.py
@@ -2,20 +2,24 @@
     vocal_acoustics,
 )
 
-from openwillis.measures.audio.speech_transcribe import (
-    speech_transcription,
+from openwillis.measures.audio.speech_transcribe_whisper import (
+    speech_transcription_whisper,
 )
 
-from openwillis.measures.audio.speech_separation import (
-    speaker_separation,
+from openwillis.measures.audio.speech_separation_nlabels import (
+    speaker_separation_nolabels,
 )
 
-from openwillis.measures.audio.speech_separation_cloud import (
-    speaker_separation_cloud,
+from openwillis.measures.audio.speech_separation_labels import (
+    speaker_separation_labels,
 )
 
 from openwillis.measures.audio.speech_transcribe_cloud import (
-    speech_transcription_cloud,
+    speech_transcription_aws,
 )
 
-__all__ = ["vocal_acoustics", "speech_transcription", "speaker_separation", "speaker_separation_cloud", "speech_transcription_cloud"]
+from openwillis.measures.audio.speech_transcribe_vosk import (
+    speech_transcription_vosk,
+)
+
+__all__ = ["vocal_acoustics", "speech_transcription_whisper", "speaker_separation_nolabels", "speaker_separation_labels", "speech_transcription_aws", "speech_transcription_vosk"]
diff --git a/openwillis/measures/audio/speech_separation_cloud.py b/openwillis/measures/audio/speech_separation_labels.py
similarity index 69%
rename from openwillis/measures/audio/speech_separation_cloud.py
rename to openwillis/measures/audio/speech_separation_labels.py
index f314c4d..d251e59 100644
--- a/openwillis/measures/audio/speech_separation_cloud.py
+++ b/openwillis/measures/audio/speech_separation_labels.py
@@ -38,7 +38,23 @@ def get_config():
     measures = json.load(file)
     return measures
 
-def speaker_separation_cloud(filepath, json_response):
+def is_amazon_transcribe(json_conf):
+    """
+    ------------------------------------------------------------------------------------------------------
+    This function checks if the json response object is from Amazon Transcribe.
+    Parameters:
+    ...........
+    json_conf: dict
+        JSON response object.
+    Returns:
+    ...........
+    bool: True if the json response object
+        is from Amazon Transcribe, False otherwise.
+    ------------------------------------------------------------------------------------------------------
+    """
+    return "jobName" in json_conf and "results" in json_conf
+
+def speaker_separation_labels(filepath, transcript_json):
     """
     ------------------------------------------------------------------------------------------------------
 
@@ -48,7 +64,7 @@
     ...........
     filepath : str
         Path to the input audio file.
-    json_response : json
+    transcript_json : json
         Speech transcription json response.
 
     Returns:
@@ -66,8 +82,12 @@
         return signal_label
 
     audio_signal = AudioSegment.from_file(file = filepath, format = "wav")
-    speaker_df, speaker_count = sutil.transcribe_response_to_dataframe(json_response)
 
+    if not is_amazon_transcribe(transcript_json):
+        speaker_df, speaker_count = sutil.whisperx_to_dataframe(transcript_json)
+    else:
+        speaker_df, speaker_count = sutil.transcribe_response_to_dataframe(transcript_json)
+
     if len(speaker_df)>0 and speaker_count>1:
         signal_label = sutil.generate_audio_signal(speaker_df , audio_signal, '', measures)
 
diff --git a/openwillis/measures/audio/speech_separation.py b/openwillis/measures/audio/speech_separation_nlabels.py
similarity index 83%
rename from openwillis/measures/audio/speech_separation.py
rename to openwillis/measures/audio/speech_separation_nlabels.py
index 58d6777..62409eb 100644
--- a/openwillis/measures/audio/speech_separation.py
+++ b/openwillis/measures/audio/speech_separation_nlabels.py
@@ -3,13 +3,11 @@
 
 # import the required packages
 from pyannote.audio import Pipeline
-from openwillis.measures.audio.util import util as ut
 from openwillis.measures.audio.util import separation_util as sutil
 from pydub import AudioSegment
 
 import os
 import json
-import shutil
 import pandas as pd
 import logging
 
@@ -89,11 +87,11 @@
     ------------------------------------------------------------------------------------------------------
     """
     input_param = {}
-    input_param['model'] = kwargs.get('model', 'pyannote')
-    input_param['hf_token'] = kwargs.get('hf_token', '')
-    input_param['json_response'] = kwargs.get('json_response', json.loads("{}"))
-    input_param['c_scale'] = kwargs.get('c_scale', '')
+
+    input_param['hf_token'] = kwargs.get('hf_token', '')
+    input_param['transcript_json'] = kwargs.get('transcript_json', json.dumps({}))
+    input_param['context'] = kwargs.get('context', '')
     return input_param
 
 def get_pyannote(input_param, file_name, filepath):
@@ -122,12 +119,12 @@
     """
     diart_df = run_pyannote(filepath, input_param['hf_token'])
 
-    transcribe_df = pd.DataFrame(input_param['json_response'])
+    transcribe_df = pd.DataFrame(input_param['transcript_json'])
     speaker_df, speaker_count = sutil.get_speaker_identification(diart_df, transcribe_df)
     return speaker_df, speaker_count
 
-def speaker_separation(filepath, **kwargs):
+def speaker_separation_nolabels(filepath, **kwargs):
     """
     ------------------------------------------------------------------------------------------------------
 
@@ -137,14 +134,12 @@
     ...........
    filepath : str
         Path to the input audio file.
+    transcript_json : json
+        Speech transcription json response.
     hf_token : str
         Access token for HuggingFace to access pre-trained models.
-    json_response : json
-        Speech transcription json response.
-    model : str, optional
-        Model to use for speech diarization, default is 'pyannote'.
-    c_scale : str, optional
-        Clinical scale to use for slicing the separated audio files, if any.
+    context : str, optional
+        Scale to use for slicing the separated audio files, if any.
 
     Returns:
     ...........
@@ -160,18 +155,14 @@
     measures = get_config()
 
     try:
-        if not os.path.exists(filepath) or 'json_response' not in kwargs:
+        if not os.path.exists(filepath) or 'transcript_json' not in kwargs:
             return signal_label
 
-        if input_param['model'] == 'whisperx':
-            input_param['c_scale'] = ''
-            speaker_df, speaker_count = sutil.whisperx_to_dataframe(input_param['json_response'])
-        else:
-            speaker_df, speaker_count = get_pyannote(input_param, file_name, filepath)
-
+        speaker_df, speaker_count = get_pyannote(input_param, file_name, filepath)
         audio_signal = AudioSegment.from_file(file = filepath, format = "wav")
+
         if len(speaker_df)>0 and speaker_count>1:
-            signal_label = sutil.generate_audio_signal(speaker_df , audio_signal, input_param['c_scale'], measures)
+            signal_label = sutil.generate_audio_signal(speaker_df, audio_signal, input_param['context'], measures)
 
     except Exception as e:
         logger.error(f'Error in diarization processing: {e} & File: {filepath}')
 
diff --git a/openwillis/measures/audio/speech_transcribe_cloud.py b/openwillis/measures/audio/speech_transcribe_cloud.py
index 513e7a5..6156262 100644
--- a/openwillis/measures/audio/speech_transcribe_cloud.py
+++ b/openwillis/measures/audio/speech_transcribe_cloud.py
@@ -1,5 +1,5 @@
 # author: Vijay Yadav
-# website: http://www.bklynhlth.com
+# website: http://www.brooklyn.health
 
 # import the required packages
 import os
@@ -53,20 +53,19 @@
     ------------------------------------------------------------------------------------------------------
     """
     input_param = {}
-    input_param['model'] = kwargs.get('model', 'pyannote')
     input_param['language'] = kwargs.get('language', 'en-US')
     input_param['region'] = kwargs.get('region', 'us-east-1')
     input_param['job_name'] = kwargs.get('job_name', 'transcribe_job_01')
-    input_param['ShowSpeakerLabels'] = kwargs.get('ShowSpeakerLabels', True)
-    input_param['MaxSpeakerLabels'] = kwargs.get('MaxSpeakerLabels', 2)
+    input_param['speaker_labels'] = kwargs.get('speaker_labels', False)
+    input_param['max_speakers'] = kwargs.get('max_speakers', 2)
 
-    input_param['c_scale'] = kwargs.get('c_scale', '')
+    input_param['context'] = kwargs.get('context', '')
     input_param['access_key'] = kwargs.get('access_key', '')
     input_param['secret_key'] = kwargs.get('secret_key', '')
     return input_param
 
-def speech_transcription_cloud(filepath, **kwargs):
+def speech_transcription_aws(s3_uri, **kwargs):
     """
     ------------------------------------------------------------------------------------------------------
 
@@ -74,29 +73,26 @@
     Parameters:
     ...........
-    filepath : str
+    s3_uri : str
         The S3 uri for the recording to be transcribed.
     kwargs: Object
-    model : str, optional
-        The transcription model to use ('aws'). Default is 'aws'.
     language : str, optional
         The language of the audio file (e.g. 'en-US', 'en-IN'). Default is 'en-US'.
     region : str, optional
         The AWS region to use (e.g. 'us-east-1').
-            Only applicable if model is 'aws'. Default is 'us-east-1'.
+            Default is 'us-east-1'.
         job_name : str, optional
             The name of the transcription job.
-            Only applicable if model is 'aws'. Default is 'transcribe_job_01'.
+            Default is 'transcribe_job_01'.
-    ShowSpeakerLabels : boolean, optional
-        Show speaker labels
-    MaxSpeakerLabels : int, optional
-        Max number of speakers
-    c_scale : str, optional
-        Clinical scale to use for slicing the separated audio files, if any.
     access_key : str, optional
         AWS access key
     secret_key : str, optional
         AWS secret key
-
-
+    speaker_labels : boolean, optional
+        Show speaker labels
+    max_speakers : int, optional
+        Max number of speakers
+    context : str, optional
+        Scale to use for slicing the separated audio files, if any.
+
     Returns:
     ...........
     json_response : JSON Object
@@ -108,10 +104,10 @@
     """
     input_param = read_kwargs(kwargs)
     measures = get_config()
-    json_response, transcript = tutil.transcribe_audio(filepath, input_param)
-
-    if input_param['ShowSpeakerLabels'] == True and input_param['c_scale']:
+    json_response, transcript = tutil.transcribe_audio(s3_uri, input_param)
+
+    if input_param['speaker_labels'] == True and input_param['context'].lower() in measures['scale'].split(','):
         content_dict = tutil.extract_content(json_response)
-        json_response = tutil.get_clinical_labels(input_param['c_scale'], measures, content_dict, json_response)
-
+
+        json_response = tutil.get_clinical_labels(input_param['context'], measures, content_dict, json_response)
     return json_response, transcript
diff --git a/openwillis/measures/audio/speech_transcribe.py b/openwillis/measures/audio/speech_transcribe_vosk.py
similarity index 72%
rename from openwillis/measures/audio/speech_transcribe.py
rename to openwillis/measures/audio/speech_transcribe_vosk.py
index 6be8eb5..f18ae9f 100644
--- a/openwillis/measures/audio/speech_transcribe.py
+++ b/openwillis/measures/audio/speech_transcribe_vosk.py
@@ -1,69 +1,43 @@
 # author: Vijay Yadav
-# website: http://www.bklynhlth.com
+# website: http://www.brooklyn.health
 
 # import the required packages
-
-import numpy as np
-import pandas as pd
 import os
 import wave
 import json
 import logging
+from vosk import Model, KaldiRecognizer
 
 from pydub import AudioSegment
 from openwillis.measures.audio.util import util as ut
-from openwillis.measures.audio.util import transcribe_util as tutil
 
 logging.basicConfig(level=logging.INFO)
 logger=logging.getLogger()
 
-def run_vosk(filepath, language='en-us', transcribe_interval = []):
+def get_config():
     """
     ------------------------------------------------------------------------------------------------------
 
-    Transcribe speech in an audio file using the Vosk model.
+    Load the configuration settings for the speech transcription.
 
     Parameters:
-    ............
-    filepath : str
-        The path to the audio file to be transcribed.
-    language : str, optional
-        The language of the audio file (e.g. 'en-us', 'es', 'fr'). Default is 'en-us'.
-    transcribe_interval : list, optional
-        A list of tuples representing the start and end times (in seconds) of segments of the audio file to be transcribed.
-        Default is an empty list.
+    ...........
+    None
 
     Returns:
-    ............
-    json_response : str
-        The JSON response from the Vosk transcription service.
-    transcript : str
-        The transcript of the audio file.
+    ...........
+    measures : dict
+        A dictionary containing the configuration settings.
------------------------------------------------------------------------------------------------------ """ - json_response = '{}' - transcript = mono_filepath = '' - - try: - if os.path.exists(filepath): - - measures = get_config() - mono_filepath = stereo_to_mono(filepath, transcribe_interval) - results = get_vosk(mono_filepath, language) - - ut.remove_dir(os.path.dirname(mono_filepath)) #Clean temp directory - json_response, transcript = filter_speech(measures, results) - - else: - logger.info(f'Audio file not available. File: {filepath}') - - except Exception as e: - ut.remove_dir(os.path.dirname(mono_filepath))#Clean temp directory - logger.error(f'Error in speech Transcription: {e} & File: {filepath}') + #Loading json config + dir_name = os.path.dirname(os.path.abspath(__file__)) + measure_path = os.path.abspath(os.path.join(dir_name, 'config/speech.json')) - finally: - return json_response, transcript + file = open(measure_path) + measures = json.load(file) + return measures def filter_audio(filepath, t_interval): """ @@ -96,35 +70,42 @@ def filter_audio(filepath, t_interval): sound = sound.set_channels(1) return sound -def stereo_to_mono(filepath, t_interval): +def filter_speech(measures, results): """ ------------------------------------------------------------------------------------------------------ - Convert a stereo audio file to a mono audio file. + Filter the speech transcription results to extract the transcript. Parameters: - ............ - filepath : str - The path to the stereo audio file to be converted. - t_interval : list - A list of tuples representing the start and end times (in seconds) of segments of the audio file to be transcribed. + ........... + measures : dict + A dictionary containing the configuration settings for the speech transcription. + results : list of dict + The raw transcription results returned by the transcription service. Returns: - ............ - mono_filepath : str - The path to the mono audio file. + ........... + result_key : list + A list containing the framewise transcription of the audio file. + transcript : str + The transcript of the audio file. 
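+
+    Example:
+    ...........
+    An illustrative sketch of the expected Vosk result shape (values are made up;
+    the measures dict is unused by the filtering itself):
+    >>> results = [{'result': [{'word': 'hello', 'start': 0.0, 'end': 0.4, 'conf': 0.95}], 'text': 'hello'}]
+    >>> filter_speech({}, results)
+    ([{'word': 'hello', 'start': 0.0, 'end': 0.4, 'conf': 0.95}], 'hello')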
------------------------------------------------------------------------------------------------------ """ - sound = filter_audio(filepath, t_interval) + result_key = [] + text_key = [] + transcript_dict = {} - filename, _ = os.path.splitext(os.path.basename(filepath)) - dir_name = os.path.join(os.path.dirname(filepath), 'temp_mono_' + filename) + for res in results: + dict_keys = res.keys() - ut.make_dir(dir_name) - mono_filepath = os.path.join(dir_name, filename + '.wav') - sound.export(mono_filepath, format="wav") - return mono_filepath + if 'result' in dict_keys and 'text' in dict_keys: + result_key.extend(res['result']) + text_key.append(res['text']) + + transcript_dict['result'] = result_key + transcript_dict['text'] = ' '.join(text_key) + return result_key, ' '.join(text_key) def get_vosk(audio_path, lang): """ @@ -146,9 +127,6 @@ def get_vosk(audio_path, lang): ------------------------------------------------------------------------------------------------------ """ - #import in-case of model=vosk - from vosk import Model, KaldiRecognizer - model = Model(lang=lang) wf = wave.open(audio_path, "rb") @@ -170,126 +148,96 @@ def get_vosk(audio_path, lang): results.append(partial_result) return results -def filter_speech(measures, results): +def stereo_to_mono(filepath, t_interval): """ ------------------------------------------------------------------------------------------------------ - Filter the speech transcription results to extract the transcript. + Convert a stereo audio file to a mono audio file. Parameters: - ........... - measures : dict - A dictionary containing the configuration settings for the speech transcription. - results : list of dict - The raw transcription results returned by the transcription service. + ............ + filepath : str + The path to the stereo audio file to be converted. + t_interval : list + A list of tuples representing the start and end times (in seconds) of segments of the audio file to be transcribed. Returns: - ........... - result_key : list - A list containing the framewise transcription of the audio file. - transcript : str - The transcript of the audio file. + ............ + mono_filepath : str + The path to the mono audio file. ------------------------------------------------------------------------------------------------------ """ - result_key = [] - text_key = [] - transcript_dict = {} - - for res in results: - dict_keys = res.keys() - - if 'result' in dict_keys and 'text' in dict_keys: - result_key.extend(res['result']) - text_key.append(res['text']) + sound = filter_audio(filepath, t_interval) - transcript_dict['result'] = result_key - transcript_dict['text'] = ' '.join(text_key) - return result_key, ' '.join(text_key) + filename, _ = os.path.splitext(os.path.basename(filepath)) + dir_name = os.path.join(os.path.dirname(filepath), 'temp_mono_' + filename) + ut.make_dir(dir_name) + mono_filepath = os.path.join(dir_name, filename + '.wav') + sound.export(mono_filepath, format="wav") + return mono_filepath -def get_config(): +def run_vosk(filepath, language, transcribe_interval = []): """ ------------------------------------------------------------------------------------------------------ - Load the configuration settings for the speech transcription. + Transcribe speech in an audio file using the Vosk model. Parameters: - ........... - None + ............ + filepath : str + The path to the audio file to be transcribed. + language : str, optional + The language of the audio file (e.g. 'en-us', 'es', 'fr'). Default is 'en-us'. 
+ transcribe_interval : list, optional + A list of tuples representing the start and end times (in seconds) of segments of the audio file to be transcribed. + Default is an empty list. Returns: - ........... - measures : dict - A dictionary containing the configuration settings. + ............ + json_response : str + The JSON response from the Vosk transcription service. + transcript : str + The transcript of the audio file. ------------------------------------------------------------------------------------------------------ """ - #Loading json config - dir_name = os.path.dirname(os.path.abspath(__file__)) - measure_path = os.path.abspath(os.path.join(dir_name, 'config/speech.json')) + json_response = json.dumps({}) + transcript = mono_filepath = '' - file = open(measure_path) - measures = json.load(file) - return measures + try: + if os.path.exists(filepath): -def run_whisperx(filepath, hf_token, del_model, num_speakers, infra_model, language): - """ - ------------------------------------------------------------------------------------------------------ + measures = get_config() + mono_filepath = stereo_to_mono(filepath, transcribe_interval) + results = get_vosk(mono_filepath, language) - Transcribe audio data using the WhisperX model. + ut.remove_dir(os.path.dirname(mono_filepath)) #Clean temp directory + json_response, transcript = filter_speech(measures, results) - Parameters: - ........... - filepath : str - The path to the audio file to be transcribed. - hf_token : str - The Hugging Face token for model authentication. - del_model: boolean - Boolean indicator to delete model if low on GPU resources - num_speakers: int - Number of speaker - infra_model:list - whisper model artifacts (this is optional param: to optimize willisInfra) - language: str - language code + else: + logger.info(f'Audio file not available. File: {filepath}') - Returns: - ........... - json_response : JSON Object - A transcription response object in JSON format - transcript : str - The transcript of the recording. + except Exception as e: + ut.remove_dir(os.path.dirname(mono_filepath))#Clean temp directory + logger.error(f'Error in speech Transcription: {e} & File: {filepath}') - ------------------------------------------------------------------------------------------------------ - """ - json_response = '{}' - transcript = '' - - if os.path.exists(filepath)== False or hf_token == '': + finally: return json_response, transcript - - from openwillis.measures.audio.util import whisperx_util as wutil #import in-case of model=whisperx - json_response, transcript = wutil.get_whisperx_diariazation(filepath, hf_token, del_model, num_speakers, infra_model, language) - - if str(json_response) != '{}': - json_response = tutil.replace_whisperx_speaker_labels(json_response, ['SPEAKER_00', 'SPEAKER_01'], - ['speaker0', 'speaker1']) - return json_response, transcript + -def speech_transcription(filepath, **kwargs): +def speech_transcription_vosk(filepath, **kwargs): """ ------------------------------------------------------------------------------------------------------ - Speech transcription function that transcribes an audio file using vosk/whisperx. + Speech transcription function that transcribes an audio file using vosk. Parameters: ........... filepath : str The path to the audio file to be transcribed. - model : str, optional - The transcription model to use ('vosk'). Default is 'vosk'. language : str, optional The language of the audio file (e.g. 'en-us', 'es', 'fr'). Default is 'en-us'. 
    transcribe_interval : list, optional
@@ -307,24 +255,10 @@
     """
     measures = get_config()
-    model = kwargs.get('model', 'vosk')
-    language = kwargs.get('language', 'en-us')
-    scale = kwargs.get('c_scale', '')
-    num_speakers = kwargs.get('num_speakers', None)
-    transcribe_interval = kwargs.get('transcribe_interval', [])
-    hf_token = kwargs.get('hf_token', '')
-    del_model = kwargs.get('del_model', False)
-    infra_model = kwargs.get('infra_model', [True, None, None])
+    language = kwargs.get('language', 'en-us')
+    transcribe_interval = kwargs.get('transcribe_interval', [])
 
-    if model == 'whisperx':
-        json_response, transcript = run_whisperx(filepath, hf_token, del_model, num_speakers, infra_model, language)
-
-        if scale.lower() in measures['scale'].split(','):
-            content_dict = tutil.get_whisperx_content(json_response)
-            json_response = tutil.get_whisperx_clinical_labels(scale, measures, content_dict, json_response)
-
-    else:
-        json_response, transcript = run_vosk(filepath, language, transcribe_interval)
+    json_response, transcript = run_vosk(filepath, language, transcribe_interval)
 
     return json_response, transcript
diff --git a/openwillis/measures/audio/speech_transcribe_whisper.py b/openwillis/measures/audio/speech_transcribe_whisper.py
new file mode 100644
index 0000000..5e5371d
--- /dev/null
+++ b/openwillis/measures/audio/speech_transcribe_whisper.py
@@ -0,0 +1,148 @@
+# author: Vijay Yadav
+# website: http://www.brooklyn.health
+
+# import the required packages
+
+import numpy as np
+import pandas as pd
+import os
+import json
+import logging
+
+from pydub import AudioSegment
+from openwillis.measures.audio.util import util as ut
+from openwillis.measures.audio.util import transcribe_util as tutil
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+
+def get_config():
+    """
+    ------------------------------------------------------------------------------------------------------
+
+    Load the configuration settings for the speech transcription.
+
+    Parameters:
+    ...........
+    None
+
+    Returns:
+    ...........
+    measures : dict
+        A dictionary containing the configuration settings.
+
+    ------------------------------------------------------------------------------------------------------
+    """
+    #Loading json config
+    dir_name = os.path.dirname(os.path.abspath(__file__))
+    measure_path = os.path.abspath(os.path.join(dir_name, 'config/speech.json'))
+
+    file = open(measure_path)
+    measures = json.load(file)
+    return measures
+
+def read_kwargs(kwargs):
+    """
+    ------------------------------------------------------------------------------------------------------
+
+    Reads keyword arguments and returns a dictionary containing input parameters.
+
+    Parameters:
+    ...........
+    kwargs : dict
+        Keyword arguments to be processed.
+
+    Returns:
+    ...........
+    input_param: dict
+        A dictionary containing input parameters with their corresponding values.
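+
+    Example:
+    ...........
+    Illustrative only; any key that is not supplied falls back to its default:
+    >>> read_kwargs({'model': 'large-v2', 'hf_token': 'hf_xxx'})['model']
+    'large-v2'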
+
+    ------------------------------------------------------------------------------------------------------
+    """
+    input_param = {}
+    input_param['model'] = kwargs.get('model', 'tiny')
+    input_param['language'] = kwargs.get('language', 'en')
+
+    input_param['context'] = kwargs.get('context', '')
+    input_param['max_speakers'] = kwargs.get('max_speakers', None)
+    input_param['min_speakers'] = kwargs.get('min_speakers', None)
+
+    input_param['hf_token'] = kwargs.get('hf_token', '')
+    input_param['del_model'] = kwargs.get('del_model', False) #Temp filter
+    input_param['infra_model'] = kwargs.get('infra_model', [True, None, None]) #Temp filter
+
+    return input_param
+
+def run_whisperx(filepath, input_param):
+    """
+    ------------------------------------------------------------------------------------------------------
+
+    Transcribe audio data using the WhisperX model.
+
+    Parameters:
+    ...........
+    filepath : str
+        The path to the audio file to be transcribed.
+    input_param : dict
+        A dictionary containing input parameters
+
+    Returns:
+    ...........
+    json_response : JSON Object
+        A transcription response object in JSON format
+    transcript : str
+        The transcript of the recording.
+
+    ------------------------------------------------------------------------------------------------------
+    """
+    json_response = json.dumps({})
+    transcript = ''
+
+    if not os.path.exists(filepath) or input_param['hf_token'] == '':
+        return json_response, transcript
+
+    from openwillis.measures.audio.util import whisperx_util as wutil #lazy import: whisperx is only loaded when needed
+    json_response, transcript = wutil.get_whisperx_diariazation(filepath, input_param)
+
+    if str(json_response) != '{}':
+        json_response = tutil.filter_labels_whisper(json_response)
+    return json_response, transcript
+
+
+def speech_transcription_whisper(filepath, **kwargs):
+    """
+    ------------------------------------------------------------------------------------------------------
+
+    Speech transcription function that transcribes an audio file using whisperx.
+
+    Parameters:
+    ...........
+    filepath : str
+        The path to the audio file to be transcribed.
+    model : str, optional
+        The WhisperX model to use (e.g. 'tiny', 'large-v2'). Default is 'tiny'.
+    language : str, optional
+        The language of the audio file (e.g. 'en', 'es', 'fr'). Default is 'en'.
+    context : str, optional
+        Scale to use for slicing the separated audio files, if any.
+    min_speakers : int, optional
+        Minimum number of speakers. Default is None.
+    max_speakers : int, optional
+        Maximum number of speakers. Default is None.
+    hf_token : str, optional
+        Access token for HuggingFace to access pre-trained models;
+        transcription is skipped if it is empty.
+    del_model : boolean, optional
+        Boolean indicator to delete model if low on GPU resources. Default is False.
+    infra_model : list, optional
+        Whisper model artifacts (optional parameter to optimize willisInfra).
+
+    Returns:
+    ...........
+    json_response : JSON Object
+        A transcription response object in JSON format
+    transcript : str
+        The transcript of the recording.
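+
+    Example:
+    ...........
+    A minimal usage sketch; 'audio.wav' and the Hugging Face token value are
+    illustrative placeholders, not shipped defaults:
+    >>> json_response, transcript = speech_transcription_whisper(
+    ...     'audio.wav', model='tiny', language='en', hf_token='hf_xxx', max_speakers=2)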
+ + ------------------------------------------------------------------------------------------------------ + """ + measures = get_config() + input_param = read_kwargs(kwargs) + + json_response, transcript = run_whisperx(filepath, input_param) + if input_param['context'].lower() in measures['scale'].split(','): + + content_dict = tutil.get_whisperx_content(json_response) + json_response = tutil.get_whisperx_clinical_labels(input_param['context'], measures, content_dict, json_response) + return json_response, transcript diff --git a/openwillis/measures/audio/util/separation_util.py b/openwillis/measures/audio/util/separation_util.py index 0959a4c..fff47d4 100644 --- a/openwillis/measures/audio/util/separation_util.py +++ b/openwillis/measures/audio/util/separation_util.py @@ -322,7 +322,7 @@ def transcribe_response_to_dataframe(response): speakers = 0 df = pd.DataFrame() - if 'segments' in response: + if 'results' in response: if 'speaker_labels' in response['results']: if 'speakers' in response['results']['speaker_labels']: @@ -338,6 +338,7 @@ def transcribe_response_to_dataframe(response): df = df[df["confidence"] > 0].reset_index(drop=True) df = df[["start_time", "end_time", "confidence", "speaker_label", "content"]] + return df, speakers def extract_data(segment_info): @@ -358,12 +359,18 @@ def extract_data(segment_info): ------------------------------------------------------------------------------------------------------ """ - phrase = segment_info["text"] - start = segment_info["start"] - end = segment_info["end"] + phrase = segment_info.get("text", "") + start = segment_info.get("start", np.nan) - score = segment_info["words"][0]["score"] if segment_info["words"] and len(segment_info["words"]) > 0 else 0 - speaker = segment_info["speaker"] if "speaker" in segment_info else "no_speaker" + end = segment_info.get("end", np.nan) + words = segment_info.get("words", None) + + if words is not None and len(words) > 0: + score = words[0].get("score", 0) + else: + score = 0 + + speaker = segment_info.get("speaker", "no_speaker") return pd.Series([start, end, phrase, score, speaker], index=["start", "end", "phrase", "score", "speaker"]) def whisperx_to_dataframe(json_response): @@ -386,23 +393,17 @@ def whisperx_to_dataframe(json_response): ------------------------------------------------------------------------------------------------------ """ - # Initialize an empty DataFrame - df = pd.DataFrame(columns=["start", "end", "phrase", "score", "speaker"]) + df = pd.DataFrame(columns=["start_time", "end_time", "content", "confidence", "speaker_label"]) if 'segments' in json_response: - for segment_info in json_response["segments"]: - try: - - segment_df = extract_data(segment_info) - df = df.append(segment_df, ignore_index=True) - - except Exception as e: - logger.info("Some segments have no speaker labels.") - - df = df[df["score"] > 0].reset_index(drop=True) - df = df[df["speaker"] != "no_speaker"].reset_index(drop=True) - df = df.rename(columns={"start": "start_time", "end": "end_time", "score":"confidence", "speaker":"speaker_label", - "phrase":"content"}) - + segment_infos = json_response["segments"] + df = pd.DataFrame(segment_infos).apply(extract_data, axis=1) + + df = df[df["score"] > 0].reset_index(drop=True) + df = df.dropna(subset=["start", "end"]).reset_index(drop=True) + + df = df[df["speaker"] != "no_speaker"].reset_index(drop=True) + df = df.rename(columns={"start": "start_time", "end": "end_time", "score": "confidence", "speaker": "speaker_label", "phrase": "content"}) + speakers = 
df['speaker_label'].nunique() return df, speakers \ No newline at end of file diff --git a/openwillis/measures/audio/util/transcribe_util.py b/openwillis/measures/audio/util/transcribe_util.py index 5e4049c..9b99542 100644 --- a/openwillis/measures/audio/util/transcribe_util.py +++ b/openwillis/measures/audio/util/transcribe_util.py @@ -1,5 +1,5 @@ # author: Vijay Yadav -# website: http://www.bklynhlth.com +# website: http://www.brooklyn.health # import the required packages @@ -58,6 +58,89 @@ def replace_speaker_labels(data, check_labels, speaker_labels): return data +def filter_labels_aws(data): + """ + ------------------------------------------------------------------------------------------------------ + + replaces speaker labels in AWS JSON. + + Parameters: + ........... + data : JSON + The JSON containing speaker labels. + + Returns: + ........... + data : JSON + The modified JSON with replaced speaker labels. + + ------------------------------------------------------------------------------------------------------ + """ + if 'results' in data: + speaker_labels = data['results'].get('speaker_labels', {}) + segments = speaker_labels.get('segments', []) + + for segment in segments: + seg_speaker_label = segment.get('speaker_label', '') + + if 'spk_' in seg_speaker_label: + segment['speaker_label'] = seg_speaker_label.replace("spk_", "speaker") + + seg_items = segment.get('items', []) + for seg_item in seg_items: + + seg_item_speaker_label = seg_item.get('speaker_label', '') + if 'spk_' in seg_item_speaker_label: + + seg_item['speaker_label'] = seg_item_speaker_label.replace("spk_", "speaker") + items = data['results'].get('items', []) + + for item in items: + item_speaker_label = item.get('speaker_label', '') + + if 'spk_' in item_speaker_label: + item['speaker_label'] = item_speaker_label.replace("spk_", "speaker") + + return data + +def filter_labels_whisper(data): + """ + ------------------------------------------------------------------------------------------------------ + + replaces speaker labels in Whisper JSON. + + Parameters: + ........... + data : JSON + The JSON containing speaker labels. + + Returns: + ........... + data : JSON + The modified JSON with replaced speaker labels. 
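+
+    Example:
+    ...........
+    Illustrative input/output (structure assumed from WhisperX responses):
+    >>> data = {'segments': [{'speaker': 'SPEAKER_00', 'words': [{'speaker': 'SPEAKER_00'}]}]}
+    >>> filter_labels_whisper(data)['segments'][0]['speaker']
+    'speaker0'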
+ + ------------------------------------------------------------------------------------------------------ + """ + for segment in data.get('segments', []): + current_speaker = segment.get('speaker', '') + + if 'SPEAKER_0' in current_speaker: + segment["speaker"] = current_speaker.replace("SPEAKER_0", "speaker") + + for word in segment["words"]: + word_speaker = word.get('speaker', '') + + if 'SPEAKER_0' in word_speaker: + word["speaker"] = word_speaker.replace("SPEAKER_0", "speaker") + + for word_segment in data.get('word_segments', []): + word_seg_speaker = word_segment.get('speaker', '') + + if 'SPEAKER_0' in word_seg_speaker: + word_segment["speaker"] = word_seg_speaker.replace("SPEAKER_0", "speaker") + + return data + def extract_content(data): """ ------------------------------------------------------------------------------------------------------ @@ -117,20 +200,19 @@ def get_clinical_labels(scale, measures, content_dict, json_response): ------------------------------------------------------------------------------------------------------ """ #Check if content is available for all the speaker - if content_dict and content_dict['speaker0'] and content_dict['speaker1']: - if scale.lower() not in measures['scale'].split(","): - return json_response + if len(content_dict) <2: + return json_response - score_string = scale.lower()+'_string' - spk1_score = sutil.match_transcript(measures[score_string], content_dict['speaker0']) - spk2_score = sutil.match_transcript(measures[score_string], content_dict['speaker1']) - - if spk1_score > spk2_score: - json_response = replace_speaker_labels(json_response, ['speaker0', 'speaker1'], ['clinician', 'participant']) + score_string = scale.lower()+'_string' + spk1_score = sutil.match_transcript(measures[score_string], content_dict['speaker0']) + spk2_score = sutil.match_transcript(measures[score_string], content_dict['speaker1']) - else: - json_response = replace_speaker_labels(json_response, ['speaker0', 'speaker1'], ['participant', 'clinician']) + if spk1_score > spk2_score: + json_response = replace_speaker_labels(json_response, ['speaker0', 'speaker1'], ['clinician', 'participant']) + else: + json_response = replace_speaker_labels(json_response, ['speaker0', 'speaker1'], ['participant', 'clinician']) + return json_response def get_job_status(transcribe, input_param): @@ -193,9 +275,9 @@ def filter_transcript_response(status, input_param): response = json.loads(read_data.read().decode('utf-8')) transcript = response['results']['transcripts'][0]['transcript'] - if input_param['ShowSpeakerLabels'] == True:#replace speaker labels with standard names + if input_param['speaker_labels'] == True:#replace speaker labels with standard names - response = replace_speaker_labels(response, ['spk_0', 'spk_1'], ['speaker0', 'speaker1']) + response = filter_labels_aws(response) return response, transcript def transcribe_audio(s3uri, input_param): @@ -222,24 +304,26 @@ def transcribe_audio(s3uri, input_param): ------------------------------------------------------------------------------------------------------ """ - response = json.loads("{}") + response = json.dumps({}) + settings = {} transcript = "" try: if input_param['access_key'] and input_param['secret_key']: - transcribe = boto3.client('transcribe', region_name = input_param['region'], aws_access_key_id = input_param['access_key'], aws_secret_access_key = input_param['secret_key']) + transcribe = boto3.client('transcribe', region_name = input_param['region'], + aws_access_key_id = 
input_param['access_key'], + aws_secret_access_key = input_param['secret_key']) else: transcribe = boto3.client('transcribe', region_name = input_param['region']) - settings = {'ShowSpeakerLabels': input_param['ShowSpeakerLabels'], 'MaxSpeakerLabels': input_param['MaxSpeakerLabels']} + if input_param['speaker_labels'] == True and input_param['max_speakers']>=2: + settings = {'ShowSpeakerLabels': input_param['speaker_labels'], 'MaxSpeakerLabels': input_param['max_speakers']} + transcribe.start_transcription_job( TranscriptionJobName=input_param['job_name'], Media={'MediaFileUri': s3uri}, - - #IdentifyMultipleLanguages=True, LanguageCode=input_param['language'], - Settings=settings - ) + Settings=settings) status = get_job_status(transcribe, input_param) if status['TranscriptionJob']['TranscriptionJobStatus'] == 'COMPLETED': diff --git a/openwillis/measures/audio/util/whisperx_util.py b/openwillis/measures/audio/util/whisperx_util.py index bd71064..a7bae73 100644 --- a/openwillis/measures/audio/util/whisperx_util.py +++ b/openwillis/measures/audio/util/whisperx_util.py @@ -27,7 +27,7 @@ def delete_model(model): torch.cuda.empty_cache() del model -def get_diarization(audio, align_json, HF_TOKEN, device, num_speakers, infra_model): +def get_diarization(audio, align_json, device, input_param): """ ------------------------------------------------------------------------------------------------------ @@ -38,12 +38,10 @@ def get_diarization(audio, align_json, HF_TOKEN, device, num_speakers, infra_mod audio signal object align_json: json aligned whisper transcribed output - HF_TOKEN : str - The Hugging Face token for model authentication. device : str device type - num_speakers: int - Number of speaker + input_param : dict + A dictionary containing input parameters Returns: ........... 
@@ -53,17 +51,23 @@ def get_diarization(audio, align_json, HF_TOKEN, device, num_speakers, infra_mod ------------------------------------------------------------------------------------------------------ """ # Assign speaker labels - if infra_model[0]: - diarize_model = whisperx.DiarizationPipeline(use_auth_token=HF_TOKEN, device=device) - + if input_param['infra_model'][0]: + diarize_model = whisperx.DiarizationPipeline(use_auth_token=input_param['hf_token'], device=device) else: - diarize_model = infra_model[2] + diarize_model = input_param['infra_model'][2] - if num_speakers == None: + if input_param['min_speakers'] == None and input_param['max_speakers'] == None: diarize_segments = diarize_model(audio) + elif input_param['min_speakers'] == None and input_param['max_speakers'] != None: + diarize_segments = diarize_model(audio, max_speakers = input_param['max_speakers']) + + elif input_param['min_speakers'] != None and input_param['max_speakers'] == None: + diarize_segments = diarize_model(audio, min_speakers= input_param['min_speakers']) + else: - diarize_segments = diarize_model(audio, min_speakers=num_speakers, max_speakers=num_speakers) + diarize_segments = diarize_model(audio, min_speakers=input_param['min_speakers'], max_speakers=input_param['max_speakers']) + json_response = whisperx.assign_word_speakers(diarize_segments, align_json) return json_response @@ -126,7 +130,7 @@ def transcribe_whisper(filepath, model, device, compute_type, batch_size, infra_ transcribe_json = model_whisp.transcribe(audio, batch_size=batch_size, language=language) return transcribe_json, audio -def get_whisperx_diariazation(filepath, HF_TOKEN, del_model, num_speakers, infra_model, language): +def get_whisperx_diariazation(filepath, input_param): """ ------------------------------------------------------------------------------------------------------ @@ -136,16 +140,8 @@ def get_whisperx_diariazation(filepath, HF_TOKEN, del_model, num_speakers, infra ........... filepath : str The path to the audio file to be transcribed. - HF_TOKEN : str - The Hugging Face token for model authentication. - del_model: boolean - Boolean indicator to delete model if low on GPU resources - num_speakers: int - Number of speaker - infra_model: list - whisper model artifacts (this is optional param: to optimize willisInfra) - language: str - language code + input_param : dict + A dictionary containing input parameters Returns: ........... 
@@ -158,11 +154,9 @@ def get_whisperx_diariazation(filepath, HF_TOKEN, del_model, num_speakers, infra """ device = 'cpu' compute_type = "int16" - - model = 'large-v2' batch_size = 16 - json_response = '{}' + json_response = json.dumps({}) transcript = '' try: @@ -170,16 +164,16 @@ def get_whisperx_diariazation(filepath, HF_TOKEN, del_model, num_speakers, infra device = 'cuda' compute_type = "float16" - transcribe_json, audio = transcribe_whisper(filepath, model, device, compute_type, batch_size, infra_model, language) + transcribe_json, audio = transcribe_whisper(filepath, input_param['model'], device, compute_type, batch_size, input_param['infra_model'], input_param['language']) # Align whisper output - model_a, metadata = whisperx.load_align_model(language_code=language, device=device) + model_a, metadata = whisperx.load_align_model(language_code=input_param['language'], device=device) align_json = whisperx.align(transcribe_json["segments"], model_a, metadata, audio, device, return_char_alignments=False) - if del_model: + if input_param['del_model']: delete_model(model_a) - json_response = get_diarization(audio, align_json, HF_TOKEN, device, num_speakers, infra_model) + json_response = get_diarization(audio, align_json, device, input_param) transcript = get_transcribe_summary(json_response) except Exception as e: diff --git a/openwillis/measures/commons/common.py b/openwillis/measures/commons/common.py index 9199a2e..d0c4951 100644 --- a/openwillis/measures/commons/common.py +++ b/openwillis/measures/commons/common.py @@ -26,7 +26,7 @@ def make_dir(dir_name): if not os.path.exists(dir_name): os.makedirs(dir_name) -def to_audio(filepath, speaker_label, out_dir): +def to_audio(filepath, speaker_dict, output_dir): """ ------------------------------------------------------------------------------------------------------ @@ -36,22 +36,22 @@ def to_audio(filepath, speaker_label, out_dir): ---------- filepath : str The path to the input audio file. - speaker_label : dict + speaker_dict : dict A dictionary containing speaker labels as keys and corresponding segments (NumPy arrays) as values. - out_dir : str + output_dir : str The directory where the output audio files will be saved. 
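+
+    Example:
+    ----------
+    A minimal sketch; 'speaker_dict' is the label-to-signal mapping returned by the
+    speaker separation functions, and 'audio.wav' and 'out_dir/' are placeholders:
+    >>> to_audio('audio.wav', speaker_dict, 'out_dir/')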
------------------------------------------------------------------------------------------------------ """ - make_dir(out_dir) - for key, value in speaker_label.items(): + make_dir(output_dir) + for key, value in speaker_dict.items(): file_name, _ = os.path.splitext(os.path.basename(filepath)) audio_signal = AudioSegment.from_file(file = filepath, format = "wav") spk_signal = AudioSegment(value.tobytes(), frame_rate=audio_signal.frame_rate, sample_width=audio_signal.sample_width, channels=audio_signal.channels) - output_file = os.path.join(out_dir, file_name + '_' + key + '.wav') + output_file = os.path.join(output_dir, file_name + '_' + key + '.wav') spk_signal.export(output_file, format="wav") def get_config(filepath, json_file): diff --git a/openwillis/measures/text/config/text.json b/openwillis/measures/text/config/text.json index fe08030..897ace2 100644 --- a/openwillis/measures/text/config/text.json +++ b/openwillis/measures/text/config/text.json @@ -21,8 +21,8 @@ "word_pause": "pre_word_pause", "phrase_pause": "pre_phrase_pause", "turn_pause": "pre_turn_pause", - "word_pause_mean": "word_pause_length_mean", - "word_pause_var": "word_pause_variability", + "word_pause_mean": "mean_pre_word_pause", + "word_pause_var": "mean_pause_variability", "phrase_pause_mean": "phrase_pause_length_mean", "phrase_pause_var": "phrase_pause_variability", "num_syllables": "num_syllables", @@ -33,6 +33,8 @@ "speech_words": "speech_length_words", "turn_minutes": "turn_length_minutes", "turn_words": "turn_length_words", + "file_length": "file_length", + "speaker_percentage": "speaker_percentage", "word_rate": "words_per_min", "syllable_rate": "syllables_per_min", "pause_rate": "pauses_per_min", diff --git a/openwillis/measures/text/speech_attribute.py b/openwillis/measures/text/speech_attribute.py index e5bd708..b030e41 100644 --- a/openwillis/measures/text/speech_attribute.py +++ b/openwillis/measures/text/speech_attribute.py @@ -78,23 +78,25 @@ def is_whisper_transcribe(json_conf): ------------------------------------------------------------------------------------------------------ """ if "segments" in json_conf: - if "words" in json_conf["segments"][0]: - return True - return False + if len(json_conf["segments"])>0: + if "words" in json_conf["segments"][0]: + return True + return False -def filter_transcribe(json_conf, measures, speaker_label=None): +def filter_transcribe(json_conf, measures, min_turn_length, speaker_label=None): """ ------------------------------------------------------------------------------------------------------ - This function extracts the text and filters the JSON data - for Amazon Transcribe json response objects. - Also, it filters the JSON data based on the speaker label if provided. + This function extracts the text and filters the JSON data for Amazon Transcribe json response objects. + Also, it filters the JSON data based on the speaker label if provided. Parameters: ........... json_conf: dict aws transcribe json response. measures: dict A dictionary containing the names of the columns in the output dataframes. + min_turn_length: int + minimum words required in each turn speaker_label: str Speaker label Returns: @@ -104,58 +106,34 @@ def filter_transcribe(json_conf, measures, speaker_label=None): only the relevant data for processing. text_list: list List of transcribed text. - split into words, phrases, turns, and full text. + split into words, turns, and full text. text_indices: list List of indices for text_list. - for phrases and turns. - Raises: - ........... 
- ValueError: If the speaker label is not found in the json response object. ------------------------------------------------------------------------------------------------------ """ item_data = json_conf["results"]["items"] - - # make a dictionary to map old indices to new indices - item_data = cutil.create_index_column(item_data, measures) + + for i, item in enumerate(item_data): # create_index_column + item[measures["old_index"]] = i # extract text - text = " ".join( - [ - item["alternatives"][0]["content"] - for item in item_data - if "alternatives" in item - ] - ) - - # phrase-split - phrases, phrases_idxs = cutil.phrase_split(text) - - # turn-split - turns = [] - turns_idxs = [] + text = " ".join([item["alternatives"][0]["content"] for item in item_data if "alternatives" in item]) if speaker_label is not None: + turns_idxs, turns = cutil.filter_speaker_aws(item_data, min_turn_length, speaker_label) + text = " ".join(turns) + + else: + turns_idxs, turns = [], [] - turns_idxs, turns, phrases_idxs, phrases = cutil.filter_speaker( - item_data, speaker_label, turns_idxs, turns, phrases_idxs, phrases - ) - - # entire transcript - by joining all the phrases - text = " ".join(phrases) - - # filter json to only include items with start_time and end_time - filter_json = cutil.filter_json_transcribe(item_data, speaker_label, measures) - - # extract words + filter_json = cutil.filter_json_transcribe_aws(item_data, speaker_label, measures) words = [word["alternatives"][0]["content"] for word in filter_json] - text_list = [words, phrases, turns, text] - text_indices = [phrases_idxs, turns_idxs] - - return filter_json, text_list, text_indices + text_list = [words, turns, text] + return filter_json, text_list, turns_idxs -def filter_whisper(json_conf, measures, speaker_label=None): +def filter_whisper(json_conf, measures, min_turn_length, speaker_label=None): """ ------------------------------------------------------------------------------------------------------ @@ -171,6 +149,8 @@ def filter_whisper(json_conf, measures, speaker_label=None): A dictionary containing the names of the columns in the output dataframes. speaker_label: str Speaker label + min_turn_length: int + minimum words required in each turn Returns: ........... 
@@ -190,43 +170,25 @@ def filter_whisper(json_conf, measures, speaker_label=None): ------------------------------------------------------------------------------------------------------ """ item_data = json_conf["segments"] + text = " ".join(item.get("text", "") for item in item_data) if speaker_label is not None: - # filter out segments that do not have speaker labels - item_data = [ - segment for segment in item_data if "speaker" in segment - ] - - # make a dictionary to map old indices to new indices + item_data = [segment for segment in item_data if "speaker" in segment] + item_data = cutil.create_index_column(item_data, measures) - - # phrase-split - phrases_idxs, phrases = cutil.filter_phrases( - item_data, speaker_label, measures - ) - - # turn-split - if speaker_label is not None: - turns_idxs, turns = cutil.filter_turns( - item_data, speaker_label, measures - ) + if speaker_label is not None: + turns_idxs, turns = cutil.filter_turns(item_data, speaker_label, measures, min_turn_length) + + text = " ".join(turns) else: turns_idxs, turns = [], [] - - + # filter json to only include items with start_time and end_time filter_json = cutil.filter_json_transcribe(item_data, speaker_label, measures) - - # extract words - words = [w["word"] for w in filter_json] - - # entire transcript - by joining all the phrases - text = " ".join(phrases) - - text_list = [words, phrases, turns, text] - text_indices = [phrases_idxs, turns_idxs] - - return filter_json, text_list, text_indices + words = [value["word"] for value in filter_json] + + text_list = [words, turns, text] + return filter_json, text_list, turns_idxs def filter_vosk(json_conf, measures): @@ -258,12 +220,125 @@ def filter_vosk(json_conf, measures): # make a dictionary to map old indices to new indices for i, item in enumerate(json_conf): item[measures["old_index"]] = i + + return words, text +def common_summary_feature(df_summ, json_data, model, speaker_label): + """ + ------------------------------------------------------------------------------------------------------ - return words, text + Calculate file features based on JSON data. + Parameters: + ........... + json_conf: list + JSON response object. + summ_df: pandas dataframe + A dataframe containing summary information on the speech + model: str + model name + speaker_label: str + Speaker label -def speech_characteristics(json_conf, language="en", speaker_label=None): + Returns: + ........... 
+ summ_df: pandas dataframe + A dataframe containing summary information on the speech + + ------------------------------------------------------------------------------------------------------ + """ + try: + if model == 'vosk': + if len(json_data) > 0 and 'end' in json_data[-1]: + + last_dict = json_data[-1] + df_summ['file_length'] = [last_dict['end']] + + else: + if model == 'aws': + json_data = json_data["results"] + fl_length, spk_pct = cutil.calculate_file_feature(json_data, model, speaker_label) + + else: + fl_length, spk_pct = cutil.calculate_file_feature(json_data, model, speaker_label) + + df_summ['file_length'] = [fl_length] + df_summ['speaker_percentage'] = [spk_pct]# if speaker_label is not None else df_summ['speaker_percentage'] + + except Exception as e: + logger.error("Error in file length calculation") + return df_summ + +def process_transcript(df_list, json_conf, measures, min_turn_length, speaker_label, source, language): + """ + ------------------------------------------------------------------------------------------------------ + + Process transcript + + Parameters: + ........... + df_list: list, : + contains pandas dataframe + json_conf: dict + Transcribed json file + measures: dict + A dictionary containing the names of the columns in the output dataframes. + min_turn_length: int + minimum words required in each turn + speaker_label: str + Speaker label + source: str + model name + language: str + Language type + + Returns: + ........... + df_list: list + contains pandas dataframe + + ------------------------------------------------------------------------------------------------------ + """ + common_summary_feature(df_list[2], json_conf, source, speaker_label) + + if source == 'whisper': + info = filter_whisper(json_conf, measures, min_turn_length, speaker_label) + + elif source == 'aws': + info = filter_transcribe(json_conf, measures, min_turn_length, speaker_label) + + else: + words, text = filter_vosk(json_conf, measures) + info = (json_conf, [words, [], text], []) + + if len(info[0]) > 0 and len(info[1][-1]) > 0: + df_list = cutil.process_language_feature(df_list, info, language, get_time_columns(source), measures) + return df_list + +def get_time_columns(source): + """ + ------------------------------------------------------------------------------------------------------ + + get time columns + + Parameters: + ........... + source: str + model name + + Returns: + ........... + object: list + time index name + + ------------------------------------------------------------------------------------------------------ + """ + if source == 'aws': + return ["start_time", "end_time"] + else: + return ["start", "end"] + +def speech_characteristics(json_conf, language="en", speaker_label=None, min_turn_length=1): """ ------------------------------------------------------------------------------------------------------ @@ -277,14 +352,14 @@ def speech_characteristics(json_conf, language="en", speaker_label=None): Language type speaker_label: str Speaker label + min_turn_length: int + minimum words required in each turn Returns: ........... 
    df_list: list, contains:
             word_df: pandas dataframe
                 A dataframe containing word summary information
-            phrase_df: pandas dataframe
-                A dataframe containing phrase summary information
             turn_df: pandas dataframe
                 A dataframe containing turn summary information
             summ_df: pandas dataframe
@@ -292,56 +367,33 @@
 
     ------------------------------------------------------------------------------------------------------
     """
-
-    measures = get_config(os.path.abspath(__file__), "text.json")
-    df_list = cutil.create_empty_dataframes(measures)
-
     try:
-        if not isinstance(language, str):
-            raise ValueError("Language should be a string")
-
-        if len(language) < 2:
-            # if language is not specified, then set it to "xx"
-            # run speech characteristics as not english
-            language = "xx"
-        else:
-            language = language[:2].lower()
+        # Load configuration measures
+        measures = get_config(os.path.abspath(__file__), "text.json")
+        df_list = cutil.create_empty_dataframes(measures)
 
         if bool(json_conf):
-            cutil.download_nltk_resources()
+            language = language[:2].lower() if (language and len(language) >= 2) else "na"
+
+            if language == 'en':
+                cutil.download_nltk_resources()
 
             if is_whisper_transcribe(json_conf):
-                filter_json, text_list, text_indices = filter_whisper(
-                    json_conf, measures, speaker_label
-                )
-
-                if len(filter_json) > 0 and len(text_list[-1]) > 0:
-                    df_list = cutil.process_language_feature(
-                        filter_json, df_list, text_list,
-                        text_indices, language, measures,
-                    )
+                df_list = process_transcript(df_list, json_conf, measures, min_turn_length, speaker_label, 'whisper', language)
+
+            elif is_amazon_transcribe(json_conf):
+                df_list = process_transcript(df_list, json_conf, measures, min_turn_length, speaker_label, 'aws', language)
+
             else:
-                words, text = filter_vosk(json_conf, measures)
-                if len(text) > 0:
-                    df_list = cutil.process_language_feature(
-                        json_conf, df_list, [words, [], [], text],
-                        [[], []], language, measures,
-                    )
-
+                df_list = process_transcript(df_list, json_conf, measures, min_turn_length, speaker_label, 'vosk', language)
+
     except Exception as e:
         logger.error(f"Error in Speech Characteristics {e}")
 
     finally:
-        # if word_df is empty, then add a row of NaNs
-        if df_list[0].empty:
-            df_list[0].loc[0] = np.nan
-        # if phrase_df is empty, then add a row of NaNs
-        if df_list[1].empty:
-            df_list[1].loc[0] = np.nan
-        # if turn_df is empty, then add a row of NaNs
-        if df_list[2].empty:
-            df_list[2].loc[0] = np.nan
-        # if summ_df is empty, then add a row of NaNs
-        if df_list[3].empty:
-            df_list[3].loc[0] = np.nan
-
-        return df_list
+        # add a row of NaNs to any dataframe that is still empty
+        for df in df_list:
+            if df.empty:
+                df.loc[0] = np.nan
+
+        return df_list
diff --git a/openwillis/measures/text/util/characteristics_util.py b/openwillis/measures/text/util/characteristics_util.py
index dcefbe6..d09d8c1 100644
--- a/openwillis/measures/text/util/characteristics_util.py
+++ b/openwillis/measures/text/util/characteristics_util.py
@@ -15,23 +15,8 @@
 logger = logging.getLogger()
 
 # NLTK Tag list
-TAG_DICT = {
-    "PRP": "Pronoun",
-    "PRP$": "Pronoun",
-    "VB": "Verb",
-    "VBD": "Verb",
-    "VBG": "Verb",
-    "VBN": "Verb",
-    "VBP": "Verb",
-    "VBZ": "Verb",
-    "JJ": "Adjective",
-    "JJR": "Adjective",
-    "JJS": "Adjective",
-    "NN": "Noun",
-    "NNP": "Noun",
-    "NNS": "Noun",
-}
-
+TAG_DICT = {"PRP": "Pronoun", "PRP$": "Pronoun", "VB": "Verb", "VBD": "Verb", "VBG": "Verb", "VBN": "Verb", "VBP": "Verb",
+            "VBZ": "Verb", "JJ": "Adjective", "JJR": "Adjective", "JJS": "Adjective", "NN": "Noun", "NNP": "Noun",
"NNS": "Noun"} def create_empty_dataframes(measures): """ @@ -46,167 +31,87 @@ def create_empty_dataframes(measures): Returns: ........... - word_df: pandas dataframe - A dataframe containing word summary information - phrase_df: pandas dataframe - A dataframe containing phrase summary information - turn_df: pandas dataframe - A dataframe containing turn summary information - summ_df: pandas dataframe - A dataframe containing summary information on the speech + tuple: pandas dataframe + An empty dataframe for word, turn and summary measures ------------------------------------------------------------------------------------------------------ """ - word_df = pd.DataFrame( - columns=[ - measures["word_pause"], - measures["num_syllables"], - measures["part_of_speech"], - measures["pos"], - measures["neg"], - measures["neu"], - measures["compound"], - ] - ) - - phrase_df = pd.DataFrame( - columns=[ - measures["phrase_pause"], - measures["phrase_minutes"], - measures["phrase_words"], - measures["word_rate"], - measures["syllable_rate"], - measures["pause_rate"], - measures["pause_var"], - measures["pause_meandur"], - measures["speech_percentage"], - measures["speech_noun"], - measures["speech_verb"], - measures["speech_adj"], - measures["speech_pronoun"], - measures["pos"], - measures["neg"], - measures["neu"], - measures["compound"], - measures["speech_mattr"], - ] - ) - - turn_df = pd.DataFrame( - columns=[ - measures["turn_pause"], - measures["turn_minutes"], - measures["turn_words"], - measures["word_rate"], - measures["syllable_rate"], - measures["pause_rate"], - measures["pause_var"], - measures["pause_meandur"], - measures["speech_percentage"], - measures["speech_noun"], - measures["speech_verb"], - measures["speech_adj"], - measures["speech_pronoun"], - measures["pos"], - measures["neg"], - measures["neu"], - measures["compound"], - measures["speech_mattr"], - measures["interrupt_flag"], - ] - ) + word_df = pd.DataFrame(columns=[measures["word_pause"], measures["num_syllables"], measures["part_of_speech"]]) + turn_df = pd.DataFrame(columns=[measures["turn_pause"], measures["turn_minutes"], measures["turn_words"], + measures["word_rate"], measures["syllable_rate"], measures["speech_percentage"], + measures["pause_meandur"], measures["pause_var"], measures["pos"], measures["neg"], + measures["neu"], measures["compound"], measures["speech_mattr"], + measures["interrupt_flag"]]) summ_df = pd.DataFrame( - columns=[ - measures["speech_minutes"], - measures["speech_words"], - measures["word_rate"], - measures["syllable_rate"], - measures["pause_rate"], - measures["word_pause_mean"], - measures["word_pause_var"], - measures["phrase_pause_mean"], - measures["phrase_pause_var"], - measures["speech_percentage"], - measures["speech_noun"], - measures["speech_verb"], - measures["speech_adj"], - measures["speech_pronoun"], - measures["pos"], - measures["neg"], - measures["neu"], - measures["compound"], - measures["speech_mattr"], - measures["num_turns"], - measures["turn_minutes_mean"], - measures["turn_words_mean"], - measures["turn_pause_mean"], - measures["num_one_word_turns"], - measures["num_interrupts"], - ] - ) - - return word_df, phrase_df, turn_df, summ_df - - -def filter_speaker_phrase(item_data, speaker_label, phrases_idxs, phrases): + columns=[measures["file_length"], measures["speech_minutes"], measures["speech_words"], measures["word_rate"], + measures["syllable_rate"], measures["word_pause_mean"], measures["word_pause_var"], + measures["speech_percentage"], measures["pos"], 
measures["neg"], measures["neu"], measures["compound"], + measures["speech_mattr"], measures["num_turns"], measures["num_one_word_turns"], measures["turn_minutes_mean"], + measures["turn_words_mean"], measures["turn_pause_mean"], measures["speaker_percentage"], + measures["num_interrupts"]]) + + return word_df, turn_df, summ_df + +def create_index_column(item_data, measures): """ - ------------------------------------------------------------------------------------------------------ - This function updates the phrases list - to only include the speaker label provided. + This function creates an index column in the JSON response object. + Parameters: - ........... item_data: dict JSON response object. - speaker_label: str - Speaker label - phrases_idxs: list - A list of tuples containing - the start and end indices of the phrases in the JSON object. - phrases: list - A list of phrases extracted from the JSON object. + measures: dict + A dictionary containing the names of the columns in the output dataframes. + Returns: - ........... - phrases_idxs: list - A list of tuples containing - the start and end indices of the phrases in the JSON object. - phrases: list - A list of phrases extracted from the JSON object. - ------------------------------------------------------------------------------------------------------ + item_data: dict + The updated JSON response object. """ - phrases_idxs2 = [] - phrases2 = [] - for i, phrase in enumerate(phrases_idxs): - try: - start_idx = phrase[0] - if item_data[start_idx].get("speaker_label", "") == speaker_label: - phrases_idxs2.append(phrase) - phrases2.append(phrases[i]) - except Exception as e: - logger.error(f"Error in phrase-split for speaker {speaker_label}: {e}") - continue + index = 0 + for item in item_data: + + for word in item.get("words", []): + word[measures["old_index"]] = index + index += 1 + + return item_data - return phrases_idxs2, phrases2 +def download_nltk_resources(): + """ + ------------------------------------------------------------------------------------------------------ + + This function downloads the + required NLTK resources for processing text data. + + ------------------------------------------------------------------------------------------------------ + """ + try: + nltk.data.find("tokenizers/punkt") + except LookupError: + nltk.download("punkt") + try: + nltk.data.find("averaged_perceptron_tagger") + except LookupError: + nltk.download("averaged_perceptron_tagger") -def filter_speaker_turn(item_data, speaker_label, turns_idxs, turns): +def filter_turn_aws(item_data, min_turn_length, speaker_label): """ ------------------------------------------------------------------------------------------------------ This function updates the turns list to only include the speaker label provided. + Parameters: ........... item_data: dict JSON response object. + min_turn_length: int + minimum words required in each turn speaker_label: str Speaker label - turns_idxs: list - A list of tuples containing - the start and end indices of the turns in the JSON object. - turns: list - A list of turns extracted from the JSON object. + Returns: ........... turns_idxs: list @@ -214,148 +119,75 @@ def filter_speaker_turn(item_data, speaker_label, turns_idxs, turns): the start and end indices of the turns in the JSON object. turns: list A list of turns extracted from the JSON object. 
+ ------------------------------------------------------------------------------------------------------ """ start_idx = 0 + turns_idxs, turns = [], [] for i, item in enumerate(item_data): + try: - if ( - i > 0 - and item.get("speaker_label", "") == speaker_label - and item_data[i - 1].get("speaker_label", "") != speaker_label - ): + if (i > 0 and item.get("speaker_label", "") == speaker_label and item_data[i - 1].get("speaker_label", "") != speaker_label): start_idx = i - elif ( - i > 0 - and item.get("speaker_label", "") != speaker_label - and item_data[i - 1].get("speaker_label", "") == speaker_label - ): - turns_idxs.append((start_idx, i - 1)) - # create turns texts - turns.append( - " ".join( - [ - item["alternatives"][0]["content"] - for item in item_data[start_idx:i] - ] - ) - ) + + elif (i > 0 and item.get("speaker_label", "") != speaker_label and item_data[i - 1].get("speaker_label", "") == speaker_label): + turn_text = " ".join([item["alternatives"][0]["content"] for item in item_data[start_idx:i]]) + + if len(turn_text.split(" ")) >= min_turn_length: + turns_idxs.append((start_idx, i - 1)) + turns.append(turn_text) + except Exception as e: logger.error(f"Error in turn-split for speaker {speaker_label}: {e}") continue - # if the last item is the speaker label if start_idx not in [item[0] for item in turns_idxs]: - turns_idxs.append((start_idx, len(item_data) - 1)) - turns.append( - " ".join( - [ - item["alternatives"][0]["content"] - for item in item_data[start_idx:] - ] - ) - ) - return turns_idxs, turns + turn_text = " ".join([item["alternatives"][0]["content"] for item in item_data[start_idx:]]) + if len(turn_text.split(" ")) >= min_turn_length: + turns_idxs.append((start_idx, len(item_data) - 1)) + + turns.append(turn_text) + return turns_idxs, turns -def filter_speaker(item_data, speaker_label, turns_idxs, turns, phrases_idxs, phrases): +def filter_speaker_aws(item_data, min_turn_length, speaker_label): """ ------------------------------------------------------------------------------------------------------ - This function updates the turns and phrases lists - to only include the speaker label provided. + + This function updates the turns lists to only include the speaker label provided. + Parameters: ........... item_data: dict JSON response object. + min_turn_length: int + minimum words required in each turn speaker_label: str Speaker label - turns_idxs: list - A list of tuples containing - the start and end indices of the turns in the JSON object. - turns: list - A list of turns extracted from the JSON object. - phrases_idxs: list - A list of tuples containing - the start and end indices of the phrases in the JSON object. - phrases: list - A list of phrases extracted from the JSON object. + Returns: ........... turns_idxs: list - A list of tuples containing - the start and end indices of the turns in the JSON object. + A list of tuples containing the start and end indices of the turns in the JSON object. turns: list A list of turns extracted from the JSON object. - phrases_idxs: list - A list of tuples containing - the start and end indices of the phrases in the JSON object. - phrases: list - A list of phrases extracted from the JSON object. - Raises: - ........... - ValueError: If the speaker label is not found in the json response object. 
+ ------------------------------------------------------------------------------------------------------ """ - speaker_labels = [ - item["speaker_label"] for item - in item_data if "speaker_label" in item - ] + speaker_labels = [item["speaker_label"] for item in item_data if "speaker_label" in item] if speaker_label not in speaker_labels: - raise ValueError( - f"Speaker label {speaker_label} " - "not found in the json response object." - ) - - # phrase-split for the speaker label - phrases_idxs, phrases = filter_speaker_phrase( - item_data, speaker_label, phrases_idxs, phrases - ) - - # turn-split for the speaker label - turns_idxs, turns = filter_speaker_turn( - item_data, speaker_label, turns_idxs, turns - ) - - return turns_idxs, turns, phrases_idxs, phrases + logger.error(f"Speaker label {speaker_label} not found in the json response object.") + turns_idxs, turns = filter_turn_aws(item_data, min_turn_length, speaker_label) + return turns_idxs, turns -def phrase_split(text): +def filter_json_transcribe_aws(item_data, speaker_label, measures): """ ------------------------------------------------------------------------------------------------------ - This function splits the input text into phrases. - Parameters: - ........... - text: str - The input text. - Returns: - ........... - phrases: list - A list of phrases extracted from the input text. - phrases_idxs: list - A list of tuples containing - the start and end indices of the phrases in the input text. - ------------------------------------------------------------------------------------------------------ - """ - phrases = nltk.tokenize.sent_tokenize(text) - phrases_idxs = [] - - start_idx = 0 - for phrase in phrases: - end_idx = start_idx + len(phrase.split()) - 1 - phrases_idxs.append((start_idx, end_idx)) - start_idx = end_idx + 1 - - return phrases, phrases_idxs - -def filter_turns(item_data, speaker_label, measures): - """ - ------------------------------------------------------------------------------------------------------ - - This function updates the turns list - to only include the speaker label provided. + This function filters the JSON response object to only include items with start_time and end_time. Parameters: ........... @@ -368,85 +200,24 @@ def filter_turns(item_data, speaker_label, measures): Returns: ........... - turns_idxs: list - A list of tuples containing - the start and end indices of the turns in the JSON object. - turns: list - A list of turns extracted from the JSON object. - - Raises: - ........... - ValueError: If the speaker label is not found in the json response object. + filter_json: list + The updated JSON response object. ------------------------------------------------------------------------------------------------------ """ + filter_json = [item for item in item_data if "start_time" in item and "end_time" in item] + filter_json = pause_calculation(filter_json, measures, ['start_time', 'end_time']) - speaker_labels = [ - item["speaker"] for item - in item_data if "speaker" in item - ] - - if speaker_label not in speaker_labels: - raise ValueError( - f"Speaker label {speaker_label} " - "not found in the json response object." 
- ) - - turns_idxs, turns = [], [] - - start_idx = 0 - start_idx2 = 0 - for i, item in enumerate(item_data): - try: - if ( - i > 0 - and item.get("speaker", "") == speaker_label - and item_data[i - 1].get("speaker", "") != speaker_label - ): - start_idx = i - start_idx2 = item["words"][0][measures["old_index"]] - elif ( - i > 0 - and item.get("speaker", "") != speaker_label - and item_data[i - 1].get("speaker", "") == speaker_label - ): - end_idx = i-1 - end_idx2 = item["words"][-1][measures["old_index"]] - turns_idxs.append((start_idx2, end_idx2)) - # create turns texts - turns.append( - " ".join( - [ - item["text"] - for item in item_data[start_idx:(end_idx+1)] - ] - ) - ) - except Exception as e: - logger.error(f"Error in turn-split for speaker {speaker_label}: {e}") - continue - - # if the last item is the speaker label - if start_idx not in [item[0] for item in turns_idxs]: - end_idx2 = item_data[-1]["words"][-1][measures["old_index"]] - turns_idxs.append((start_idx2, end_idx2)) - turns.append( - " ".join( - [ - item["text"] - for item in item_data[start_idx:] - ] - ) - ) - - return turns_idxs, turns + if speaker_label is not None: + filter_json = [item for item in filter_json if item.get("speaker_label", "") == speaker_label] + return filter_json -def filter_phrases(item_data, speaker_label, measures): +def filter_turns(item_data, speaker_label, measures, min_turn_length): """ ------------------------------------------------------------------------------------------------------ - This function updates the phrases list + This function updates the turns list to only include the speaker label provided. Parameters: @@ -457,74 +228,60 @@ def filter_phrases(item_data, speaker_label, measures): Speaker label measures: dict A dictionary containing the names of the columns in the output dataframes. + min_turn_length: int + minimum words required in each turn Returns: ........... - phrases_idxs: list + turns_idxs: list A list of tuples containing - the start and end indices of the phrases in the JSON object. - phrases: list - A list of phrases extracted from the JSON object. + the start and end indices of the turns in the JSON object. + turns: list + A list of turns extracted from the JSON object. 
------------------------------------------------------------------------------------------------------ """ + turns_idxs, turns = [], [] + current_turn = None - - phrases_idxs, phrases = [], [] for item in item_data: try: - - start_idx = item["words"][0][measures["old_index"]] - end_idx = item["words"][-1][measures["old_index"]] - - if speaker_label is not None: + + if "speaker" in item: if item["speaker"] == speaker_label: - phrases.append(item["text"]) - phrases_idxs.append((start_idx, end_idx)) - else: - phrases.append(item["text"]) - phrases_idxs.append((start_idx, end_idx)) - + current_turn = [item] if current_turn is None else current_turn + [item] + + else: + if current_turn is not None: + + if len(current_turn)>0 and len(current_turn[0]["words"])>0: + start_idx2 = current_turn[0]["words"][0][measures["old_index"]] + + end_idx2 = current_turn[-1]["words"][-1][measures["old_index"]] + turn_text = " ".join(item["text"] for item in current_turn) + + if len(turn_text.split(" ")) >= min_turn_length: + turns_idxs.append((start_idx2, end_idx2)) + + turns.append(turn_text) + current_turn = None + except Exception as e: - logger.error(f"Failed to filter phrases: {e}") - return phrases_idxs, phrases - - -def create_index_column(item_data, measures): - """ - ------------------------------------------------------------------------------------------------------ - - This function creates an index column in the JSON response object. - - Parameters: - ........... - item_data: dict - JSON response object. - measures: dict - A dictionary containing the names of the columns in the output dataframes. - - Returns: - ........... - item_data: dict - The updated JSON response object. - - ------------------------------------------------------------------------------------------------------ - """ - i = 0 - i_p = 0 - while True: - for j, word in enumerate(item_data[i_p]["words"]): - item_data[i_p]["words"][j][measures["old_index"]] = i - i += 1 - - i_p += 1 - if i_p >= len(item_data): - break + logger.error(f"Error in turn calculation {e}") - return item_data - + if current_turn is not None: + start_idx2 = current_turn[0]["words"][0][measures["old_index"]] + + end_idx2 = current_turn[-1]["words"][-1][measures["old_index"]] + turn_text = " ".join(item["text"] for item in current_turn) + + if len(turn_text.split(" ")) >= min_turn_length: + turns_idxs.append((start_idx2, end_idx2)) + + turns.append(turn_text) + return turns_idxs, turns -def pause_calculation(filter_json, measures): +def pause_calculation(filter_json, measures, time_index): """ ------------------------------------------------------------------------------------------------------ @@ -546,15 +303,12 @@ def pause_calculation(filter_json, measures): """ for i, item in enumerate(filter_json): if i > 0: - item[measures["pause"]] = float(item["start"]) - float( - filter_json[i - 1]["end"] - ) + item[measures["pause"]] = float(item[time_index[0]]) - float(filter_json[i - 1][time_index[1]]) + else: item[measures["pause"]] = np.nan - return filter_json - def filter_json_transcribe(item_data, speaker_label, measures): """ ------------------------------------------------------------------------------------------------------ @@ -577,131 +331,113 @@ def filter_json_transcribe(item_data, speaker_label, measures): ------------------------------------------------------------------------------------------------------ """ - # phrase filtering item_data2 = [] for item in item_data: try: speaker = item["speaker"] words = item["words"] - - # update speaker labels - for 
j, w in enumerate(words): + + for j, w in enumerate(words):# update speaker labels words[j]["speaker"] = speaker item_data2 += words except Exception as e: logger.error(f"Failed to filter word: {e}") - filter_json = [ - item for item in item_data2 - if "start" in item and "end" in item - ] - - # calculate time difference between each word - filter_json = pause_calculation(filter_json, measures) + filter_json = [item for item in item_data2 if "start" in item and "end" in item] + filter_json = pause_calculation(filter_json, measures, ['start', 'end']) if speaker_label is not None: - filter_json = [ - item - for item in filter_json - if item.get("speaker", "") == speaker_label - ] - + filter_json = [item for item in filter_json if item.get("speaker", "") == speaker_label] return filter_json - -def download_nltk_resources(): +def get_num_of_syllables(text): """ ------------------------------------------------------------------------------------------------------ - This function downloads the - required NLTK resources for processing text data. + This function calculates the number of syllables in the input text. Parameters: ........... - None + text: str + The input text. Returns: ........... - None + syllable_count: int + The number of syllables in the input text. - ------------------------------------------------------------------------------------------------------ + --------------------------------------------------------------------------------------- """ - try: - nltk.data.find("tokenizers/punkt") - except LookupError: - nltk.download("punkt") - try: - nltk.data.find("averaged_perceptron_tagger") - except LookupError: - nltk.download("averaged_perceptron_tagger") + syllable_tokenizer = nltk.tokenize.SyllableTokenizer() + punctuation = "!\"#$%&()*+,-./:;<=>?@[\]^_`{|}~" # remove punctuation + + syllables = [syllable_tokenizer.tokenize(token) for token in nltk.word_tokenize(text) if token not in punctuation] + syllable_count = sum([len(token) for token in syllables]) + return syllable_count -def get_tag(json_conf, tag_dict, measures): +def get_pause_feature_word(word_df, df_diff, word_list, turn_index, measures): """ ------------------------------------------------------------------------------------------------------ - This function performs part-of-speech - tagging on the input text using NLTK, and returns an updated - json_conf list with the part-of-speech tags. + This function calculates various pause-related speech characteristic + features at the word level and adds them to the output dataframe word_df. Parameters: ........... - json_conf: list - JSON response object. - tag_dict: dict - A dictionary mapping the NLTK tags to more readable tags. + word_df: pandas dataframe + A dataframe containing word summary information + df_diff: pandas dataframe + A dataframe containing the word-level information + from the JSON response. + word_list: list + List of transcribed text at the word level. + turn_index: list + A list containing the indices of the first and last word measures: dict A dictionary containing the names of the columns in the output dataframes. Returns: ........... - json_conf: list - The updated json_conf list. + word_df: pandas dataframe + The updated word_df dataframe. 
------------------------------------------------------------------------------------------------------ """ - if len(json_conf) <= 0: - return json_conf - - if "alternatives" not in json_conf[0].keys(): - # local vosk transcriber - word_list = [word["word"] for word in json_conf if "word" in word] - else: - # aws transcriber - word_list = [item["alternatives"][0]["content"] for item in json_conf] - - tag_list = nltk.pos_tag(word_list) - - for i, tag in enumerate(tag_list): - if tag[1] in tag_dict.keys(): - json_conf[i][measures["tag"]] = tag_dict[tag[1]] - else: - json_conf[i][measures["tag"]] = "Other" - - return json_conf - + turn_starts = [pindex[0] for pindex in turn_index] + word_df[measures["word_pause"]] = df_diff[measures["pause"]].where(~df_diff[measures["old_index"]].isin(turn_starts), np.nan) + + word_df[measures["num_syllables"]] = [get_num_of_syllables(word) for word in word_list] + return word_df -def get_part_of_speech(df, tags, measures, index=0): +def process_pause_feature(df_diff, df, text_level, index_list, time_index, level_name, measures, language): """ ------------------------------------------------------------------------------------------------------ - This function calculates the proportions of verbs, - pronouns, adjectives, and nouns in the - transcribed text, and adds them to the output dataframe df. + This function calculates various pause-related speech + characteristic features at the turn + level and adds them to the output dataframe df. Parameters: ........... + df_diff: pandas dataframe + A dataframe containing the word-level information from the JSON response. df: pandas dataframe - A dataframe containing the speech characteristics of the input text. - tags: list - A list of part-of-speech tags for the input text. + A dataframe containing turn summary information + text_level: list + List of transcribed text at the turn level. + index_list: list + A list containing the indices of the first and last word in each turn. + time_index: list + A list containing the names of the columns in json that contain + the start and end times of each word. + level_name: str + The name of the level being analyzed turn. measures: dict A dictionary containing the names of the columns in the output dataframes. - index: int - The index of the row in the output dataframe df. Returns: ........... @@ -710,319 +446,96 @@ def get_part_of_speech(df, tags, measures, index=0): ------------------------------------------------------------------------------------------------------ """ - if len(tags) == 0: + + if level_name not in [measures["turn"]]: + logger.error(f"level_name must be turn") return df - df.loc[index, measures["speech_noun"]] = ( - 100 * len(tags[tags == "Noun"]) / len(tags) - ) - df.loc[index, measures["speech_verb"]] = ( - 100 * len(tags[tags == "Verb"]) / len(tags) - ) - df.loc[index, measures["speech_adj"]] = ( - 100 * len(tags[tags == "Adjective"]) / len(tags) - ) - df.loc[index, measures["speech_pronoun"]] = ( - 100 * len(tags[tags == "Pronoun"]) / len(tags) - ) - - return df - - -def get_tag_summ(json_conf, df_list, text_indices, measures): - """ - ------------------------------------------------------------------------------------------------------ - - This function calculates the proportions of verbs, - pronouns, adjectives, and nouns in the - transcribed text, and adds them to the output dataframe summ_df. - - Parameters: - ........... - json_conf: list - JSON response object. - df_list: list - List of pandas dataframes. 
- word_df, phrase_df, turn_df, summ_df - text_indices: list - List of indices for text_list. - for phrases and turns. - measures: dict - A dictionary containing the names of the columns in the output dataframes. - - Returns: - ........... - df_list: list - List of updated pandas dataframes. - - ------------------------------------------------------------------------------------------------------ - """ - - word_df, phrase_df, turn_df, summ_df = df_list - phrase_index, turn_index = text_indices - - df_conf = pd.DataFrame(json_conf) - - # word-level analysis - word_df[measures["part_of_speech"]] = df_conf[measures["tag"]] - - # phrase-level analysis - for j, pindex in enumerate(phrase_index): - prange = range(pindex[0], pindex[1] + 1) - phrase_tags = df_conf.loc[df_conf[measures["old_index"]].isin(prange), measures["tag"]] - - phrase_df = get_part_of_speech(phrase_df, phrase_tags, measures, j) - - # turn-level analysis - for j, uindex in enumerate(turn_index): - urange = range(uindex[0], uindex[1] + 1) - turn_tags = df_conf.loc[df_conf[measures["old_index"]].isin(urange), measures["tag"]] - - turn_df = get_part_of_speech(turn_df, turn_tags, measures, j) - - # file-level analysis - summ_df = get_part_of_speech(summ_df, df_conf[measures["tag"]], measures) - - df_list = [word_df, phrase_df, turn_df, summ_df] - - return df_list - - -def get_mattr(text): - """ - ------------------------------------------------------------------------------------------------------ - This function calculates the Moving Average Type-Token Ratio (MATTR) - of the input text using the - LexicalRichness library. - - Parameters: - ........... - text : str - The input text to be analyzed. - - Returns: - ........... - mattr : float - The calculated MATTR value. - - ------------------------------------------------------------------------------------------------------ - """ - word = nltk.word_tokenize(text) - filter_punc = list(value for value in word if value not in [".", "!", "?"]) - filter_punc = " ".join(filter_punc) - mattr = np.nan - - lex_richness = LexicalRichness(filter_punc) - if lex_richness.words > 0: - mattr = lex_richness.mattr(window_size=lex_richness.words) - - return mattr - - -def get_sentiment(df_list, text_list, measures): - """ - ------------------------------------------------------------------------------------------------------ - - This function calculates the sentiment scores of the input text using - VADER, and adds them to the output dataframe summ_df. - - Parameters: - ........... - df_list: list - List of pandas dataframes. - word_df, phrase_df, turn_df, summ_df - text_list: list - List of transcribed text. - split into words, phrases, turns, and full text. - measures: dict - A dictionary containing the names of the columns in the output dataframes. - - Returns: - ........... - df_list: list - List of updated pandas dataframes. 
- - ------------------------------------------------------------------------------------------------------ - """ - word_df, phrase_df, turn_df, summ_df = df_list - word_list, phrase_list, turn_list, full_text = text_list - - sentiment = SentimentIntensityAnalyzer() - - # column names - cols = [ - measures["neg"], - measures["neu"], - measures["pos"], - measures["compound"], - measures["speech_mattr"], - ] - - # word-level analysis - for idx, w in enumerate(word_list): - try: - sentiment_dict = sentiment.polarity_scores(w) - - word_df.loc[idx, cols[:-1]] = list(sentiment_dict.values()) - except Exception as e: - logger.error(f"Error in sentiment analysis for word {w}: {e}") - continue - - # phrase-level analysis - for idx, p in enumerate(phrase_list): + for j, index in enumerate(index_list): try: - sentiment_dict = sentiment.polarity_scores(p) - mattr = get_mattr(p) + + rng = range(index[0], index[1] + 1) + level_json = df_diff[df_diff[measures["old_index"]].isin(rng)] + + pauses = level_json[measures["pause"]].values[1:] + level_min_val = (float(level_json.iloc[-1][time_index[1]]) - float(level_json.iloc[0][time_index[0]])) / 60 + + df.loc[j, measures[f"{level_name}_minutes"]] = level_min_val + df.loc[j, measures[f"{level_name}_words"]] = len(level_json) - phrase_df.loc[idx, cols] = list(sentiment_dict.values()) + [mattr] - except Exception as e: - logger.error(f"Error in sentiment analysis for phrase {p}: {e}") - continue + if len(pauses) == 1: + df.loc[j, measures["pause_var"]] = 0 + df.loc[j, measures["pause_meandur"]] = np.mean(pauses) - # turn-level analysis - for idx, u in enumerate(turn_list): - try: - sentiment_dict = sentiment.polarity_scores(u) - mattr = get_mattr(u) + elif len(pauses) > 1: + df.loc[j, measures["pause_var"]] = np.var(pauses) + df.loc[j, measures["pause_meandur"]] = np.mean(pauses) - turn_df.loc[idx, cols] = list(sentiment_dict.values()) + [mattr] + if df.loc[j, measures[f"{level_name}_minutes"]] > 0: + speech_pct_val = 100 * (1 - np.sum(pauses) / (60 * df.loc[j, measures[f"{level_name}_minutes"]])) + df.loc[j, measures["speech_percentage"]] = speech_pct_val + + if language == 'en': + syllable_rate = (get_num_of_syllables(text_level[j]) / df.loc[j, measures[f"{level_name}_minutes"]]) + df.loc[j, measures["syllable_rate"]] = syllable_rate + + word_rate_val = (df.loc[j, measures[f"{level_name}_words"]] / df.loc[j, measures[f"{level_name}_minutes"]]) + df.loc[j, measures["word_rate"]] = word_rate_val + except Exception as e: - logger.error(f"Error in sentiment analysis for turn {u}: {e}") + logger.error(f"Error in pause feature calculation for {level_name} {j}: {e}") continue - # file-level analysis - sentiment_dict = sentiment.polarity_scores(full_text) - mattr = get_mattr(full_text) - - summ_df.loc[0, cols] = list(sentiment_dict.values()) + [mattr] - - df_list = [word_df, phrase_df, turn_df, summ_df] - - return df_list - - -def get_num_of_syllables(text): - """ - ------------------------------------------------------------------------------------------------------ - - This function calculates the number of syllables in the input text. - - Parameters: - ........... - text: str - The input text. - - Returns: - ........... - syllable_count: int - The number of syllables in the input text. 
- - --------------------------------------------------------------------------------------- - """ - - syllable_tokenizer = nltk.tokenize.SyllableTokenizer() - - # remove punctuation - punctuation = "!\"#$%&()*+,-./:;<=>?@[\]^_`{|}~" - syllables = [syllable_tokenizer.tokenize(token) for token in nltk.word_tokenize(text) if token not in punctuation] - # count the number of syllables in each word - syllable_count = sum([len(token) for token in syllables]) - - return syllable_count - + return df -def process_pause_feature(df_diff, df, text_level, index_list, time_index, level_name, measures): +def get_pause_feature_turn(turn_df, df_diff, turn_list, turn_index, time_index, measures, language): """ ------------------------------------------------------------------------------------------------------ - This function calculates various pause-related speech - characteristic features at the phrase or turn - level and adds them to the output dataframe df. + This function calculates various pause-related speech characteristic + features at the turn level and adds them to the output dataframe turn_df. Parameters: ........... + turn_df: pandas dataframe + A dataframe containing turn summary information df_diff: pandas dataframe A dataframe containing the word-level information - from the JSON response. - df: pandas dataframe - A dataframe containing phrase or turn summary information - text_level: list - List of transcribed text at the phrase or turn level. - index_list: list + from the JSON response. + turn_list: list + List of transcribed text at the turn level. + turn_index: list A list containing the indices of the first and last word - in each phrase or turn. + in each turn. time_index: list A list containing the names of the columns in json that contain - the start and end times of each word. - level_name: str - The name of the level being analyzed (phrase or turn). + the start and end times of each word. measures: dict A dictionary containing the names of the columns in the output dataframes. Returns: ........... - df: pandas dataframe - The updated df dataframe. + turn_df: pandas dataframe + The updated turn_df dataframe. 
------------------------------------------------------------------------------------------------------ """ + turn_starts = [uindex[0] for uindex in turn_index] + df_diff_turn = df_diff[df_diff[measures["old_index"]].isin(turn_starts)] - if level_name not in [measures["phrase"], measures["turn"]]: - logger.error( - f"level_name must be either {measures['phrase']} or {measures['turn']}" - ) - return df - - for j, index in enumerate(index_list): - try: - rng = range(index[0], index[1] + 1) - level_json = df_diff[df_diff[measures["old_index"]].isin(rng)] - - # remove first pause as it is the pre_pause - pauses = level_json[measures["pause"]].values[1:] - - df.loc[j, measures[f"{level_name}_minutes"]] = ( - float(level_json.iloc[-1][time_index[1]]) - - float(level_json.iloc[0][time_index[0]]) - ) / 60 - df.loc[j, measures[f"{level_name}_words"]] = len(level_json) - - # if there is 1 pause - if len(pauses) == 1: - df.loc[j, measures["pause_var"]] = 0 - df.loc[j, measures["pause_meandur"]] = np.mean(pauses) - # if there are more than 1 pauses - elif len(pauses) > 1: - df.loc[j, measures["pause_var"]] = np.var(pauses) - df.loc[j, measures["pause_meandur"]] = np.mean(pauses) - - if df.loc[j, measures[f"{level_name}_minutes"]] > 0: - df.loc[j, measures["speech_percentage"]] = 100 * ( - 1 - np.sum(pauses) / ( - 60 * df.loc[j, measures[f"{level_name}_minutes"]] - ) - ) - - # articulation rate - df.loc[j, measures["syllable_rate"]] = ( - get_num_of_syllables(text_level[j]) / df.loc[j, measures[f"{level_name}_minutes"]] - ) - - df.loc[j, measures["word_rate"]] = ( - df.loc[j, measures[f"{level_name}_words"]] / df.loc[j, measures[f"{level_name}_minutes"]] - ) - except Exception as e: - logger.error(f"Error in pause feature calculation for {level_name} {j}: {e}") - continue - - df[measures["pause_rate"]] = df[measures["word_rate"]] - - return df + turn_df[measures["turn_pause"]] = df_diff_turn[measures["pause"]] + turn_df[measures["interrupt_flag"]] = False + + negative_pause = turn_df[measures["turn_pause"]] <= 0 + turn_df.loc[negative_pause, measures["turn_pause"]] = 0 + + turn_df.loc[negative_pause, measures["interrupt_flag"]] = True + turn_df = turn_df.reset_index(drop=True) + turn_df = process_pause_feature(df_diff, turn_df, turn_list, turn_index, time_index, measures["turn"], measures, language) + return turn_df -def update_summ_df( - df_diff, summ_df, full_text, time_index, word_df, phrase_df, turn_df, measures -): +def update_summ_df(df_diff, summ_df, full_text, time_index, word_df, turn_df, measures): """ ------------------------------------------------------------------------------------------------------ @@ -1041,8 +554,6 @@ def update_summ_df( that contain the start and end times of each word. 
word_df: pandas dataframe A dataframe containing word summary information - phrase_df: pandas dataframe - A dataframe containing phrase summary information turn_df: pandas dataframe A dataframe containing turn summary information measures: dict @@ -1055,302 +566,283 @@ def update_summ_df( ------------------------------------------------------------------------------------------------------ """ - if len(phrase_df) > 0: - speech_minutes = phrase_df[measures["phrase_minutes"]].sum() + if len(turn_df) > 0: + speech_minutes = turn_df[measures["turn_minutes"]].sum() else: speech_minutes = (float(df_diff.iloc[-1][time_index[1]]) - float(df_diff.iloc[0][time_index[0]])) / 60 - summ_df[measures["speech_minutes"]] = [speech_minutes] - + summ_df[measures["speech_words"]] = len(df_diff) if speech_minutes > 0: - summ_df[measures["word_rate"]] = ( - summ_df[measures["speech_words"]] / summ_df[measures["speech_minutes"]] - ) - summ_df[measures["syllable_rate"]] = ( - get_num_of_syllables(full_text) / summ_df[measures["speech_minutes"]] - ) - summ_df[measures["speech_percentage"]] = 100 * ( - 1 - - df_diff.loc[1:, measures["pause"]].sum() - / (60 * summ_df[measures["speech_minutes"]]) - ) - - summ_df[measures["pause_rate"]] = summ_df[measures["word_rate"]] - + + summ_df[measures["word_rate"]] = (summ_df[measures["speech_words"]] / summ_df[measures["speech_minutes"]]) + summ_df[measures["syllable_rate"]] = (get_num_of_syllables(full_text) / summ_df[measures["speech_minutes"]]) + summ_df[measures["speech_percentage"]] = 100 * (summ_df[measures["speech_minutes"]] / summ_df[measures["file_length"]]) + if len(word_df[measures["word_pause"]]) > 1: - summ_df[measures["word_pause_mean"]] = word_df[measures["word_pause"]].mean( - skipna=True - ) - summ_df[measures["word_pause_var"]] = word_df[measures["word_pause"]].var( - skipna=True - ) - - if len(phrase_df[measures["phrase_pause"]]) > 1: - summ_df[measures["phrase_pause_mean"]] = phrase_df[measures["phrase_pause"]].mean( - skipna=True - ) - summ_df[measures["phrase_pause_var"]] = phrase_df[measures["phrase_pause"]].var( - skipna=True - ) + summ_df[measures["word_pause_mean"]] = word_df[measures["word_pause"]].mean(skipna=True) + summ_df[measures["word_pause_var"]] = word_df[measures["word_pause"]].var(skipna=True) if len(turn_df) > 0: summ_df[measures["num_turns"]] = len(turn_df) - summ_df[measures["turn_minutes_mean"]] = turn_df[ - measures["turn_minutes"] - ].mean(skipna=True) - summ_df[measures["turn_words_mean"]] = turn_df[ - measures["turn_words"] - ].mean(skipna=True) - summ_df[measures["turn_pause_mean"]] = turn_df[ - measures["turn_pause"] - ].mean(skipna=True) - summ_df["num_one_word_turns"] = len( - turn_df[turn_df[measures["turn_words"]] == 1] - ) - summ_df[measures["num_interrupts"]] = sum(turn_df[measures["interrupt_flag"]]) + summ_df[measures["turn_minutes_mean"]] = turn_df[measures["turn_minutes"]].mean(skipna=True) + + summ_df[measures["turn_words_mean"]] = turn_df[measures["turn_words"]].mean(skipna=True) + summ_df[measures["turn_pause_mean"]] = turn_df[measures["turn_pause"]].mean(skipna=True) + + summ_df["num_one_word_turns"] = len(turn_df[turn_df[measures["turn_words"]] == 1]) + summ_df[measures["num_interrupts"]] = len(turn_df[turn_df[measures["interrupt_flag"]]==True]) return summ_df - -def get_pause_feature_word(word_df, df_diff, word_list, phrase_index, measures): +def get_pause_feature(json_conf, df_list, text_list, turn_index, measures, time_index, language): """ 
     ------------------------------------------------------------------------------------------------------
 
-    This function calculates various pause-related speech characteristic
-    features at the word level and adds them to the output dataframe word_df.
+    This function calculates various pause-related speech characteristic
+    features at the word, turn and file level.
 
     Parameters:
     ...........
-    word_df: pandas dataframe
-        A dataframe containing word summary information
-    df_diff: pandas dataframe
-        A dataframe containing the word-level information
-        from the JSON response.
-    word_list: list
-        List of transcribed text at the word level.
-    phrase_index: list
-        A list containing the indices of the first and last word
-        in each phrase or turn.
+    json_conf: list
+        JSON response object.
+    df_list: list
+        List of pandas dataframes: word_df, turn_df, summ_df
+    text_list: list
+        List of transcribed text: split into words, turns, and full text.
+    turn_index: list
+        List of the first and last word indices of each turn.
     measures: dict
         A dictionary containing the names of the columns in the output dataframes.
+    time_index: list
+        names of the keys holding each word's start and end times
+    language: str
+        Language of the transcribed text.
 
     Returns:
     ...........
-    word_df: pandas dataframe
-        The updated word_df dataframe.
+    df_feature: list
+        List of updated pandas dataframes (word_df, turn_df and summ_df)
 
     ------------------------------------------------------------------------------------------------------
     """
-    phrase_starts = [pindex[0] for pindex in phrase_index]
+    if len(json_conf) <= 0:
+        return df_list
 
-    word_df[measures["word_pause"]] = df_diff[measures["pause"]].where(
-        ~df_diff[measures["old_index"]].isin(phrase_starts), np.nan
-    )
+    word_df, turn_df, summ_df = df_list
+    word_list, turn_list, full_text = text_list
+    df_diff = pd.DataFrame(json_conf)
 
-    # calculate the number of syllables in each word from the word list
-    word_df[measures["num_syllables"]] = [
-        get_num_of_syllables(word) for word in word_list
-    ]
-    return word_df
+    # calculate the pause time between each word and store it in the pause column
+    if measures["pause"] not in df_diff.columns:
+        df_diff[measures["pause"]] = df_diff[time_index[0]].astype(float) - df_diff[time_index[1]].astype(float).shift(1)
+
+    # word-level analysis
+    word_df = get_pause_feature_word(word_df, df_diff, word_list, turn_index, measures)
+
+    # turn-level analysis
+    if len(turn_index) > 0:
+        turn_df = get_pause_feature_turn(turn_df, df_diff, turn_list, turn_index, time_index, measures, language)
 
+    # file-level analysis
+    summ_df = update_summ_df(df_diff, summ_df, full_text, time_index, word_df, turn_df, measures)
+    df_feature = [word_df, turn_df, summ_df]
+    return df_feature
 
-def get_pause_feature_phrase(phrase_df, df_diff, phrase_list, phrase_index, turn_index, time_index, measures):
+def get_mattr(text):
     """
     ------------------------------------------------------------------------------------------------------
-
-    This function calculates various pause-related speech characteristic
-    features at the phrase level and adds them to the output dataframe phrase_df.
+    This function calculates the Moving Average Type-Token Ratio (MATTR)
+    of the input text using the
+    LexicalRichness library.
 
     Parameters:
     ...........
-    phrase_df: pandas dataframe
-        A dataframe containing phrase summary information
-    df_diff: pandas dataframe
-        A dataframe containing the word-level information
-        from the JSON response.
-    phrase_list: list
-        List of transcribed text at the phrase level.
- phrase_index: list - A list containing the indices of the first and last word - in each phrase - turn_index: list - A list containing the indices of the first and last word - in each turn. - time_index: list - A list containing the names of the columns in json that contain - the start and end times of each word. - measures: dict - A dictionary containing the names of the columns in the output dataframes. + text : str + The input text to be analyzed. Returns: ........... - phrase_df: pandas dataframe - The updated phrase_df dataframe. + mattr : float + The calculated MATTR value. ------------------------------------------------------------------------------------------------------ """ - phrase_starts = [pindex[0] for pindex in phrase_index] - - df_diff_phrase = df_diff[ - df_diff[measures["old_index"]].isin(phrase_starts) - ] # get the rows corresponding to the start of each phrase - - if len(turn_index) > 0: - turn_starts = [ - uindex[0] for uindex in turn_index - ] # get the start index of each turn - phrase_df[measures["phrase_pause"]] = df_diff_phrase[measures["pause"]].where( - ~df_diff_phrase[measures["old_index"]].isin(turn_starts), np.nan - ) - else: - phrase_df[measures["phrase_pause"]] = df_diff_phrase[measures["pause"]] - - phrase_df = phrase_df.reset_index(drop=True) - - phrase_df = process_pause_feature( - df_diff, phrase_df, phrase_list, phrase_index, time_index, measures["phrase"], measures - ) + word = nltk.word_tokenize(text) + filter_punc = list(value for value in word if value not in [".", "!", "?"]) + filter_punc = " ".join(filter_punc) + mattr = np.nan - return phrase_df + lex_richness = LexicalRichness(filter_punc) + if lex_richness.words > 0: + mattr = lex_richness.mattr(window_size=lex_richness.words) + return mattr -def get_pause_feature_turn(turn_df, df_diff, turn_list, turn_index, time_index, measures): +def get_tag(json_conf, tag_dict, measures): """ ------------------------------------------------------------------------------------------------------ - This function calculates various pause-related speech characteristic - features at the turn level and adds them to the output dataframe turn_df. + This function performs part-of-speech + tagging on the input text using NLTK, and returns an updated + json_conf list with the part-of-speech tags. Parameters: ........... - turn_df: pandas dataframe - A dataframe containing turn summary information - df_diff: pandas dataframe - A dataframe containing the word-level information - from the JSON response. - turn_list: list - List of transcribed text at the turn level. - turn_index: list - A list containing the indices of the first and last word - in each turn. - time_index: list - A list containing the names of the columns in json that contain - the start and end times of each word. + json_conf: list + JSON response object. + tag_dict: dict + A dictionary mapping the NLTK tags to more readable tags. measures: dict A dictionary containing the names of the columns in the output dataframes. Returns: ........... - turn_df: pandas dataframe - The updated turn_df dataframe. + json_conf: list + The updated json_conf list. 
------------------------------------------------------------------------------------------------------ """ + if len(json_conf) <= 0: + return json_conf - turn_starts = [uindex[0] for uindex in turn_index] + if "alternatives" not in json_conf[0].keys(): + word_list = [word["word"] for word in json_conf if "word" in word]# local vosk transcriber + + else: + word_list = [item["alternatives"][0]["content"] for item in json_conf]# aws transcriber - # get the rows corresponding to the start of each turn - df_diff_turn = df_diff[ - df_diff[measures["old_index"]].isin(turn_starts) - ] + tag_list = nltk.pos_tag(word_list) + for i, tag in enumerate(tag_list): + + if tag[1] in tag_dict.keys(): + json_conf[i][measures["tag"]] = tag_dict[tag[1]] + + else: + json_conf[i][measures["tag"]] = "Other" + return json_conf - turn_df[measures["turn_pause"]] = df_diff_turn[measures["pause"]] - turn_df[measures["interrupt_flag"]] = False - # set pre_turn_pause to 0 if negative (due to overlapping turns) - # and set interrupt_flag to True - negative_pause = turn_df[measures["turn_pause"]] < 0 - turn_df.loc[negative_pause, measures["turn_pause"]] = 0 - turn_df.loc[negative_pause, measures["interrupt_flag"]] = True +def get_tag_summ(json_conf, df_list, measures): + """ + ------------------------------------------------------------------------------------------------------ - turn_df = turn_df.reset_index(drop=True) + This function calculates the proportions of verbs, + pronouns, adjectives, and nouns in the + transcribed text, and adds them to the output dataframe summ_df. - turn_df = process_pause_feature( - df_diff, turn_df, turn_list, turn_index, time_index, measures["turn"], measures - ) + Parameters: + ........... + json_conf: list + JSON response object. + df_list: list + List of pandas dataframes: word_df, turn_df, summ_df + measures: dict + A dictionary containing the names of the columns in the output dataframes. - return turn_df + Returns: + ........... + df_list: list + List of updated pandas dataframes. + ------------------------------------------------------------------------------------------------------ + """ + word_df, turn_df, summ_df = df_list + df_conf = pd.DataFrame(json_conf) + word_df[measures["part_of_speech"]] = df_conf[measures["tag"]] + + df_list = [word_df, turn_df, summ_df] + return df_list -def get_pause_feature(json_conf, df_list, text_list, text_indices, measures): +def get_sentiment(df_list, text_list, measures): """ ------------------------------------------------------------------------------------------------------ - This function calculates various pause-related - speech characteristic features + This function calculates the sentiment scores of the input text using + VADER, and adds them to the output dataframe summ_df. Parameters: ........... - json_conf: list - JSON response object. df_list: list List of pandas dataframes. - word_df, phrase_df, turn_df, summ_df text_list: list List of transcribed text. - split into words, phrases, turns, and full text. - text_indices: list - List of indices for text_list. - for phrases and turns. measures: dict A dictionary containing the names of the columns in the output dataframes. Returns: ........... - df_feature: list + df_list: list List of updated pandas dataframes. 
-        word_df, phrase_df, turn_df, summ_df
 
     ------------------------------------------------------------------------------------------------------
     """
-
-    # Check if json_conf is empty
-    if len(json_conf) <= 0:
-        return df_list
-
-    word_df, phrase_df, turn_df, summ_df = df_list
-    word_list, phrase_list, turn_list, full_text = text_list
-    phrase_index, turn_index = text_indices
+    word_df, turn_df, summ_df = df_list
+    word_list, turn_list, full_text = text_list
 
-    # Convert json_conf to a pandas DataFrame
-    df_diff = pd.DataFrame(json_conf)
+    sentiment = SentimentIntensityAnalyzer()
+    cols = [measures["neg"], measures["neu"], measures["pos"], measures["compound"], measures["speech_mattr"]]
 
-    time_index = ["start", "end"]
+    for idx, u in enumerate(turn_list):
+        try:
+
+            sentiment_dict = sentiment.polarity_scores(u)
+            mattr = get_mattr(u)
+            turn_df.loc[idx, cols] = list(sentiment_dict.values()) + [mattr]
+
+        except Exception as e:
+            logger.error(f"Error in sentiment analysis: {e}")
+            continue
+
+    sentiment_dict = sentiment.polarity_scores(full_text)
+    mattr = get_mattr(full_text)
 
-    # Calculate the pause time between
-    # each word and add the results to pause_list
-    if measures["pause"] not in df_diff.columns:
-        df_diff[measures["pause"]] = df_diff[time_index[0]].astype(float) - df_diff[
-            time_index[1]
-        ].astype(float).shift(1)
+    summ_df.loc[0, cols] = list(sentiment_dict.values()) + [mattr]
+    df_list = [word_df, turn_df, summ_df]
+    return df_list
 
-    # word-level analysis
-    word_df = get_pause_feature_word(word_df, df_diff, word_list, phrase_index, measures)
+def calculate_file_feature(json_data, model, speakers):
+    """
+    ------------------------------------------------------------------------------------------------------
+
+    Calculate file features based on JSON data.
+
+    Parameters:
+    ...........
+    json_data: dict
+        JSON response object.
+    model: str
+        model name (vosk/aws/whisper)
+    speakers: str
+        label(s) of the speaker(s) to include when computing speaking time
 
-    # phrase-level analysis
-    phrase_df = get_pause_feature_phrase(
-        phrase_df, df_diff, phrase_list, phrase_index, turn_index, time_index, measures
-    )
+    Returns:
+    ...........
+    tuple: A tuple containing two values - the total file length and the percentage of time spent speaking.
 
-    df_feature = [word_df, phrase_df, turn_df, summ_df]
+    ------------------------------------------------------------------------------------------------------
+    """
+
+    if model == 'aws':
+        segments = json_data.get('items', [])
+        file_length = max(float(segment.get("end_time", "0")) for segment in segments)
+
+        if speakers is None:
+            return file_length/60, np.NaN
 
-    return df_feature
+        speaking_time = sum(float(segment.get("end_time", "0") or "0") - float(segment.get("start_time", "0") or "0")
+                            for segment in segments if segment.get("speaker_label", "") in speakers)
 
+    else:
+        segments = json_data.get('segments', [])
+        file_length = max(segment.get('end', 0) for segment in segments)
+
+        if speakers is None:
+            return file_length/60, np.NaN
+        speaking_time = sum(segment['end'] - segment['start'] for segment in segments if segment.get('speaker', '') in speakers)
 
+    speaking_pct = (speaking_time / file_length) * 100
+    return file_length/60, speaking_pct
 
-def process_language_feature(
-    json_conf, df_list, text_list,
-    text_indices, language, measures,
-):
+def process_language_feature(df_list, transcribe_info, language, time_index, measures):
     """
     ------------------------------------------------------------------------------------------------------
 
@@ -1358,17 +850,12 @@
     Parameters:
     ...........
-    json_conf: list
-        JSON response object.
     df_list: list
         List of pandas dataframes.
-        word_df, phrase_df, turn_df, summ_df
-    text_list: list
-        List of transcribed text.
-        split into words, phrases, turns, and full text.
-    text_indices: list
-        List of indices for text_list.
-        for phrases and turns.
+    transcribe_info: list
+        A list containing the filtered JSON object, the text lists, and the turn indices.
+    time_index: list
+        names of the keys holding each word's start and end times
     language: str
         Language of the transcribed text.
     measures: dict
@@ -1376,25 +863,17 @@
     Returns:
     ...........
-    word_df: pandas dataframe
-        A dataframe containing word summary information
-    phrase_df: pandas dataframe
-        A dataframe containing phrase summary information
-    turn_df: pandas dataframe
-        A dataframe containing turn summary information
-    summ_df: pandas dataframe
-        A dataframe containing summary information on the speech
+    df_list: list
+        List of pandas dataframes (word_df, turn_df and summ_df)
 
     ------------------------------------------------------------------------------------------------------
     """
-
-    df_list = get_pause_feature(json_conf, df_list, text_list, text_indices, measures)
+    json_conf, text_list, turn_indices = transcribe_info
+    df_list = get_pause_feature(json_conf, df_list, text_list, turn_indices, measures, time_index, language)
 
     if language == "en":
         json_conf = get_tag(json_conf, TAG_DICT, measures)
-        df_list = get_tag_summ(json_conf, df_list, text_indices, measures)
+        df_list = get_tag_summ(json_conf, df_list, measures)
         df_list = get_sentiment(df_list, text_list, measures)
-
-    word_df, phrase_df, turn_df, summ_df = df_list
-    return word_df, phrase_df, turn_df, summ_df
+    return df_list
diff --git a/setup.py b/setup.py
index 7545ba1..b9f51fc 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
 install_requires = fp.read()
 
 setuptools.setup(name='openwillis',
-                 version='1.5.2',
+                 version='1.6',
                  description='digital health measurement',
                  long_description=long_description,
                  long_description_content_type="text/markdown",
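
For reference, a minimal usage sketch of the updated speech_characteristics API, which now takes a min_turn_length argument and returns three dataframes (word, turn, summary) instead of four. The transcript path and speaker label below are illustrative assumptions, not values taken from this patch:

    import json
    import openwillis as ow

    # load a diarized transcript; 'transcript.json' is a hypothetical path
    with open("transcript.json") as f:
        transcript = json.load(f)

    # 'spk_0' is a hypothetical speaker label; turns shorter than
    # three words are excluded from the turn-level dataframe
    word_df, turn_df, summ_df = ow.speech_characteristics(
        transcript, language="en", speaker_label="spk_0", min_turn_length=3
    )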
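
The turn filters above (filter_turn_aws, filter_turns) keep a turn only when its whitespace-delimited word count reaches min_turn_length. A self-contained sketch of that rule with invented turns:

    min_turn_length = 3
    turns = ["yes", "I slept well last night", "not sure"]

    # keep turns that contain at least min_turn_length words
    kept = [t for t in turns if len(t.split(" ")) >= min_turn_length]
    print(kept)  # ['I slept well last night']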
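
Pause features are computed against the source-specific time keys returned by get_time_columns: a word's pause is its start time minus the previous word's end time, and the first word has no preceding pause. A toy illustration of that convention with invented timestamps:

    import pandas as pd

    # whisper/vosk transcripts use "start"/"end"; AWS uses "start_time"/"end_time"
    words = pd.DataFrame({"start": [0.0, 0.6, 2.1], "end": [0.5, 1.4, 2.8]})

    # pause[i] = start[i] - end[i-1]; the first word is left as NaN
    words["pause"] = words["start"] - words["end"].shift(1)
    print(words["pause"].tolist())  # [nan, ~0.1, ~0.7]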
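
calculate_file_feature reports the file length in minutes and, when speaker labels are supplied, the percentage of the file occupied by those speakers. A worked check of that arithmetic on a hand-made AWS-style payload (all values invented):

    # hand-made items: a 60-second file in which spk_0 talks for 35 seconds
    json_data = {"items": [
        {"speaker_label": "spk_0", "start_time": "0.0", "end_time": "20.0"},
        {"speaker_label": "spk_1", "start_time": "25.0", "end_time": "40.0"},
        {"speaker_label": "spk_0", "start_time": "45.0", "end_time": "60.0"},
    ]}

    segments = json_data["items"]
    file_length = max(float(s["end_time"]) for s in segments)  # 60.0 seconds
    speaking_time = sum(float(s["end_time"]) - float(s["start_time"])
                        for s in segments if s["speaker_label"] in ["spk_0"])  # 35.0 seconds

    print(file_length / 60, 100 * speaking_time / file_length)  # 1.0 minute, ~58.3 percent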