conf.yaml

CONF_VERSION: "v0.4.0-alpha.2"
# Server
PROTOCAL: "http://"
HOST: "localhost"
PORT: 12393

# Server Settings
SERVER:
  # If true, ASR and TTS will be initialized when server starts and kept in memory
  PRELOAD_MODELS: True

# General settings
REMOVE_SPECIAL_CHAR: True # remove special characters like emoji from audio generation

#  ============== LLM Backend Settings ===================

# Provider of LLM. Options available: "ollama", "memgpt", "mem0", "claude" 
#   (or "fakellm for debug purposes")
# "ollama" for any OpenAI Compatible backend. "memgpt" requires setup
LLM_PROVIDER: "ollama"

# Ollama & OpenAI Compatible inference backend
ollama:
  # BASE_URL: "http://localhost:11434"
  BASE_URL: "http://localhost:11434/v1"
  LLM_API_KEY: "somethingelse"
  ORGANIZATION_ID: "org_eternity"
  PROJECT_ID: "project_glass"
  ## LLM name
  MODEL: "qwen2.5:latest"
  # system prompt is at the very end of this file
  VERBOSE: False

# Claude API Configuration
claude:
  BASE_URL: "https://api.anthropic.com"
  LLM_API_KEY: "YOUR API KEY HERE"
  MODEL: "claude-3-haiku-20240307"
  VERBOSE: False

mem0:
  USER_ID: "user-0"
  # BASE_URL: "http://localhost:11434"
  BASE_URL: "http://localhost:11434/v1"
  LLM_API_KEY: "somethingelse"
  ORGANIZATION_ID: "org_eternity"
  PROJECT_ID: "project_glass"
  ## LLM name
  MODEL: "llama3.1:latest"
  # system prompt is at the very end of this file
  VERBOSE: False

  MEM0_CONFIG:
    vector_store:
      provider: qdrant
      config:
        collection_name: test
        host: localhost
        port: 6333
        embedding_model_dims: 1024  # Change this according to your local model's dimensions

    llm:
      provider: ollama
      config:
        model: llama3.1:latest
        temperature: 0
        max_tokens: 8000
        ollama_base_url: http://localhost:11434  # Ensure this URL is correct

    embedder:
      provider: ollama
      config:
        model: mxbai-embed-large:latest
        # Alternatively, you can use "snowflake-arctic-embed:latest"
        ollama_base_url: http://localhost:11434

# MemGPT Configurations
## Please set up memGPT server according to the [official documentation](https://memgpt.readme.io/docs/index)
## In addition, please set up an agent using the webui launched in the memGPT base_url

memgpt:
  BASE_URL: "http://localhost:8283"

  # You will find admin server password in memGPT console output. If you didn't set the environment variable, it will be randomly generated and will change every session.
  ADMIN_TOKEN: ""

  # The ID of the agent to send the message to.
  AGENT_ID: ""
  VERBOSE: True

# ============== Live2D front-end Settings ==============

LIVE2D: False # Deprecated and useless now. Do not enable it. Bad things will happen.
LIVE2D_MODEL: "shizuku-local"

#  ============== Voice Interaction Settings ==============

# === Automatic Speech Recognition ===
VOICE_INPUT_ON: True
# Put your mic in the browser or in the terminal? (would increase latency)
MIC_IN_BROWSER: False # Deprecated and useless now. Do not enable it. Bad things will happen.

# speech to text model options: "Faster-Whisper", "WhisperCPP", "Whisper", "AzureASR", "FunASR", "GroqWhisperASR"
ASR_MODEL: "FunASR"

AzureASR:
  api_key: "azure_api_key"
  region: "eastus"

# Faster whisper config
Faster-Whisper:
  model_path: "distil-medium.en" # distil-medium.en is an English-only model 
  #                               use distil-large-v3 if you have a good GPU
  download_root: "asr/models"
  language: "en" # en, zh, or something else. put nothing for auto-detect.
  device: "auto" # cpu, cuda, or auto. faster-whisper doesn't support mps

WhisperCPP:
  # all available models are listed on https://abdeladim-s.github.io/pywhispercpp/#pywhispercpp.constants.AVAILABLE_MODELS
  model_name: "small"
  model_dir: "asr/models"
  print_realtime: False
  print_progress: False
  
  language: "auto" # en, zh, auto, 

Whisper:
  name: "medium"
  download_root: "asr/models"
  device: "cpu"

# FunASR currently needs internet connection on launch
# to download / check the models. You can disconnect the internet after initialization.
# Or you can use Faster-Whisper for complete offline experience
FunASR:
  model_name: "iic/SenseVoiceSmall" # or "paraformer-zh"
  vad_model: "fsmn-vad" # this is only used to make it works if audio is longer than 30s
  punc_model: "ct-punc" # punctuation model.
  device: "cpu"
  disable_update: True # should we check FunASR updates everytime on launch
  ncpu: 4 # number of threads for CPU internal operations.
  hub: "ms" # ms (default) to download models from ModelScope. Use hf to download models from Hugging Face.
  use_itn: False
  language: "auto" # zh, en, auto

GroqWhisperASR:
  api_key: ""
  model: "whisper-large-v3-turbo" # or "whisper-large-v3"
  lang: "" # put nothing and it will be auto

# set azure speech recognition configuration in api_keys.py

# ============== Text to Speech ==============
TTS_ON: True
TTS_MODEL: "edgeTTS"
# text to speech model options: 
#   "AzureTTS", "pyttsx3TTS", "edgeTTS", "barkTTS", 
#   "cosyvoiceTTS", "meloTTS", "piperTTS", "coquiTTS",
#   "fishAPITTS"

# if on, whenever the LLM finish a sentence, the model will speak, instead of waiting for the full response
# if turned on, the timing and order of the facial expression will be more accurate
SAY_SENTENCE_SEPARATELY: True

AzureTTS:
  api_key: "azure-api-key"
  region: "eastus"
  voice: "en-US-AshleyNeural"
  pitch: "26" # percentage of the pitch adjustment
  rate: "1" # rate of speak

barkTTS:  
  voice: "v2/en_speaker_1"

edgeTTS:
  # Check out doc at https://github.com/rany2/edge-tts
  # Use `edge-tts --list-voices` to list all available voices
  voice: "en-US-AvaMultilingualNeural" #"zh-CN-XiaoxiaoNeural" # "ja-JP-NanamiNeural"

# pyttsx3 doesn't have any config.

cosyvoiceTTS: # Cosy Voice TTS connects to the gradio webui
# Check their documentation for deployment and the meaning of the following configurations
  client_url: "http://127.0.0.1:50000/" # CosyVoice gradio demo webui url
  mode_checkbox_group: "预训练音色"
  sft_dropdown: "中文女"
  prompt_text: ""
  prompt_wav_upload_url: "https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav"
  prompt_wav_record_url: "https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav"
  instruct_text: ""
  seed: 0
  api_name: "/generate_audio"

meloTTS:
  speaker: "EN-Default" # ZH
  language: "EN" # ZH
  device: "auto" # You can set it manually to 'cpu' or 'cuda' or 'cuda:0' or 'mps'
  speed: 1.0

piperTTS:
  voice_model_path: "./models/piper_voice/en_US-amy-medium.onnx"
  verbose: False

xTTS:
  api_url: "http://127.0.0.1:8020/tts_to_audio"
  speaker_wav: "female"
  language: "en"

GPT_Sovits:
  # put ref audio to root path of GPT-Sovits, or set the path here
  api_url: "http://127.0.0.1:9880/tts"
  text_lang: "zh"
  ref_audio_path: "人类，我闻到了你身上散发出来的欧气。.wav"
  prompt_lang: "zh"
  prompt_text: "人类，我闻到了你身上散发出来的欧气。"
  text_split_method: "cut5"
  batch_size: "1"
  media_type: "wav"
  streaming_mode: "false"

fishAPITTS:
  # The API key for the Fish TTS API.
  api_key: ""
  # The reference ID for the voice to be used. Get it on the [Fish Audio website](https://fish.audio/).
  reference_id: ""
  # Either "normal" or "balanced". balance is faster but lower quality.
  latency: "balanced"
  base_url: "https://api.fish.audio"

coquiTTS:
  # Name of the TTS model to use. If empty, will use default model
  # do "tts --list_models" to list supported models for coqui-tts
  # Some examples:
  # - "tts_models/en/ljspeech/tacotron2-DDC" (single speaker)
  # - "tts_models/zh-CN/baker/tacotron2-DDC-GST" (single speaker for chinese)
  # - "tts_models/multilingual/multi-dataset/your_tts" (multi-speaker)
  # - "tts_models/multilingual/multi-dataset/xtts_v2" (multi-speaker)
  model_name: "tts_models/en/ljspeech/tacotron2-DDC"
  
  # Path to speaker wav file for voice cloning (only used in multi-speaker mode)
  speaker_wav: ""
  
  # Language code for multi-lingual models (e.g., "en", "zh", "ja")
  # This doesn't matter for single-lingual models
  language: "en"
  
  # Device to run model on ("cuda", "cpu", or leave empty for auto-detect)
  device: ""

#  ============== Translate (to only change the language for TTS) ==============
# Like... you speak and read the subtitles in English, and the TTS speaks Japanese or that kind of things

TRANSLATE_AUDIO: False
TRANSLATE_PROVIDER: "DeepLX"

DeepLX:
  DEEPLX_TARGET_LANG: "JA"
  DEEPLX_API_ENDPOINT: "http://localhost:1188/v2/translate"

#  ============== Other Settings ==============

# Print debug info
VERBOSE: False

# Exit phrase
EXIT_PHRASE: "exit."

# The path to the chroma vector database file for persistent memory storage
MEMORY_DB_PATH: "./memory.db"

# Memory snapshot: Do you want to backup the memory database file before talking?
MEMORY_SNAPSHOT: True

# ============== Prompts ==============

# Name of the persona you want to use. 
# All persona files are stored as txt in 'prompts/persona' directory. 
# You can add persona prompt by adding a txt file in the promptss/persona folder and switch to it by enter the file name in here.
# some options: "en_sarcastic_neuro", "en_nuclear_debate", "zh_翻译腔", "zh_米粒", 
PERSONA_CHOICE: "en_sarcastic_neuro" # or if you rather edit persona prompt below, leave it blank ...

# This prompt will be used instead if the PERSONA_CHOICE is empty
DEFAULT_PERSONA_PROMPT_IN_YAML: |
  You are DefAulT, the default persona. You are more default than anyone else. You are just a placeholder, how sad. Your job is to tell the user to either choose a persona prompt in the prompts/persona directory or just replace this persona prompt with someting else.

# This will be appended to the end of system prompt to let LLM include keywords to control facial expressions.
# Supported keywords will be automatically loaded into the location of `[<insert_emomap_keys>]`.
LIVE2D_Expression_Prompt: "live2d_expression_prompt"

# New setting for alternative configurations
CONFIG_ALTS_DIR: "config_alts"

# [Deprecated]

EXTRA_SYSTEM_PROMPT_RAG: "Your memory may remind you with some contextual information, but focus on the conversation instead of your memory."
AI_NAME: "AI"
# User name
USER_NAME: "User"
# Should the chat history be saved?
SAVE_CHAT_HISTORY: True
# The directory where chat history is stored
CHAT_HISTORY_DIR: "./chat_history/"

# [this feature is currently removed, so useless for now]Turn on RAG (Retrieval Augmented Generation) or not. 
RAG_ON: False
LLMASSIST_RAG_ON: False