# Changelog:
# - Created AGENTS.md with architecture documentation
# - Fixed race conditions and async patterns
# - Added conversation history to LLM prompts
# - Fixed TTS audio shape handling
# - Added buffer limits and graceful shutdown
# - Fixed client.py with file sending support
# - Removed duplicate requirements
# - Added .gitignore
#
# File info: 30 lines, 864 B, Python
import os
from pathlib import Path

from dotenv import load_dotenv

# Point python-dotenv at the ".env" file located next to this module and load
# it before any os.getenv() lookup in this file runs.
env_path = Path(__file__).parent / ".env"
load_dotenv(env_path)


def _env_number(name, default, cast):
    """Read environment variable *name* and convert it with *cast*.

    An unset or blank variable yields *default*. A value that *cast* rejects
    raises ``ValueError`` with a message that names the variable, instead of
    the bare "invalid literal" error from ``int``/``float``.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        # Treat "NAME=" (set but empty) the same as "not set".
        return default
    try:
        return cast(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name}={raw!r} is not a valid {cast.__name__}"
        ) from err


def _env_int(name, default):
    """Return environment variable *name* as an int, or *default* if unset/blank."""
    return _env_number(name, default, int)


def _env_float(name, default):
    """Return environment variable *name* as a float, or *default* if unset/blank."""
    return _env_number(name, default, float)


class Config:
    """Application settings resolved from environment variables.

    Every attribute is evaluated once, when this class body executes (i.e. at
    import time). Each setting falls back to the default shown when its
    environment variable is not set; numeric settings also fall back when the
    variable is set but blank.
    """

    # Models
    STT_MODEL: str = os.getenv("STT_MODEL", "Systran/faster-whisper-large-v3")
    LLM_MODEL: str = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-7B-Instruct")
    TTS_MODEL: str = os.getenv("TTS_MODEL", "facebook/mms-tts-rus")

    # Audio settings
    SAMPLE_RATE: int = _env_int("SAMPLE_RATE", 16000)
    AUDIO_BUFFER_SECONDS: float = _env_float("AUDIO_BUFFER_SECONDS", 2.0)
    CHUNK_SIZE: int = _env_int("CHUNK_SIZE", 1024)

    # Server
    # NOTE(review): the default "0.0.0.0" listens on all interfaces - confirm
    # this is intended for every deployment.
    HOST: str = os.getenv("HOST", "0.0.0.0")
    PORT: int = _env_int("PORT", 8000)

    # LLM settings
    LLM_MAX_TOKENS: int = _env_int("LLM_MAX_TOKENS", 512)
    LLM_TEMPERATURE: float = _env_float("LLM_TEMPERATURE", 0.7)

    # GPU
    DEVICE: str = os.getenv("DEVICE", "auto")