Initial commit: audio-chat with fixes

- Created AGENTS.md with architecture documentation
- Fixed race conditions and async patterns
- Added conversation history to LLM prompts
- Fixed TTS audio shape handling
- Added buffer limits and graceful shutdown
- Fixed client.py with file sending support
- Removed duplicate requirements
- Added .gitignore
2026-05-01 13:01:06 +00:00
commit 1edfd5d62f
13 changed files with 1286 additions and 0 deletions
--- a/config.py
+++ b/config.py
@@ -0,0 +1,29 @@
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+
+env_path = Path(__file__).parent / ".env"  # ".env" file in the same directory as this module
+load_dotenv(env_path)  # runs before Config below is defined, so its os.getenv calls happen after this load
+
+
+class Config:  # Settings read via os.getenv once, when this class body executes; each has a hard-coded default.
+    # Models: STT / LLM / TTS model identifiers (defaults look like Hugging Face repo ids -- TODO confirm against the loaders)
+    STT_MODEL = os.getenv("STT_MODEL", "Systran/faster-whisper-large-v3")
+    LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-7B-Instruct")
+    TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-rus")
+
+    # Audio settings. int()/float() raise ValueError at class-definition time if an environment value is not numeric.
+    SAMPLE_RATE = int(os.getenv("SAMPLE_RATE", "16000"))  # presumably Hz -- confirm with the audio consumers
+    AUDIO_BUFFER_SECONDS = float(os.getenv("AUDIO_BUFFER_SECONDS", "2"))
+    CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "1024"))  # unit (samples vs. bytes) is not visible here -- TODO confirm
+
+    # Server address settings. NOTE(review): if HOST is used as a bind address, "0.0.0.0" means all interfaces -- confirm intended.
+    HOST = os.getenv("HOST", "0.0.0.0")
+    PORT = int(os.getenv("PORT", "8000"))  # ValueError if PORT is set to a non-integer string
+
+    # LLM settings: generation limits parsed from strings; how they are applied is up to the caller.
+    LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "512"))
+    LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.7"))
+
+    # GPU: device selector string; "auto" is passed through unchanged here, so any resolution happens in the consumer.
+    DEVICE = os.getenv("DEVICE", "auto")