Add VAD-based voice activation and audio fade

- Add webrtcvad dependency for real-time voice activity detection
- Create audio/fade.py with fade-in/fade-out utility
- Add VAD voice activation to client recording (sends audio only during speech)
- Apply 200ms fade-out to TTS output to avoid abrupt audio cuts
- Fix tts.py indentation error in except block
2026-05-01 13:14:31 +00:00
parent 7b023cc698
commit e2d3cbe783
4 changed files with 85 additions and 10 deletions
--- a/engine/tts.py
+++ b/engine/tts.py
@@ -1,6 +1,7 @@
 from transformers import pipeline
 from config import Config
 import numpy as np
+from audio.fade import apply_fade


 class TTSEngine:
@@ -16,11 +17,11 @@ class TTSEngine:
                device=0 if __import__("torch").cuda.is_available() else -1,
            )
        except Exception:
-       self.tts_pipeline = pipeline(
-            "text-to-speech",
-            model=self._tts_model,
-            device=-1,
-        )
+            self.tts_pipeline = pipeline(
+                "text-to-speech",
+                model=self._tts_model,
+                device=-1,
+            )
        self.tts_pipeline.start()

    def synthesize(self, text: str, output_sample_rate: int = 24000) -> np.ndarray:
@@ -50,4 +51,7 @@ class TTSEngine:
        
        audio = audio.astype(np.float32)

+        # Apply fade-out to avoid abrupt audio cuts
+        audio = apply_fade(audio, output_sample_rate, fade_duration_ms=200)
+
        return audio