Add VAD-based voice activation and audio fade

- Add webrtcvad dependency for real-time voice activity detection
- Create audio/fade.py with fade-in/fade-out utility
- Add VAD-based voice activation to client recording (audio is sent only while speech is detected)
- Apply 200ms fade-out to TTS output to avoid abrupt audio cuts
- Fix tts.py indentation error in except block
This commit is contained in:
2026-05-01 13:14:31 +00:00
parent 7b023cc698
commit e2d3cbe783
4 changed files with 85 additions and 10 deletions

View File

@@ -1,6 +1,7 @@
from transformers import pipeline
from config import Config
import numpy as np
from audio.fade import apply_fade
class TTSEngine:
@@ -16,11 +17,11 @@ class TTSEngine:
device=0 if __import__("torch").cuda.is_available() else -1,
)
except Exception:
self.tts_pipeline = pipeline(
"text-to-speech",
model=self._tts_model,
device=-1,
)
self.tts_pipeline = pipeline(
"text-to-speech",
model=self._tts_model,
device=-1,
)
self.tts_pipeline.start()
def synthesize(self, text: str, output_sample_rate: int = 24000) -> np.ndarray:
@@ -50,4 +51,7 @@ class TTSEngine:
audio = audio.astype(np.float32)
# Apply fade-out to avoid abrupt audio cuts
audio = apply_fade(audio, output_sample_rate, fade_duration_ms=200)
return audio