Add VAD-based voice activation and audio fade
- Add webrtcvad dependency for real-time voice activity detection (VAD)
- Create audio/fade.py with a fade-in/fade-out utility
- Add VAD-based voice activation to client recording (audio is sent only during speech)
- Apply a 200 ms fade-out to TTS output to avoid abrupt audio cuts
- Fix the indentation error in the except block of tts.py
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from transformers import pipeline
|
||||
from config import Config
|
||||
import numpy as np
|
||||
from audio.fade import apply_fade
|
||||
|
||||
|
||||
class TTSEngine:
|
||||
@@ -16,11 +17,11 @@ class TTSEngine:
|
||||
device=0 if __import__("torch").cuda.is_available() else -1,
|
||||
)
|
||||
except Exception:
|
||||
self.tts_pipeline = pipeline(
|
||||
"text-to-speech",
|
||||
model=self._tts_model,
|
||||
device=-1,
|
||||
)
|
||||
self.tts_pipeline = pipeline(
|
||||
"text-to-speech",
|
||||
model=self._tts_model,
|
||||
device=-1,
|
||||
)
|
||||
self.tts_pipeline.start()
|
||||
|
||||
def synthesize(self, text: str, output_sample_rate: int = 24000) -> np.ndarray:
|
||||
@@ -50,4 +51,7 @@ class TTSEngine:
|
||||
|
||||
audio = audio.astype(np.float32)
|
||||
|
||||
# Apply fade-out to avoid abrupt audio cuts
|
||||
audio = apply_fade(audio, output_sample_rate, fade_duration_ms=200)
|
||||
|
||||
return audio
|
||||
|
||||
Reference in New Issue
Block a user