- Add webrtcvad dependency for real-time voice activity detection
- Create audio/fade.py with fade-in/fade-out utility
- Add VAD voice activation to client recording (sends audio only during speech)
- Apply 200ms fade-out to TTS output to avoid abrupt audio cuts
- Fix tts.py indentation error in except block
35 lines
857 B
Python
35 lines
857 B
Python
import numpy as np
|
|
|
|
|
|
def apply_fade(audio: np.ndarray, sample_rate: int = 24000, fade_duration_ms: int = 300) -> np.ndarray:
    """Apply a linear fade-in and fade-out to an audio array.

    The first and last ``fade_samples`` entries along axis 0 are scaled by a
    linear ramp (0 -> 1 at the start, 1 -> 0 at the end). The array is
    modified in place and the same object is returned.

    Args:
        audio: numpy array of audio samples; axis 0 is treated as time, so
            both 1-D mono and ``(samples, channels)`` layouts are handled.
            Integer dtypes are supported; faded values are rounded to the
            nearest integer.
        sample_rate: audio sample rate in Hz
        fade_duration_ms: fade duration in milliseconds

    Returns:
        The input array with the fade applied. It is returned untouched when
        it is empty or when the effective fade length is zero or negative.
    """
    if len(audio) == 0:
        return audio

    fade_samples = int(sample_rate * fade_duration_ms / 1000)
    # Cap each ramp at a quarter of the clip so at least half stays unfaded.
    fade_samples = min(fade_samples, len(audio) // 4)

    if fade_samples <= 0:
        return audio

    # Shape the ramps as (fade_samples, 1, ..., 1) so they broadcast across
    # any trailing (e.g. channel) axes instead of failing on 2-D input.
    envelope_shape = (fade_samples,) + (1,) * (np.ndim(audio) - 1)
    fade_in = np.linspace(0, 1, fade_samples).reshape(envelope_shape)
    fade_out = np.linspace(1, 0, fade_samples).reshape(envelope_shape)

    is_integer = isinstance(audio, np.ndarray) and np.issubdtype(audio.dtype, np.integer)
    if is_integer:
        # An in-place ``*=`` with a float ramp cannot be cast back into an
        # integer buffer (NumPy raises a casting error), so compute the
        # product out of place, round, and assign it back.
        audio[:fade_samples] = np.rint(audio[:fade_samples] * fade_in)
        audio[-fade_samples:] = np.rint(audio[-fade_samples:] * fade_out)
    else:
        audio[:fade_samples] *= fade_in
        audio[-fade_samples:] *= fade_out

    return audio
|