- Add webrtcvad dependency for real-time voice activity detection - Create audio/fade.py with fade-in/fade-out utility - Add VAD voice activation to client recording (sends audio only during speech) - Apply 200ms fade-out to TTS output to avoid abrupt audio cuts - Fix tts.py indentation error in except block
197 lines
6.4 KiB
Python
197 lines
6.4 KiB
Python
import asyncio
|
|
import websockets
|
|
import struct
|
|
import wave
|
|
import numpy as np
|
|
import webrtcvad
|
|
|
|
# WebSocket endpoint that every client function in this file connects to.
WS_URL = "ws://localhost:8000/ws"
|
|
|
|
|
|
async def start_recording():
    """Open a connection to ``WS_URL`` and send the start signal.

    The start signal is the single byte ``b"S"``. The connection is closed
    again as soon as the ``async with`` block is left.
    """
    start_signal = b"S"
    connection = websockets.connect(WS_URL)
    async with connection as ws:
        await ws.send(start_signal)
|
|
|
|
|
|
async def send_audio(ws, audio_data: bytes):
    """Send one audio message over *ws*.

    The message is the tag byte ``b"A"`` immediately followed by
    *audio_data* (raw PCM, per the protocol used in this file).
    """
    tagged_frame = b"A" + audio_data
    await ws.send(tagged_frame)
|
|
|
|
|
|
async def reset_session(ws):
    """Ask the server to reset the conversation by sending the byte ``b"R"``."""
    reset_signal = b"R"
    await ws.send(reset_signal)
|
|
|
|
|
|
def _create_response_file(timestamp):
    """Create a new ``response_<timestamp>[_<n>].wav`` file in the working directory.

    The file is opened in exclusive-creation mode (``"xb"``), which fails
    instead of truncating when the name is already taken. On a clash the
    next numeric suffix is tried, so an earlier response is never
    overwritten.

    Returns:
        A ``(filename, file_object)`` tuple. The caller must close the file.
    """
    attempt = 0
    while True:
        suffix = f"_{attempt}" if attempt else ""
        filename = f"response_{timestamp}{suffix}.wav"
        try:
            return filename, open(filename, "xb")
        except FileExistsError:
            attempt += 1


async def receive_messages(ws):
    """Receive TEXT and AUDIO messages from *ws* until the stream goes quiet.

    Handling per message:
      * ``str`` starting with ``"TEXT:"`` - printed as recognized text.
      * any other ``str`` - printed as a server message.
      * ``bytes`` starting with ``b"O"`` - the remainder is written as a
        mono, 16-bit, 24000 Hz WAV file named ``response_<timestamp>.wav``.
      * any other ``bytes`` - ignored.

    The loop ends when no message arrives for 30 seconds, or when any other
    exception occurs (which is printed first).
    """
    while True:
        try:
            msg = await asyncio.wait_for(ws.recv(), timeout=30.0)
            if isinstance(msg, str):
                if msg.startswith("TEXT:"):
                    print(f"[Recognized] {msg[5:]}")
                else:
                    print(f"[Server] {msg}")
            elif isinstance(msg, bytes) and msg[0:1] == b"O":
                audio = msg[1:]
                print(f"[Audio] Received {len(audio)} bytes")
                # The loop clock only has whole-second resolution here, so
                # several replies can share a timestamp; the helper picks a
                # free name instead of overwriting the previous reply.
                timestamp = int(asyncio.get_running_loop().time())
                filename, f = _create_response_file(timestamp)
                with f:
                    with wave.open(f, "wb") as wf:
                        wf.setnchannels(1)
                        wf.setsampwidth(2)
                        wf.setframerate(24000)
                        wf.writeframes(audio)
                print(f"[Audio] Saved to {filename}")
        except asyncio.TimeoutError:
            break
        except Exception as e:
            print(f"Error: {e}")
            break
|
|
|
|
|
|
async def record_and_send():
    """Record audio from microphone and send with VAD voice activation.

    Opens a 16 kHz, mono, 16-bit PyAudio input stream and a WebSocket
    connection to ``WS_URL``. Each captured frame is classified by
    webrtcvad; frames are forwarded with ``send_audio`` while speech is
    detected, plus a short tail of silent frames after speech ends.

    Runs until interrupted (Ctrl+C / task cancellation). The audio stream
    and the PyAudio instance are always released, including when opening
    the stream or the connection fails.
    """
    import pyaudio

    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    # webrtcvad only accepts frames of 10, 20 or 30 ms; a 1024-sample
    # (64 ms) buffer is rejected by is_speech(). 30 ms at 16 kHz = 480 samples.
    FRAME_MS = 30
    CHUNK = RATE * FRAME_MS // 1000
    VAD_MODE = 3
    # Consecutive silent chunks to stop sending (10 * 30 ms = 300 ms tail).
    SILENCE_THRESHOLD = 10

    vad = webrtcvad.Vad()
    vad.set_mode(VAD_MODE)

    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        async with websockets.connect(WS_URL) as ws:
            print("Recording (VAD active)... Press Ctrl+C to stop")
            speech_active = False
            silence_count = 0
            while True:
                data = stream.read(CHUNK)
                # stream.read() blocks and, during silence, nothing else in
                # this loop awaits. Yield once per frame so the event loop
                # can service the connection and deliver cancellation.
                await asyncio.sleep(0)
                is_speech = vad.is_speech(data, RATE)

                if is_speech:
                    speech_active = True
                    silence_count = 0
                    await send_audio(ws, data)
                elif speech_active:
                    silence_count += 1
                    if silence_count < SILENCE_THRESHOLD:
                        # Still inside the tail: keep sending so the end of
                        # the utterance is not clipped.
                        await send_audio(ws, data)
                    else:
                        speech_active = False
    except KeyboardInterrupt:
        print("\nStopped recording")
    except asyncio.CancelledError:
        # asyncio.run() turns Ctrl+C into a task cancellation on newer
        # Python versions; report it the same way, then let it propagate.
        print("\nStopped recording")
        raise
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
|
|
|
|
|
|
def _open_new_response_file(timestamp):
    """Open a fresh ``response_<timestamp>[_<n>].wav`` for writing.

    Uses exclusive-creation mode (``"xb"``) so an existing file is never
    truncated; on a name clash the next numeric suffix is tried.

    Returns:
        A ``(filename, file_object)`` tuple. The caller must close the file.
    """
    attempt = 0
    while True:
        suffix = f"_{attempt}" if attempt else ""
        filename = f"response_{timestamp}{suffix}.wav"
        try:
            return filename, open(filename, "xb")
        except FileExistsError:
            attempt += 1


async def send_audio_file(filepath: str):
    """Read and send an audio file to the server.

    The file content is sent unmodified as a single ``b"A"`` message over a
    new connection to ``WS_URL``. Replies are then processed until no
    message arrives for 60 seconds or an error occurs: text replies are
    printed, and ``b"O"`` audio replies are saved as mono, 16-bit,
    24000 Hz WAV files named ``response_<timestamp>.wav``.

    Args:
        filepath: Path of the file to send.

    Returns:
        None. If the file cannot be read, an error is printed and nothing
        is sent.
    """
    try:
        with open(filepath, "rb") as f:
            file_data = f.read()
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found")
        return
    except OSError as e:
        # E.g. the path is a directory or is not readable: report it the
        # same way as a missing file instead of crashing with a traceback.
        print(f"Error: Could not read '{filepath}': {e}")
        return

    print(f"Reading audio file: {filepath} ({len(file_data)} bytes)")

    async with websockets.connect(WS_URL) as ws:
        print("Connected. Sending audio file...")
        await ws.send(b"A" + file_data)
        print("File sent. Waiting for response...")

        try:
            while True:
                msg = await asyncio.wait_for(ws.recv(), timeout=60.0)
                if isinstance(msg, str):
                    if msg.startswith("TEXT:"):
                        print(f"[Recognized] {msg[5:]}")
                    else:
                        print(f"[Server] {msg}")
                elif isinstance(msg, bytes) and msg[0:1] == b"O":
                    audio = msg[1:]
                    # Whole-second timestamps can repeat; the helper picks
                    # an unused name so no earlier reply is overwritten.
                    timestamp = int(asyncio.get_running_loop().time())
                    filename, out_file = _open_new_response_file(timestamp)
                    with out_file:
                        with wave.open(out_file, "wb") as wf:
                            wf.setnchannels(1)
                            wf.setsampwidth(2)
                            wf.setframerate(24000)
                            wf.writeframes(audio)
                    print(f"[Audio] Saved response to {filename}")
        except asyncio.TimeoutError:
            print("Timed out waiting for response")
        except Exception as e:
            print(f"Error: {e}")
|
|
|
|
|
|
async def _stream_microphone_with_replies():
    """Stream VAD-gated microphone audio while printing/saving server replies.

    Frames are sent as ``b"A"`` messages while webrtcvad reports speech,
    plus a short tail of silent frames after speech ends.
    ``receive_messages`` runs concurrently on the same connection. Runs
    until interrupted; the reply task, the audio stream and PyAudio are
    always cleaned up.
    """
    # Imported here because only the microphone mode needs PyAudio.
    import pyaudio

    RATE = 16000
    # webrtcvad only accepts 10, 20 or 30 ms frames; 30 ms at 16 kHz = 480 samples.
    FRAME_MS = 30
    CHUNK = RATE * FRAME_MS // 1000
    # Consecutive silent chunks to stop sending (10 * 30 ms = 300 ms tail).
    SILENCE_THRESHOLD = 10

    vad = webrtcvad.Vad()
    vad.set_mode(3)

    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
        async with websockets.connect(WS_URL) as ws:
            print("Recording (VAD active)... Press Ctrl+C to stop")
            receive_task = asyncio.create_task(receive_messages(ws))
            speech_active = False
            silence_count = 0
            try:
                while True:
                    data = stream.read(CHUNK)
                    # stream.read() blocks and nothing else awaits during
                    # silence; yield once per frame so receive_task (and
                    # cancellation) actually get to run.
                    await asyncio.sleep(0)
                    is_speech = vad.is_speech(data, RATE)
                    if is_speech:
                        speech_active = True
                        silence_count = 0
                        await ws.send(b"A" + data)
                    elif speech_active:
                        silence_count += 1
                        if silence_count < SILENCE_THRESHOLD:
                            await ws.send(b"A" + data)
                        else:
                            speech_active = False
            except KeyboardInterrupt:
                pass  # Ctrl+C is the documented way to stop recording.
            finally:
                # Stop the reader before the connection is closed, on every
                # exit path rather than only on KeyboardInterrupt.
                receive_task.cancel()
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()


async def client():
    """Main client loop.

    Prints a two-entry menu and reads the choice from standard input:
      * ``"1"`` - stream microphone audio with voice activation while
        handling server replies.
      * ``"2"`` - prompt for a file path and send that file with
        ``send_audio_file``; an empty path prints a notice instead.
      * anything else - prints ``"Invalid choice"``.
    """
    print("Audio Chat Client")
    print("1. Record from microphone")
    print("2. Send audio file")
    choice = input("Choice (1/2): ")

    if choice == "1":
        await _stream_microphone_with_replies()
    elif choice == "2":
        filepath = input("Enter audio file path: ").strip()
        if filepath:
            await send_audio_file(filepath)
        else:
            print("No file path provided")
    else:
        print("Invalid choice")
|
|
|
|
|
|
if __name__ == "__main__":
    try:
        asyncio.run(client())
    except KeyboardInterrupt:
        # Ctrl+C is the advertised way to stop recording. asyncio.run() may
        # re-raise KeyboardInterrupt after cancelling the main task, so
        # catch it here to exit without a traceback.
        print()
|