# audio-chat/client.py

import asyncio
import websockets
import struct
import wave
import numpy as np
import webrtcvad

# WebSocket endpoint that every connection opened in this module targets
# (passed to ``websockets.connect``). Points at a server on localhost:8000.
WS_URL = "ws://localhost:8000/ws"


async def start_recording():
    """Open a connection to ``WS_URL`` and send the start marker ``b"S"``.

    The connection is opened only for this one frame; leaving the
    ``async with`` block closes it again right after the send.
    """
    connection = websockets.connect(WS_URL)
    async with connection as ws:
        start_marker = b"S"
        await ws.send(start_marker)


async def send_audio(ws, audio_data: bytes):
    """Send one audio frame: the tag byte ``b"A"`` followed by *audio_data*.

    Args:
        ws: Connection object exposing an awaitable ``send``.
        audio_data: Payload appended after the tag byte (raw PCM per the
            protocol used in this file).
    """
    frame = b"A" + audio_data
    await ws.send(frame)


async def reset_session(ws):
    """Ask the server to reset the conversation by sending the ``b"R"`` frame.

    Args:
        ws: Connection object exposing an awaitable ``send``.
    """
    reset_marker = b"R"
    await ws.send(reset_marker)


def _save_response_wav(pcm: bytes, stem: str) -> str:
    """Write *pcm* to a new mono, 16-bit, 24 kHz WAV file and return its name.

    The file is ``<stem>.wav``; if that name already exists, ``<stem>_1.wav``,
    ``<stem>_2.wav``, ... are tried in turn. Exclusive-create mode is used so
    an earlier reply is never overwritten.
    """
    attempt = 0
    while True:
        filename = f"{stem}.wav" if attempt == 0 else f"{stem}_{attempt}.wav"
        try:
            raw = open(filename, "xb")
        except FileExistsError:
            attempt += 1
            continue
        # wave.open() does not close a file object it was handed, so both
        # are managed here; the WAV writer is closed first (it seeks back to
        # patch the header), then the underlying file.
        with raw, wave.open(raw, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(pcm)
        return filename


async def receive_messages(ws, timeout: float = 30.0):
    """Print text messages and save audio replies until the server goes quiet.

    Text frames starting with ``TEXT:`` are printed as recognized speech; any
    other text frame is printed as a server message. Binary frames whose
    first byte is ``b"O"`` are saved as WAV files named
    ``response_<loop-time>.wav`` (with a numeric suffix when that name is
    already taken); other binary frames are ignored.

    Args:
        ws: Connection object exposing an awaitable ``recv``.
        timeout: Seconds to wait for each message before giving up.

    The loop ends silently when no message arrives within *timeout*, and
    ends after printing the error when any other exception occurs.
    """
    loop = asyncio.get_running_loop()
    while True:
        try:
            msg = await asyncio.wait_for(ws.recv(), timeout=timeout)
            if isinstance(msg, str):
                if msg.startswith("TEXT:"):
                    print(f"[Recognized] {msg[5:]}")
                else:
                    print(f"[Server] {msg}")
            elif isinstance(msg, bytes):
                if msg[0:1] == b"O":
                    audio = msg[1:]
                    print(f"[Audio] Received {len(audio)} bytes")
                    # Loop time has whole-second resolution here, so several
                    # replies can share a stem; the helper keeps them apart.
                    timestamp = int(loop.time())
                    filename = _save_response_wav(audio, f"response_{timestamp}")
                    print(f"[Audio] Saved to {filename}")
        except asyncio.TimeoutError:
            break
        except Exception as e:
            print(f"Error: {e}")
            break


async def record_and_send():
    """Stream microphone audio to the server, gated by voice-activity detection.

    Audio is captured as 16-bit mono PCM at 16 kHz in 30 ms frames. A frame
    is sent (via ``send_audio``) while speech is active: from the first
    frame the VAD classifies as speech until ``SILENCE_THRESHOLD``
    consecutive non-speech frames have been seen. Runs until interrupted;
    the audio device is always released on exit.
    """
    import pyaudio

    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    # webrtcvad only accepts frames of 10, 20 or 30 ms; any other length
    # makes is_speech() raise. 30 ms at 16 kHz is 480 samples.
    FRAME_MS = 30
    CHUNK = RATE * FRAME_MS // 1000
    VAD_MODE = 3
    # Consecutive silent frames before sending stops. 10 x 30 ms keeps the
    # hangover close to the ~320 ms the former 5 x 1024-sample setting implied.
    SILENCE_THRESHOLD = 10

    vad = webrtcvad.Vad()
    vad.set_mode(VAD_MODE)

    loop = asyncio.get_running_loop()
    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)

        async with websockets.connect(WS_URL) as ws:
            print("Recording (VAD active)... Press Ctrl+C to stop")
            speech_active = False
            silence_count = 0
            try:
                while True:
                    # stream.read() blocks until a full frame is captured;
                    # run it in the default executor so the event loop
                    # (and the WebSocket keepalive) keeps running.
                    data = await loop.run_in_executor(None, stream.read, CHUNK)

                    if vad.is_speech(data, RATE):
                        speech_active = True
                        silence_count = 0
                    elif speech_active:
                        silence_count += 1
                        if silence_count >= SILENCE_THRESHOLD:
                            speech_active = False

                    if speech_active:
                        await send_audio(ws, data)
            except KeyboardInterrupt:
                print("\nStopped recording")
            except asyncio.CancelledError:
                # Under asyncio.run() on Python 3.11+ the first Ctrl+C is
                # delivered as a cancellation of the main task; report it
                # and let the cancellation propagate.
                print("\nStopped recording")
                raise
    finally:
        # Covers failures in p.open() and in the WebSocket connect as well.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()


def _write_reply_wav(pcm: bytes, stem: str) -> str:
    """Write *pcm* to a new mono, 16-bit, 24 kHz WAV file and return its name.

    Tries ``<stem>.wav`` first, then ``<stem>_1.wav``, ``<stem>_2.wav``, ...
    using exclusive-create mode, so an existing file is never overwritten.
    """
    attempt = 0
    while True:
        filename = f"{stem}.wav" if attempt == 0 else f"{stem}_{attempt}.wav"
        try:
            raw = open(filename, "xb")
        except FileExistsError:
            attempt += 1
            continue
        # The WAV writer is closed before the underlying file because it
        # seeks back to patch the header on close.
        with raw, wave.open(raw, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(pcm)
        return filename


async def send_audio_file(filepath: str):
    """Send the contents of *filepath* to the server and handle its replies.

    The whole file is read into memory and sent as a single frame: the tag
    byte ``b"A"`` followed by the file bytes exactly as stored on disk.
    Replies are then processed until none arrives for 60 seconds: text
    frames are printed, and binary frames tagged ``b"O"`` are saved as
    ``response_<loop-time>.wav`` (with a numeric suffix if the name is taken).

    Args:
        filepath: Path of the file to upload.

    Returns:
        None. If the file cannot be read, an error is printed and no
        connection is opened.
    """
    try:
        with open(filepath, "rb") as f:
            file_data = f.read()
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found")
        return
    except OSError as e:
        # Directories, permission problems, etc. get the same friendly
        # treatment as a missing file instead of a traceback.
        print(f"Error: Could not read '{filepath}': {e}")
        return

    print(f"Reading audio file: {filepath} ({len(file_data)} bytes)")

    async with websockets.connect(WS_URL) as ws:
        print("Connected. Sending audio file...")
        await ws.send(b"A" + file_data)
        print("File sent. Waiting for response...")

        loop = asyncio.get_running_loop()
        try:
            while True:
                msg = await asyncio.wait_for(ws.recv(), timeout=60.0)
                if isinstance(msg, str):
                    if msg.startswith("TEXT:"):
                        print(f"[Recognized] {msg[5:]}")
                    else:
                        print(f"[Server] {msg}")
                elif isinstance(msg, bytes):
                    if msg[0:1] == b"O":
                        # Whole-second stems can repeat within one session;
                        # the helper picks a free name for each reply.
                        timestamp = int(loop.time())
                        filename = _write_reply_wav(msg[1:], f"response_{timestamp}")
                        print(f"[Audio] Saved response to {filename}")
        except asyncio.TimeoutError:
            print("Timed out waiting for response")
        except Exception as e:
            print(f"Error: {e}")


async def _run_microphone_session():
    """Stream VAD-gated microphone audio while printing/saving server replies."""
    # Imported here so file-upload mode works without PyAudio installed.
    import pyaudio

    rate = 16000
    # webrtcvad only accepts 10, 20 or 30 ms frames; 30 ms at 16 kHz = 480 samples.
    frame_samples = rate * 30 // 1000
    # Consecutive silent frames before sending stops (10 x 30 ms, close to
    # the ~320 ms implied by the former 5 x 1024-sample setting).
    silence_threshold = 10

    vad = webrtcvad.Vad()
    vad.set_mode(3)

    loop = asyncio.get_running_loop()
    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=rate, input=True, frames_per_buffer=frame_samples)
        async with websockets.connect(WS_URL) as ws:
            print("Recording (VAD active)... Press Ctrl+C to stop")
            receive_task = asyncio.create_task(receive_messages(ws))
            speech_active = False
            silence_count = 0
            try:
                while True:
                    # A blocking read here would freeze the event loop and
                    # keep receive_task from ever running during silence.
                    data = await loop.run_in_executor(None, stream.read, frame_samples)
                    if vad.is_speech(data, rate):
                        speech_active = True
                        silence_count = 0
                    elif speech_active:
                        silence_count += 1
                        if silence_count >= silence_threshold:
                            speech_active = False
                    if speech_active:
                        await ws.send(b"A" + data)
            except KeyboardInterrupt:
                # Ctrl+C is the documented way to end the session.
                pass
            finally:
                # Stop the reader on every exit path, before the socket closes.
                receive_task.cancel()
    finally:
        # Also reached when p.open() or the WebSocket connect fails.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()


async def client():
    """Interactive entry point: pick microphone streaming or file upload.

    Prompts on stdin for a mode. ``"1"`` streams microphone audio to the
    server while a background task handles replies; ``"2"`` prompts for a
    file path and uploads it with ``send_audio_file``. Anything else prints
    ``Invalid choice``.
    """
    print("Audio Chat Client")
    print("1. Record from microphone")
    print("2. Send audio file")
    choice = input("Choice (1/2): ")

    if choice == "1":
        await _run_microphone_session()
    elif choice == "2":
        filepath = input("Enter audio file path: ").strip()
        if filepath:
            await send_audio_file(filepath)
        else:
            print("No file path provided")
    else:
        print("Invalid choice")


if __name__ == "__main__":
    try:
        asyncio.run(client())
    except KeyboardInterrupt:
        # Ctrl+C is the advertised way to stop; asyncio.run() re-raises it
        # after cancelling the main task, so exit quietly instead of with
        # a traceback.
        print("\nInterrupted")