Initial commit: audio-chat with fixes

- Created AGENTS.md with architecture documentation
- Fixed race conditions and async patterns
- Added conversation history to LLM prompts
- Fixed TTS audio shape handling
- Added buffer limits and graceful shutdown
- Fixed client.py with file sending support
- Removed duplicate requirements
- Added .gitignore
2026-05-01 13:01:06 +00:00
commit 1edfd5d62f
13 changed files with 1286 additions and 0 deletions
--- a/client.py
+++ b/client.py
@@ -0,0 +1,160 @@
+import asyncio
+import websockets
+import struct
+import wave
+import numpy as np
+
+# WebSocket endpoint that the connection-opening helpers in this file pass to websockets.connect().
+WS_URL = "ws://localhost:8000/ws"
+
+
+async def start_recording():
+    """Send start signal (b'S')"""
+    async with websockets.connect(WS_URL) as ws:
+        await ws.send(b"S")
+
+
+async def send_audio(ws, audio_data: bytes):
+    """Send audio data (b'A' + raw PCM)"""
+    await ws.send(b"A" + audio_data)
+
+
+async def reset_session(ws):
+    """Reset conversation (b'R')"""
+    await ws.send(b"R")
+
+
+async def receive_messages(ws):
+    """Receive TEXT and AUDIO messages"""
+    while True:
+        try:
+            msg = await asyncio.wait_for(ws.recv(), timeout=30.0)
+            if isinstance(msg, str):
+                if msg.startswith("TEXT:"):
+                    print(f"[RECognized] {msg[5:]}")
+                else:
+                    print(f"[Server] {msg}")
+            elif isinstance(msg, bytes):
+                if msg[0:1] == b"O":
+                    audio = msg[1:]
+                    print(f"[Audio] Received {len(audio)} bytes")
+                    # Save to file
+                    timestamp = int(asyncio.get_running_loop().time())
+                    filename = f"response_{timestamp}.wav"
+                    with open(filename, "wb") as f:
+                        with wave.open(f, "wb") as wf:
+                            wf.setnchannels(1)
+                            wf.setsampwidth(2)
+                            wf.setframerate(24000)
+                            wf.writeframes(audio)
+                    print(f"[Audio] Saved to {filename}")
+        except asyncio.TimeoutError:
+            break
+        except Exception as e:
+            print(f"Error: {e}")
+            break
+
+
+async def record_and_send():
+    """Record audio from microphone and send"""
+    import pyaudio
+
+    CHUNK = 1024
+    FORMAT = pyaudio.paInt16
+    CHANNELS = 1
+    RATE = 16000
+
+    p = pyaudio.PyAudio()
+    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+
+    async with websockets.connect(WS_URL) as ws:
+        print("Recording... Press Ctrl+C to stop")
+        try:
+            while True:
+                data = stream.read(CHUNK)
+                await send_audio(ws, data)
+        except KeyboardInterrupt:
+            print("\nStopped recording")
+        finally:
+            stream.stop_stream()
+            stream.close()
+            p.terminate()
+
+
+async def send_audio_file(filepath: str):
+    """Read and send an audio file to the server."""
+    try:
+        with open(filepath, "rb") as f:
+            file_data = f.read()
+    except FileNotFoundError:
+        print(f"Error: File '{filepath}' not found")
+        return
+
+    print(f"Reading audio file: {filepath} ({len(file_data)} bytes)")
+    
+    async with websockets.connect(WS_URL) as ws:
+        print("Connected. Sending audio file...")
+        await ws.send(b"A" + file_data)
+        print("File sent. Waiting for response...")
+        
+        try:
+            while True:
+                msg = await asyncio.wait_for(ws.recv(), timeout=60.0)
+                if isinstance(msg, str):
+                    if msg.startswith("TEXT:"):
+                        print(f"[Recognized] {msg[5:]}")
+                    else:
+                        print(f"[Server] {msg}")
+                elif isinstance(msg, bytes):
+                    if msg[0:1] == b"O":
+                        audio = msg[1:]
+                        timestamp = int(asyncio.get_running_loop().time())
+                        filename = f"response_{timestamp}.wav"
+                        with open(filename, "wb") as f:
+                            with wave.open(f, "wb") as wf:
+                                wf.setnchannels(1)
+                                wf.setsampwidth(2)
+                                wf.setframerate(24000)
+                                wf.writeframes(audio)
+                        print(f"[Audio] Saved response to {filename}")
+        except asyncio.TimeoutError:
+            print("Timed out waiting for response")
+        except Exception as e:
+            print(f"Error: {e}")
+
+
+async def client():
+    """Main client loop"""
+    print("Audio Chat Client")
+    print("1. Record from microphone")
+    print("2. Send audio file")
+    choice = input("Choice (1/2): ")
+
+    if choice == "1":
+        p = pyaudio.PyAudio()
+        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
+        async with websockets.connect(WS_URL) as ws:
+            print("Recording... Press Ctrl+C to stop")
+            try:
+                receive_task = asyncio.create_task(receive_messages(ws))
+                while True:
+                    data = stream.read(1024)
+                    await ws.send(b"A" + data)
+            except KeyboardInterrupt:
+                receive_task.cancel()
+            finally:
+                stream.stop_stream()
+                stream.close()
+                p.terminate()
+    elif choice == "2":
+        filepath = input("Enter audio file path: ").strip()
+        if filepath:
+            await send_audio_file(filepath)
+        else:
+            print("No file path provided")
+    else:
+        print("Invalid choice")
+
+
+if __name__ == "__main__":
+    asyncio.run(client())