ARG BASE_IMAGE=nvidia/cuda:12.2.2-runtime-ubuntu22.04

FROM ${BASE_IMAGE}

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    DEBIAN_FRONTEND=noninteractive \
    MODEL_CACHE_DIR=/models

# Install Python and system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.10 \
    python3.10-venv \
    python3-pip \
    build-essential \
    git \
    curl \
    wget \
    libsndfile1 \
    portaudio19-dev \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Set up Python virtual environment
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create model cache directory
RUN mkdir -p ${MODEL_CACHE_DIR}

# Set environment variables for models
ENV TRANSFORMERS_CACHE=${MODEL_CACHE_DIR} \
    HF_HOME=${MODEL_CACHE_DIR} \
    PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Expose WebSocket port
EXPOSE 8000

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]