# Docker Compose stack: vLLM (ROCm) serving a Gemma 3 model + Open WebUI front-end.
# Environment defaults follow the `${VAR:-default}` pattern so the stack runs
# without a .env file; override via .env or the referenced env_file entries.
services:
  # OpenAI-compatible inference server (AMD ROCm build of vLLM).
  gemma3-vllm:
    image: vllm/vllm-openai-rocm:latest
    container_name: gemma3-vllm
    restart: unless-stopped
    env_file:
      - ./backend/config/model.env
    environment:
      # Both token variables are set because tooling has historically read
      # either name; they carry the same HF token.
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      HF_TOKEN: ${HF_TOKEN}
      # Target GPU architecture (gfx1103 = RDNA3 iGPU/APU class).
      PYTORCH_ROCM_ARCH: gfx1103
    # vLLM server CLI arguments; the image's entrypoint is the server itself.
    command:
      - --model
      - ${GEMMA_MODEL_ID:-google/gemma-3-1b-it}
      - --host
      - 0.0.0.0
      - --port
      - "8000"
      - --dtype
      - float16
      - --max-model-len
      - ${VLLM_MAX_MODEL_LEN:-4096}
      - --gpu-memory-utilization
      - ${VLLM_GPU_MEMORY_UTILIZATION:-0.88}
      - --api-key
      - ${VLLM_API_KEY:-local-dev-key}
    # ROCm GPU access: explicit HOST:CONTAINER mappings per the Compose
    # `devices` format (bare paths are Docker-CLI shorthand, not Compose spec).
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    # `video` group membership is required for /dev/kfd and /dev/dri access.
    group_add:
      - video
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    ports:
      - "${BACKEND_PORT:-8000}:8000"
    volumes:
      # Share the host HF cache so model weights are downloaded once.
      - ${HUGGINGFACE_CACHE_DIR:-/home/${USER}/.cache/huggingface}:/root/.cache/huggingface
      - ./models:/models
    healthcheck:
      # vLLM's /health endpoint; it is served unauthenticated even when
      # --api-key is set. NOTE(review): assumes curl is present in the
      # vllm-openai-rocm image — confirm, or switch to a python urllib probe.
      test: ["CMD-SHELL", "curl -sf http://localhost:8000/health >/dev/null || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 10
      # Model load can take minutes on first run (download + weight load).
      start_period: 120s

  # Open WebUI chat front-end, pointed at the vLLM OpenAI-compatible API.
  chat-ui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: gemma3-chat-ui
    restart: unless-stopped
    depends_on:
      gemma3-vllm:
        # Wait for the healthcheck (not just container start) so the UI does
        # not come up with an empty model list while vLLM is still loading.
        condition: service_healthy
    env_file:
      - ./frontend/config/frontend.env
    environment:
      # Open WebUI reads these as strings; keep boolean-looking values quoted.
      WEBUI_AUTH: "False"
      OPENAI_API_BASE_URL: ${OPENAI_API_BASE_URL:-http://gemma3-vllm:8000/v1}
      # Must match the --api-key passed to vLLM above.
      OPENAI_API_KEY: ${VLLM_API_KEY:-local-dev-key}
      ENABLE_OPENAI_API: "True"
      ENABLE_OLLAMA_API: "False"
      DEFAULT_MODELS: ${GEMMA_MODEL_ID:-google/gemma-3-1b-it}
      GLOBAL_LOG_LEVEL: INFO
      WEBUI_NAME: "Gemma 3 via vLLM"
    ports:
      - "${FRONTEND_PORT:-3000}:8080"
    volumes:
      # Persist chats/settings outside the container.
      - ${OPEN_WEBUI_DATA_DIR:-./frontend/data/open-webui}:/app/backend/data