# gemma3-vllm-stack/docker-compose.yml

services:
  # vLLM serving Gemma 3 on an AMD GPU via ROCm
  gemma3-vllm:
    image: vllm/vllm-openai-rocm:latest
    container_name: gemma3-vllm
    restart: unless-stopped
    env_file:
      - ./backend/config/model.env
    environment:
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      HF_TOKEN: ${HF_TOKEN}
      # gfx1103 targets RDNA3 integrated GPUs (e.g. Radeon 780M)
      PYTORCH_ROCM_ARCH: gfx1103
    command:
      - --model
      - ${GEMMA_MODEL_ID:-google/gemma-3-1b-it}
      - --host
      - 0.0.0.0
      - --port
      - "8000"
      - --dtype
      - float16
      - --max-model-len
      - ${VLLM_MAX_MODEL_LEN:-4096}
      - --gpu-memory-utilization
      - ${VLLM_GPU_MEMORY_UTILIZATION:-0.88}
      - --api-key
      - ${VLLM_API_KEY:-local-dev-key}
    # expose the AMD GPU to the container
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    ports:
      - "${BACKEND_PORT:-8000}:8000"
    volumes:
      # reuse the host's Hugging Face cache so weights are downloaded once
      - ${HUGGINGFACE_CACHE_DIR:-/home/${USER}/.cache/huggingface}:/root/.cache/huggingface
      - ./models:/models
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8000/health >/dev/null || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 10
      # generous grace period: first start may download and load the weights
      start_period: 120s

  # Open WebUI frontend talking to vLLM's OpenAI-compatible API
  chat-ui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: gemma3-chat-ui
    restart: unless-stopped
    depends_on:
      gemma3-vllm:
        # service_started lets the UI come up while the model is still loading;
        # switch to service_healthy to gate on the healthcheck above
        condition: service_started
    env_file:
      - ./frontend/config/frontend.env
    environment:
      WEBUI_AUTH: "False"
      OPENAI_API_BASE_URL: ${OPENAI_API_BASE_URL:-http://gemma3-vllm:8000/v1}
      OPENAI_API_KEY: ${VLLM_API_KEY:-local-dev-key}
      ENABLE_OPENAI_API: "True"
      ENABLE_OLLAMA_API: "False"
      DEFAULT_MODELS: ${GEMMA_MODEL_ID:-google/gemma-3-1b-it}
      GLOBAL_LOG_LEVEL: INFO
      WEBUI_NAME: Gemma 3 via vLLM
    ports:
      - "${FRONTEND_PORT:-3000}:8080"
    volumes:
      - ${OPEN_WEBUI_DATA_DIR:-./frontend/data/open-webui}:/app/backend/data
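
The compose file interpolates several ${...} variables from the shell or a root-level .env file, in addition to whatever ./backend/config/model.env and ./frontend/config/frontend.env provide. A minimal .env sketch follows; the variable names are taken from the compose file above, but every value shown is a placeholder to adjust for your setup (the token in particular is not a real credential):

# .env (next to docker-compose.yml) -- placeholder values
HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx        # Hugging Face token with access to the Gemma repo
GEMMA_MODEL_ID=google/gemma-3-1b-it
VLLM_MAX_MODEL_LEN=4096
VLLM_GPU_MEMORY_UTILIZATION=0.88
VLLM_API_KEY=local-dev-key
BACKEND_PORT=8000
FRONTEND_PORT=3000
HUGGINGFACE_CACHE_DIR=/home/youruser/.cache/huggingface
OPEN_WEBUI_DATA_DIR=./frontend/data/open-webui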
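
Once the stack is up, vLLM's OpenAI-compatible API can be exercised directly to verify the backend independently of the UI. A smoke-test sketch, assuming the default port, API key, and model ID from the compose file:

# start the stack and wait for the model to load
docker compose up -d

# health probe (same endpoint the container healthcheck hits)
curl -sf http://localhost:8000/health

# chat completion against the default model
curl -s http://localhost:8000/v1/chat/completions \
  -H "Authorization: Bearer local-dev-key" \
  -H "Content-Type: application/json" \
  -d '{
        "model": "google/gemma-3-1b-it",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}]
      }'

If the chat completion returns a JSON response with a choices array, the backend is serving correctly and Open WebUI at http://localhost:3000 should pick up the model automatically.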