Initial production-ready Gemma 3 vLLM ROCm stack
Co-Authored-By: Oz <oz-agent@warp.dev>
This commit contains: docker-compose.yml (new file, 69 lines added)
---
# Docker Compose stack: Gemma 3 served by vLLM on ROCm, fronted by Open WebUI.
# Tunables come from the env files plus host environment variables
# (HF_TOKEN, GEMMA_MODEL_ID, VLLM_*, BACKEND_PORT, FRONTEND_PORT, ...).
services:
  gemma3-vllm:
    # NOTE(review): ':latest' is not reproducible — pin a specific vLLM ROCm
    # release tag once one is validated on this hardware.
    image: vllm/vllm-openai-rocm:latest
    container_name: gemma3-vllm
    restart: unless-stopped
    env_file:
      - ./backend/config/model.env
    environment:
      # Both names are set from the same token for compatibility:
      # older huggingface_hub reads HUGGING_FACE_HUB_TOKEN, newer reads HF_TOKEN.
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      HF_TOKEN: ${HF_TOKEN}
      # Target GPU architecture for ROCm kernels (gfx1103 = RDNA3 iGPU class).
      PYTORCH_ROCM_ARCH: gfx1103
    # vLLM OpenAI-compatible server arguments; one argv element per list item.
    command:
      - --model
      - ${GEMMA_MODEL_ID:-google/gemma-3-1b-it}
      - --host
      - "0.0.0.0"
      - --port
      - "8000"
      - --dtype
      - float16
      - --max-model-len
      - ${VLLM_MAX_MODEL_LEN:-4096}
      - --gpu-memory-utilization
      - ${VLLM_GPU_MEMORY_UTILIZATION:-0.88}
      - --api-key
      - ${VLLM_API_KEY:-local-dev-key}
    # ROCm GPU access: /dev/kfd (compute) and /dev/dri (render nodes).
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video
    # SYS_PTRACE + unconfined seccomp are commonly required by ROCm tooling
    # inside the container.
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    ports:
      - "${BACKEND_PORT:-8000}:8000"
    volumes:
      # Reuse the host Hugging Face cache so weights are not re-downloaded.
      # NOTE(review): the nested default (${USER} inside ${...:-...}) requires
      # a recent Compose; set HUGGINGFACE_CACHE_DIR explicitly if unsupported.
      - ${HUGGINGFACE_CACHE_DIR:-/home/${USER}/.cache/huggingface}:/root/.cache/huggingface
      - ./models:/models
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8000/health >/dev/null || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 10
      # Generous grace period: first start may download and load model weights.
      start_period: 120s

  chat-ui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: gemma3-chat-ui
    restart: unless-stopped
    depends_on:
      gemma3-vllm:
        # FIX: was 'service_started', which ignored the backend's healthcheck.
        # Wait until vLLM reports healthy so the UI's first OpenAI probe succeeds.
        condition: service_healthy
    env_file:
      - ./frontend/config/frontend.env
    environment:
      # Open WebUI reads these as strings — keep boolean-looking values quoted
      # so YAML does not coerce them to true/false.
      WEBUI_AUTH: "False"
      OPENAI_API_BASE_URL: ${OPENAI_API_BASE_URL:-http://gemma3-vllm:8000/v1}
      OPENAI_API_KEY: ${VLLM_API_KEY:-local-dev-key}
      ENABLE_OPENAI_API: "True"
      ENABLE_OLLAMA_API: "False"
      DEFAULT_MODELS: ${GEMMA_MODEL_ID:-google/gemma-3-1b-it}
      GLOBAL_LOG_LEVEL: INFO
      WEBUI_NAME: Gemma 3 via vLLM
    ports:
      - "${FRONTEND_PORT:-3000}:8080"
    volumes:
      - ${OPEN_WEBUI_DATA_DIR:-./frontend/data/open-webui}:/app/backend/data
Reference in New Issue
Block a user