# docker-compose: Gemma 3 served by vLLM (ROCm build) with an Open WebUI
# chat frontend. Model selection and tuning knobs come from .env / env_file.
services:
  # vLLM OpenAI-compatible inference server (ROCm image).
  gemma3-vllm:
    image: vllm/vllm-openai-rocm:latest
    container_name: gemma3-vllm
    restart: unless-stopped
    env_file:
      - ./backend/config/model.env
    environment:
      # Both names are set because older/newer HF tooling reads different vars.
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      HF_TOKEN: ${HF_TOKEN}
      # gfx1103 targets RDNA3 iGPU/APU class hardware.
      PYTORCH_ROCM_ARCH: gfx1103
      HSA_OVERRIDE_GFX_VERSION: ${HSA_OVERRIDE_GFX_VERSION:-11.0.0}
    command:
      - --model
      # :? makes compose fail fast with a clear message if unset.
      - ${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}
      - --host
      - 0.0.0.0
      - --port
      - "8000"
      - --dtype
      - ${VLLM_DTYPE:-bfloat16}
      # NOTE(review): verify that this vLLM build accepts an
      # `--attention-backend` CLI flag; many versions only honor the
      # VLLM_ATTENTION_BACKEND environment variable instead.
      - --attention-backend
      - ${VLLM_ATTENTION_BACKEND:-TRITON_ATTN}
      # Conservative context/batch defaults sized for a small GPU.
      - --max-model-len
      - ${VLLM_MAX_MODEL_LEN:-512}
      - --max-num-batched-tokens
      - ${VLLM_MAX_NUM_BATCHED_TOKENS:-256}
      - --max-num-seqs
      - ${VLLM_MAX_NUM_SEQS:-1}
      # Default expands to --enable-chunked-prefill; override the env var
      # to substitute or disable the flag.
      - ${VLLM_CHUNKED_PREFILL_FLAG:---enable-chunked-prefill}
      - --gpu-memory-utilization
      - ${VLLM_GPU_MEMORY_UTILIZATION:-0.7}
      # Skips CUDA/HIP graph capture; slower but more robust on this target.
      - --enforce-eager
      - --api-key
      - ${VLLM_API_KEY:-local-dev-key}
    # ROCm device nodes required for GPU access inside the container.
    devices:
      - /dev/kfd
      - /dev/dri
    # NOTE(review): some distros also require the `render` group for
    # /dev/dri access — confirm on the target host.
    group_add:
      - video
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    ports:
      - "${BACKEND_PORT:-8000}:8000"
    volumes:
      # NOTE(review): the default contains a nested ${USER} interpolation —
      # supported by recent Compose releases; verify against the Compose
      # version in use, or set HUGGINGFACE_CACHE_DIR explicitly.
      - ${HUGGINGFACE_CACHE_DIR:-/home/${USER}/.cache/huggingface}:/root/.cache/huggingface
      - ./models:/models
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8000/health >/dev/null || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 10
      # Generous grace period: first start may download model weights.
      start_period: 120s

  # Open WebUI frontend pointed at the vLLM OpenAI-compatible endpoint.
  chat-ui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: gemma3-chat-ui
    restart: unless-stopped
    depends_on:
      gemma3-vllm:
        # NOTE(review): gemma3-vllm defines a healthcheck — consider
        # `service_healthy` here; kept as `service_started` so a slow first
        # model download cannot block the UI from ever starting.
        condition: service_started
    env_file:
      - ./frontend/config/frontend.env
    environment:
      # Quoted so the consumer receives the literal string, not a YAML bool.
      WEBUI_AUTH: "False"
      # Service-name DNS on the default compose network.
      OPENAI_API_BASE_URL: ${OPENAI_API_BASE_URL:-http://gemma3-vllm:8000/v1}
      # Must match the --api-key passed to vLLM above.
      OPENAI_API_KEY: ${VLLM_API_KEY:-local-dev-key}
      ENABLE_OPENAI_API: "True"
      ENABLE_OLLAMA_API: "False"
      DEFAULT_MODELS: ${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}
      GLOBAL_LOG_LEVEL: INFO
      WEBUI_NAME: Gemma 3 via vLLM
    ports:
      - "${FRONTEND_PORT:-3000}:8080"
    volumes:
      - ${OPEN_WEBUI_DATA_DIR:-./frontend/data/open-webui}:/app/backend/data