Initial production-ready Gemma 3 vLLM ROCm stack
Co-Authored-By: Oz <oz-agent@warp.dev>
This commit is contained in:
4
backend/Dockerfile
Normal file
4
backend/Dockerfile
Normal file
@@ -0,0 +1,4 @@
|
||||
# syntax=docker/dockerfile:1

# Optional backend Dockerfile.
# This stack uses the official vLLM ROCm image directly from docker-compose.yml.
# Keep this file for future customizations.

# Parameterize the image tag so production builds can pin a specific release
# (e.g. `docker build --build-arg VLLM_IMAGE_TAG=v0.6.3 .`) instead of the
# mutable "latest" tag, which is not reproducible (hadolint DL3007). The
# default preserves the original behavior.
ARG VLLM_IMAGE_TAG=latest
FROM vllm/vllm-openai-rocm:${VLLM_IMAGE_TAG}
7
backend/config/model.env.example
Normal file
7
backend/config/model.env.example
Normal file
@@ -0,0 +1,7 @@
|
||||
HF_TOKEN=YOUR_HF_TOKEN_HERE
|
||||
VLLM_API_KEY=YOUR_LOCAL_API_KEY_HERE
|
||||
GEMMA_MODEL_ID=google/gemma-3-1b-it
|
||||
BACKEND_PORT=8000
|
||||
# NOTE(review): ${USER} is only expanded if the consumer shell-interpolates this
# file — docker compose treats env_file values as literal strings. Confirm how
# this file is loaded, or replace with an absolute path.
HUGGINGFACE_CACHE_DIR=/home/${USER}/.cache/huggingface
|
||||
VLLM_MAX_MODEL_LEN=4096
|
||||
VLLM_GPU_MEMORY_UTILIZATION=0.88
|
||||
Reference in New Issue
Block a user