# Environment settings for a model-serving backend and a web frontend.
# One KEY=value assignment per line; no assignment depends on another, only
# HUGGINGFACE_CACHE_DIR reads an outside variable (${USER}).
# NOTE(review): the consumers of these variables are not visible here —
# descriptions below marked "presumably" should be confirmed against them.

# --- Credentials -------------------------------------------------------------
# Both values are placeholders and must be replaced with real secrets.
# Do not commit the filled-in values.
HF_TOKEN=YOUR_HF_TOKEN_HERE
VLLM_API_KEY=YOUR_LOCAL_API_KEY_HERE

# --- Model -------------------------------------------------------------------
# Model identifier in "<org>/<name>" form.
GEMMA_MODEL_ID=google/gemma-3-1b-it

# --- Ports -------------------------------------------------------------------
# Presumably the ports the backend and frontend services listen on — confirm.
BACKEND_PORT=8000
FRONTEND_PORT=3000

# --- Storage paths -----------------------------------------------------------
# Cache directory under /home/<current user>; relies on ${USER} being set.
HUGGINGFACE_CACHE_DIR=/home/${USER}/.cache/huggingface
# Relative path: resolved against whatever directory the consumer runs from.
OPEN_WEBUI_DATA_DIR=./frontend/data/open-webui

# --- vLLM tuning -------------------------------------------------------------
# Presumably forwarded to the vLLM server as engine arguments — confirm.
VLLM_MAX_MODEL_LEN=512
VLLM_MAX_NUM_BATCHED_TOKENS=256
# Holds a complete command-line flag rather than a true/false value.
VLLM_CHUNKED_PREFILL_FLAG=--enable-chunked-prefill
VLLM_MAX_NUM_SEQS=1
VLLM_GPU_MEMORY_UTILIZATION=0.7
VLLM_DTYPE=bfloat16
VLLM_ATTENTION_BACKEND=TRITON_ATTN

# --- GPU runtime -------------------------------------------------------------
# NOTE(review): looks like an AMD ROCm GPU-architecture override — confirm it
# matches the target hardware.
HSA_OVERRIDE_GFX_VERSION=11.0.0