17 lines
486 B
Plaintext
17 lines
486 B
Plaintext
# Environment template: copy to a private .env file and replace the
# placeholder values. Do not commit real tokens or keys.

# Access token placeholder (presumably for downloading the model from Hugging Face).
HF_TOKEN=YOUR_HF_TOKEN_HERE
# API key placeholder (presumably the key clients must present to the local vLLM server).
VLLM_API_KEY=YOUR_LOCAL_API_KEY_HERE
# Model repository identifier to serve.
GEMMA_MODEL_ID=google/gemma-3-1b-it
# Ports for the backend and frontend services (assumed to be host-side ports; confirm against the service definitions).
BACKEND_PORT=8000
FRONTEND_PORT=3000
# Model cache location. Uses ${HOME} rather than /home/${USER} so the path is
# also correct for root or any account whose home is not under /home.
HUGGINGFACE_CACHE_DIR=${HOME}/.cache/huggingface
# Data directory for the web UI; the relative path is resolved by whatever consumes this file.
OPEN_WEBUI_DATA_DIR=./frontend/data/open-webui
# vLLM tuning values (presumably forwarded to the matching vLLM engine options; verify in the launcher).
VLLM_MAX_MODEL_LEN=512
# NOTE(review): this is smaller than VLLM_MAX_MODEL_LEN, which appears to rely on
# chunked prefill staying enabled via the flag below; confirm before clearing the flag.
VLLM_MAX_NUM_BATCHED_TOKENS=256
VLLM_CHUNKED_PREFILL_FLAG=--enable-chunked-prefill
VLLM_MAX_NUM_SEQS=1
VLLM_GPU_MEMORY_UTILIZATION=0.7
VLLM_DTYPE=bfloat16

# Attention backend selection for vLLM.
VLLM_ATTENTION_BACKEND=TRITON_ATTN
# ROCm override that reports the GPU as the given gfx version (assumed to target an AMD GPU; confirm for your hardware).
HSA_OVERRIDE_GFX_VERSION=11.0.0