Update docs, configs, and scripts
Co-Authored-By: Oz <oz-agent@warp.dev>
This commit is contained in:
11
README.md
11
README.md
@ -57,16 +57,17 @@ Detailed architecture: `docs/ARCHITECTURE.md`.
|
||||
cd gemma3-vllm-stack
|
||||
```
|
||||
|
||||
2. Create configuration files:
|
||||
2. Create the main configuration file:
|
||||
```bash
|
||||
cp .env.example .env
|
||||
cp backend/config/model.env.example backend/config/model.env
|
||||
cp frontend/config/frontend.env.example frontend/config/frontend.env
|
||||
```
|
||||
|
||||
3. Edit `.env` and set at least:
|
||||
- `HF_TOKEN`
|
||||
- `VLLM_API_KEY` (recommended even on LAN)
|
||||
- `GEMMA_MODEL_ID`
|
||||
|
||||
`backend/config/model.env` and `frontend/config/frontend.env` are auto-synced from `.env` by `scripts/install.sh` and `scripts/restart.sh`.
|
||||
|
||||
4. Install/start stack:
|
||||
```bash
|
||||
@ -124,3 +125,7 @@ Adjust using `.env`:
|
||||
|
||||
## Notes for `chat.bhatfamily.in`
|
||||
This repository intentionally does not terminate TLS. Bindings are plain HTTP on host ports and are designed for external reverse proxy + TLS handling (nginx/Caddy/Cloudflare Tunnel).
|
||||
|
||||
Current homelab edge mapping (verified 2026-04-19):
|
||||
- `https://chat.bhatfamily.in` is served on `443/tcp` by the shared Caddy edge.
|
||||
- Direct host ports `3000/tcp` (UI) and `8000/tcp` (API) are currently publicly reachable; restrict firewall/NAT exposure if direct internet access is not intended.
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
# Generated from .env by scripts/install.sh and scripts/restart.sh
|
||||
# Keep values in .env as the source of truth.
|
||||
HF_TOKEN=YOUR_HF_TOKEN_HERE
|
||||
VLLM_API_KEY=YOUR_LOCAL_API_KEY_HERE
|
||||
GEMMA_MODEL_ID=google/gemma-3-1b-it
|
||||
GEMMA_MODEL_ID=SET_IN_DOT_ENV
|
||||
BACKEND_PORT=8000
|
||||
HUGGINGFACE_CACHE_DIR=/home/${USER}/.cache/huggingface
|
||||
VLLM_MAX_MODEL_LEN=4096
|
||||
|
||||
@ -12,7 +12,7 @@ services:
|
||||
HSA_OVERRIDE_GFX_VERSION: ${HSA_OVERRIDE_GFX_VERSION:-11.0.0}
|
||||
command:
|
||||
- --model
|
||||
- ${GEMMA_MODEL_ID:-google/gemma-3-1b-it}
|
||||
- ${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}
|
||||
- --host
|
||||
- 0.0.0.0
|
||||
- --port
|
||||
@ -69,7 +69,7 @@ services:
|
||||
OPENAI_API_KEY: ${VLLM_API_KEY:-local-dev-key}
|
||||
ENABLE_OPENAI_API: "True"
|
||||
ENABLE_OLLAMA_API: "False"
|
||||
DEFAULT_MODELS: ${GEMMA_MODEL_ID:-google/gemma-3-1b-it}
|
||||
DEFAULT_MODELS: ${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}
|
||||
GLOBAL_LOG_LEVEL: INFO
|
||||
WEBUI_NAME: Gemma 3 via vLLM
|
||||
ports:
|
||||
|
||||
@ -83,7 +83,8 @@ Ensure `HF_TOKEN` is set to a valid token with access to Gemma 3.
|
||||
```bash
|
||||
grep '^GEMMA_MODEL_ID=' .env
|
||||
```
|
||||
Use a valid model, e.g. `google/gemma-3-1b-it`.
|
||||
Set `GEMMA_MODEL_ID` in `.env` to the model you intend to run and ensure your `HF_TOKEN` has access to it.
|
||||
After editing `.env`, run `./scripts/restart.sh` so synced service env files and containers pick up the change.
|
||||
|
||||
3. ROCm runtime/device issues:
|
||||
```bash
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
# Generated from .env by scripts/install.sh and scripts/restart.sh
|
||||
# Keep values in .env as the source of truth.
|
||||
FRONTEND_PORT=3000
|
||||
OPENAI_API_BASE_URL=http://gemma3-vllm:8000/v1
|
||||
VLLM_API_KEY=YOUR_LOCAL_API_KEY_HERE
|
||||
GEMMA_MODEL_ID=google/gemma-3-1b-it
|
||||
GEMMA_MODEL_ID=SET_IN_DOT_ENV
|
||||
OPEN_WEBUI_DATA_DIR=./frontend/data/open-webui
|
||||
|
||||
@ -4,15 +4,49 @@ set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
ENV_FILE="${REPO_ROOT}/.env"
|
||||
|
||||
log() {
|
||||
printf '[install] %s
|
||||
' "$*"
|
||||
printf '[install] %s\n' "$*"
|
||||
}
|
||||
|
||||
err() {
|
||||
printf '[install][error] %s
|
||||
' "$*" >&2
|
||||
printf '[install][error] %s\n' "$*" >&2
|
||||
}
|
||||
|
||||
#######################################
# Load the repository .env into the current shell and export its variables.
# Globals:   ENV_FILE (read)
#            every variable assigned by ENV_FILE (written and exported)
#            GEMMA_MODEL_ID (validated)
# Arguments: none
# Returns:   0 on success. Aborts the shell with a descriptive message when
#            GEMMA_MODEL_ID is unset or empty after loading.
#######################################
load_env_file() {
  if [[ -f "${ENV_FILE}" ]]; then
    # allexport marks every assignment made by the sourced file for export,
    # so child processes started later inherit the same values.
    set -a
    # shellcheck disable=SC1090
    source "${ENV_FILE}"
    set +a
  fi

  # Validate even when the file is absent: returning early would skip this
  # check and let a later "${GEMMA_MODEL_ID}" expansion die under `set -u`
  # with an opaque "unbound variable" instead of this actionable message.
  : "${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}"
}
|
||||
|
||||
#######################################
# Regenerate the per-service env files from the values currently loaded in
# the shell, falling back to the defaults spelled out below.
# Globals:   REPO_ROOT, GEMMA_MODEL_ID (read, required)
#            HF_TOKEN, VLLM_API_KEY, BACKEND_PORT, HUGGINGFACE_CACHE_DIR,
#            VLLM_MAX_MODEL_LEN, VLLM_GPU_MEMORY_UTILIZATION, FRONTEND_PORT,
#            OPENAI_API_BASE_URL, OPEN_WEBUI_DATA_DIR, USER (read, optional)
# Arguments: none
# Outputs:   overwrites backend/config/model.env and
#            frontend/config/frontend.env under REPO_ROOT; logs one line
# Returns:   0 on success; aborts the shell if GEMMA_MODEL_ID is unset/empty
#######################################
sync_service_env_files() {
  local backend_env="${REPO_ROOT}/backend/config/model.env"
  local frontend_env="${REPO_ROOT}/frontend/config/frontend.env"
  local login_name previous_umask

  # Fail before any file is opened for writing, so a missing model id cannot
  # leave a truncated env file behind.
  : "${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}"

  # USER is not guaranteed to be set (cron, some containers); a bare ${USER}
  # would abort under `set -u`.
  login_name="${USER:-$(id -un)}"

  mkdir -p "${backend_env%/*}" "${frontend_env%/*}"

  # Both files carry credentials (HF_TOKEN / VLLM_API_KEY): create them
  # owner-only, then restore the caller's umask.
  previous_umask="$(umask)"
  umask 077

  cat > "${backend_env}" <<EOF
HF_TOKEN=${HF_TOKEN:-YOUR_HF_TOKEN_HERE}
VLLM_API_KEY=${VLLM_API_KEY:-YOUR_LOCAL_API_KEY_HERE}
GEMMA_MODEL_ID=${GEMMA_MODEL_ID}
BACKEND_PORT=${BACKEND_PORT:-8000}
HUGGINGFACE_CACHE_DIR=${HUGGINGFACE_CACHE_DIR:-/home/${login_name}/.cache/huggingface}
VLLM_MAX_MODEL_LEN=${VLLM_MAX_MODEL_LEN:-512}
VLLM_GPU_MEMORY_UTILIZATION=${VLLM_GPU_MEMORY_UTILIZATION:-0.7}
EOF

  cat > "${frontend_env}" <<EOF
FRONTEND_PORT=${FRONTEND_PORT:-3000}
OPENAI_API_BASE_URL=${OPENAI_API_BASE_URL:-http://gemma3-vllm:8000/v1}
VLLM_API_KEY=${VLLM_API_KEY:-YOUR_LOCAL_API_KEY_HERE}
GEMMA_MODEL_ID=${GEMMA_MODEL_ID}
OPEN_WEBUI_DATA_DIR=${OPEN_WEBUI_DATA_DIR:-./frontend/data/open-webui}
EOF

  umask "${previous_umask}"
  # umask only affects newly created files; tighten pre-existing ones too.
  chmod 600 "${backend_env}" "${frontend_env}"

  log "Synced backend/config/model.env and frontend/config/frontend.env from .env."
}
|
||||
|
||||
require_linux() {
|
||||
@ -39,7 +73,8 @@ install_docker_ubuntu() {
|
||||
local codename
|
||||
codename="${VERSION_CODENAME:-jammy}"
|
||||
|
||||
echo "deb [arch=${arch} signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${codename} stable" | sudo tee /etc/apt/sources.list.d/docker.list >/dev/null
|
||||
echo "deb [arch=${arch} signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${codename} stable" \
|
||||
| sudo tee /etc/apt/sources.list.d/docker.list >/dev/null
|
||||
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
|
||||
@ -89,23 +124,17 @@ check_or_install_docker() {
|
||||
}
|
||||
|
||||
prepare_env_files() {
|
||||
if [[ ! -f "${REPO_ROOT}/.env" ]]; then
|
||||
cp "${REPO_ROOT}/.env.example" "${REPO_ROOT}/.env"
|
||||
if [[ ! -f "${ENV_FILE}" ]]; then
|
||||
cp "${REPO_ROOT}/.env.example" "${ENV_FILE}"
|
||||
log "Created .env from .env.example."
|
||||
err "IMPORTANT: edit .env and set HF_TOKEN (and optionally VLLM_API_KEY) before production use."
|
||||
err "IMPORTANT: edit .env and set HF_TOKEN, VLLM_API_KEY, GEMMA_MODEL_ID, and other values before production use."
|
||||
fi
|
||||
|
||||
if [[ ! -f "${REPO_ROOT}/backend/config/model.env" ]]; then
|
||||
cp "${REPO_ROOT}/backend/config/model.env.example" "${REPO_ROOT}/backend/config/model.env"
|
||||
log "Created backend/config/model.env from example."
|
||||
fi
|
||||
mkdir -p "${REPO_ROOT}/backend/config" "${REPO_ROOT}/frontend/config" "${REPO_ROOT}/models" "${REPO_ROOT}/frontend/data/open-webui"
|
||||
|
||||
if [[ ! -f "${REPO_ROOT}/frontend/config/frontend.env" ]]; then
|
||||
cp "${REPO_ROOT}/frontend/config/frontend.env.example" "${REPO_ROOT}/frontend/config/frontend.env"
|
||||
log "Created frontend/config/frontend.env from example."
|
||||
fi
|
||||
|
||||
mkdir -p "${REPO_ROOT}/models" "${REPO_ROOT}/frontend/data/open-webui"
|
||||
load_env_file
|
||||
log "Using GEMMA_MODEL_ID=${GEMMA_MODEL_ID}"
|
||||
sync_service_env_files
|
||||
}
|
||||
|
||||
warn_if_rocm_devices_missing() {
|
||||
@ -117,27 +146,24 @@ warn_if_rocm_devices_missing() {
|
||||
|
||||
start_stack() {
|
||||
log "Pulling container images."
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" pull
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" pull
|
||||
|
||||
log "Starting containers in detached mode."
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" up -d
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" up -d --force-recreate
|
||||
}
|
||||
|
||||
show_status_and_urls() {
|
||||
local backend_port frontend_port
|
||||
backend_port="$(grep -E '^BACKEND_PORT=' "${REPO_ROOT}/.env" | tail -n1 | cut -d'=' -f2 || true)"
|
||||
frontend_port="$(grep -E '^FRONTEND_PORT=' "${REPO_ROOT}/.env" | tail -n1 | cut -d'=' -f2 || true)"
|
||||
backend_port="${backend_port:-8000}"
|
||||
frontend_port="${frontend_port:-3000}"
|
||||
backend_port="${BACKEND_PORT:-8000}"
|
||||
frontend_port="${FRONTEND_PORT:-3000}"
|
||||
|
||||
log "Backend status:"
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" ps gemma3-vllm || true
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" ps gemma3-vllm || true
|
||||
|
||||
log "Frontend status:"
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" ps chat-ui || true
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" ps chat-ui || true
|
||||
|
||||
printf '
|
||||
'
|
||||
printf '\n'
|
||||
log "API endpoint: http://localhost:${backend_port}/v1"
|
||||
log "Chat UI endpoint: http://localhost:${frontend_port}"
|
||||
log "If startup fails, inspect logs with: docker compose logs --tail=200 gemma3-vllm chat-ui"
|
||||
|
||||
@ -7,19 +7,60 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
ENV_FILE="${REPO_ROOT}/.env"
|
||||
|
||||
log() {
|
||||
printf '[restart] %s
|
||||
' "$*"
|
||||
printf '[restart] %s\n' "$*"
|
||||
}
|
||||
|
||||
if [[ ! -f "${ENV_FILE}" ]]; then
|
||||
ENV_FILE="${REPO_ROOT}/.env.example"
|
||||
fi
|
||||
# Write an error line, tagged with the script name, to stderr.
# Arguments: message words; joined with single spaces.
err() {
  printf '%s\n' "[restart][error] $*" >&2
}
|
||||
|
||||
#######################################
# Source the .env file with allexport enabled and require a model id.
# Globals:   ENV_FILE (read); variables assigned by ENV_FILE (exported)
# Arguments: none
# Returns:   0 on success. Exits 1 after reporting via err when ENV_FILE is
#            not a regular file; aborts when GEMMA_MODEL_ID is unset/empty.
#######################################
load_env_file() {
  [[ -f "${ENV_FILE}" ]] || {
    err ".env file not found. Copy .env.example to .env first."
    exit 1
  }

  # Everything assigned while allexport is on is exported automatically.
  set -o allexport
  # shellcheck disable=SC1090
  . "${ENV_FILE}"
  set +o allexport

  : "${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}"
}
|
||||
|
||||
#######################################
# Regenerate the per-service env files from the values currently loaded in
# the shell, falling back to the defaults spelled out below.
# Globals:   REPO_ROOT, GEMMA_MODEL_ID (read, required)
#            HF_TOKEN, VLLM_API_KEY, BACKEND_PORT, HUGGINGFACE_CACHE_DIR,
#            VLLM_MAX_MODEL_LEN, VLLM_GPU_MEMORY_UTILIZATION, FRONTEND_PORT,
#            OPENAI_API_BASE_URL, OPEN_WEBUI_DATA_DIR, USER (read, optional)
# Arguments: none
# Outputs:   overwrites backend/config/model.env and
#            frontend/config/frontend.env under REPO_ROOT; logs one line
# Returns:   0 on success; aborts the shell if GEMMA_MODEL_ID is unset/empty
#######################################
sync_service_env_files() {
  local backend_env="${REPO_ROOT}/backend/config/model.env"
  local frontend_env="${REPO_ROOT}/frontend/config/frontend.env"
  local login_name previous_umask

  # Fail before any file is opened for writing, so a missing model id cannot
  # leave a truncated env file behind.
  : "${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}"

  # USER is not guaranteed to be set (cron, some containers); a bare ${USER}
  # would abort under `set -u`.
  login_name="${USER:-$(id -un)}"

  mkdir -p "${backend_env%/*}" "${frontend_env%/*}"

  # Both files carry credentials (HF_TOKEN / VLLM_API_KEY): create them
  # owner-only, then restore the caller's umask.
  previous_umask="$(umask)"
  umask 077

  cat > "${backend_env}" <<EOF
HF_TOKEN=${HF_TOKEN:-YOUR_HF_TOKEN_HERE}
VLLM_API_KEY=${VLLM_API_KEY:-YOUR_LOCAL_API_KEY_HERE}
GEMMA_MODEL_ID=${GEMMA_MODEL_ID}
BACKEND_PORT=${BACKEND_PORT:-8000}
HUGGINGFACE_CACHE_DIR=${HUGGINGFACE_CACHE_DIR:-/home/${login_name}/.cache/huggingface}
VLLM_MAX_MODEL_LEN=${VLLM_MAX_MODEL_LEN:-512}
VLLM_GPU_MEMORY_UTILIZATION=${VLLM_GPU_MEMORY_UTILIZATION:-0.7}
EOF

  cat > "${frontend_env}" <<EOF
FRONTEND_PORT=${FRONTEND_PORT:-3000}
OPENAI_API_BASE_URL=${OPENAI_API_BASE_URL:-http://gemma3-vllm:8000/v1}
VLLM_API_KEY=${VLLM_API_KEY:-YOUR_LOCAL_API_KEY_HERE}
GEMMA_MODEL_ID=${GEMMA_MODEL_ID}
OPEN_WEBUI_DATA_DIR=${OPEN_WEBUI_DATA_DIR:-./frontend/data/open-webui}
EOF

  umask "${previous_umask}"
  # umask only affects newly created files; tighten pre-existing ones too.
  chmod 600 "${backend_env}" "${frontend_env}"

  log "Synced backend/config/model.env and frontend/config/frontend.env from .env."
}
|
||||
|
||||
load_env_file
|
||||
log "Using GEMMA_MODEL_ID=${GEMMA_MODEL_ID}"
|
||||
sync_service_env_files
|
||||
|
||||
log "Stopping stack."
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" down
|
||||
|
||||
log "Starting stack."
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" up -d
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" up -d --force-recreate
|
||||
|
||||
log "Current status:"
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" ps
|
||||
|
||||
@ -11,11 +11,13 @@ if [[ ! -f "${ENV_FILE}" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
set -a
|
||||
# shellcheck disable=SC1090
|
||||
source "${ENV_FILE}"
|
||||
set +a
|
||||
|
||||
BACKEND_PORT="${BACKEND_PORT:-8000}"
|
||||
GEMMA_MODEL_ID="${GEMMA_MODEL_ID:-google/gemma-3-1b-it}"
|
||||
GEMMA_MODEL_ID="${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}"
|
||||
VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
|
||||
API_URL="http://localhost:${BACKEND_PORT}/v1/chat/completions"
|
||||
|
||||
@ -35,7 +37,7 @@ cat > "${payload_file}" <<JSON
|
||||
}
|
||||
JSON
|
||||
|
||||
http_status="$(curl -sS -o "${response_file}" -w '%{http_code}' -H "Content-Type: application/json" -H "Authorization: Bearer ${VLLM_API_KEY}" -X POST "${API_URL}" --data @"${payload_file}")"
|
||||
http_status="$(curl -sS -o "${response_file}" -w '%{http_code}' -H "Content-Type: application/json" -H "Authorization: Bearer ${VLLM_API_KEY}" -X POST "${API_URL}" --data @"${payload_file}")"
|
||||
|
||||
if [[ ! "${http_status}" =~ ^2 ]]; then
|
||||
echo "[test_api][error] API returned HTTP ${http_status}" >&2
|
||||
|
||||
@ -16,9 +16,12 @@ def load_dotenv(dotenv_path: Path) -> None:
|
||||
line = raw_line.strip()
|
||||
if not line or line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
if line.startswith("export "):
|
||||
line = line[len("export ") :].strip()
|
||||
|
||||
key, value = line.split("=", 1)
|
||||
key = key.strip()
|
||||
value = value.strip().strip('"').strip("'")
|
||||
value = os.path.expandvars(value.strip().strip('"').strip("'"))
|
||||
os.environ.setdefault(key, value)
|
||||
|
||||
|
||||
@ -27,7 +30,10 @@ def main() -> int:
|
||||
load_dotenv(repo_root / ".env")
|
||||
|
||||
backend_port = os.getenv("BACKEND_PORT", "8000")
|
||||
model_id = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it")
|
||||
model_id = os.getenv("GEMMA_MODEL_ID")
|
||||
if not model_id:
|
||||
print("[test_python_client][error] GEMMA_MODEL_ID is not set in .env.", file=sys.stderr)
|
||||
return 1
|
||||
api_key = os.getenv("VLLM_API_KEY", "EMPTY")
|
||||
base_url = f"http://localhost:{backend_port}/v1"
|
||||
|
||||
|
||||
@ -7,8 +7,10 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
ENV_FILE="${REPO_ROOT}/.env"
|
||||
|
||||
if [[ -f "${ENV_FILE}" ]]; then
|
||||
set -a
|
||||
# shellcheck disable=SC1090
|
||||
source "${ENV_FILE}"
|
||||
set +a
|
||||
fi
|
||||
|
||||
FRONTEND_PORT="${FRONTEND_PORT:-3000}"
|
||||
|
||||
@ -7,13 +7,22 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
PURGE=0
|
||||
|
||||
log() {
|
||||
printf '[uninstall] %s
|
||||
' "$*"
|
||||
printf '[uninstall] %s\n' "$*"
|
||||
}
|
||||
|
||||
err() {
|
||||
printf '[uninstall][error] %s
|
||||
' "$*" >&2
|
||||
printf '[uninstall][error] %s\n' "$*" >&2
|
||||
}
|
||||
|
||||
#######################################
# Source ENV_FILE, exporting every variable it assigns. Does nothing when
# ENV_FILE is not a regular file.
# Globals:   ENV_FILE (read); variables assigned by ENV_FILE (exported)
# Arguments: none
# Returns:   0 when the file is absent or was loaded.
#######################################
load_env_file() {
  if [[ -f "${ENV_FILE}" ]]; then
    # allexport is enabled only for the duration of the source.
    set -o allexport
    # shellcheck disable=SC1090
    . "${ENV_FILE}"
    set +o allexport
  fi
}
|
||||
|
||||
usage() {
|
||||
@ -53,6 +62,7 @@ ENV_FILE="${REPO_ROOT}/.env"
|
||||
if [[ ! -f "${ENV_FILE}" ]]; then
|
||||
ENV_FILE="${REPO_ROOT}/.env.example"
|
||||
fi
|
||||
load_env_file
|
||||
|
||||
log "Stopping stack and removing containers, networks, and named/anonymous volumes."
|
||||
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" down -v || true
|
||||
@ -60,22 +70,20 @@ docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" dow
|
||||
if [[ ${PURGE} -eq 1 ]]; then
|
||||
log "Purge requested. Removing local data directories used by this stack."
|
||||
|
||||
huggingface_cache_dir="$(grep -E '^HUGGINGFACE_CACHE_DIR=' "${ENV_FILE}" | tail -n1 | cut -d'=' -f2- || true)"
|
||||
open_webui_data_dir="$(grep -E '^OPEN_WEBUI_DATA_DIR=' "${ENV_FILE}" | tail -n1 | cut -d'=' -f2- || true)"
|
||||
huggingface_cache_dir="${HUGGINGFACE_CACHE_DIR:-}"
|
||||
open_webui_data_dir="${OPEN_WEBUI_DATA_DIR:-./frontend/data/open-webui}"
|
||||
|
||||
if [[ -n "${huggingface_cache_dir}" ]]; then
|
||||
# Expand potential variables such as ${USER}
|
||||
evaluated_hf_dir="$(eval "printf '%s' "${huggingface_cache_dir}"")"
|
||||
if [[ -d "${evaluated_hf_dir}" ]]; then
|
||||
log "Removing Hugging Face cache directory: ${evaluated_hf_dir}"
|
||||
rm -rf "${evaluated_hf_dir}"
|
||||
else
|
||||
log "Hugging Face cache directory not found: ${evaluated_hf_dir}"
|
||||
if [[ "${huggingface_cache_dir}" == ./* ]]; then
|
||||
huggingface_cache_dir="${REPO_ROOT}/${huggingface_cache_dir#./}"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "${open_webui_data_dir}" ]]; then
|
||||
open_webui_data_dir="./frontend/data/open-webui"
|
||||
if [[ -d "${huggingface_cache_dir}" ]]; then
|
||||
log "Removing Hugging Face cache directory: ${huggingface_cache_dir}"
|
||||
rm -rf "${huggingface_cache_dir}"
|
||||
else
|
||||
log "Hugging Face cache directory not found: ${huggingface_cache_dir}"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "${open_webui_data_dir}" == ./* ]]; then
|
||||
|
||||
Reference in New Issue
Block a user