From 319881a09f03bc31532bfa24363b7ac6239d372d Mon Sep 17 00:00:00 2001 From: Raghav <1858838+quantumrag@users.noreply.github.com> Date: Sun, 19 Apr 2026 09:42:52 +0530 Subject: [PATCH] Update docs, configs, and scripts Co-Authored-By: Oz --- README.md | 11 +++- backend/config/model.env.example | 4 +- docker-compose.yml | 4 +- docs/TROUBLESHOOTING.md | 3 +- frontend/config/frontend.env.example | 4 +- scripts/install.sh | 82 ++++++++++++++++++---------- scripts/restart.sh | 53 ++++++++++++++++-- scripts/test_api.sh | 6 +- scripts/test_python_client.py | 10 +++- scripts/test_ui.sh | 2 + scripts/uninstall.sh | 40 ++++++++------ 11 files changed, 157 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 4c0b1c2..057e852 100644 --- a/README.md +++ b/README.md @@ -57,16 +57,17 @@ Detailed architecture: `docs/ARCHITECTURE.md`. cd gemma3-vllm-stack ``` -2. Create configuration files: +2. Create the main configuration file: ```bash cp .env.example .env - cp backend/config/model.env.example backend/config/model.env - cp frontend/config/frontend.env.example frontend/config/frontend.env ``` 3. Edit `.env` and set at least: - `HF_TOKEN` - `VLLM_API_KEY` (recommended even on LAN) + - `GEMMA_MODEL_ID` + + `backend/config/model.env` and `frontend/config/frontend.env` are auto-synced from `.env` by `scripts/install.sh` and `scripts/restart.sh`. 4. Install/start stack: ```bash @@ -124,3 +125,7 @@ Adjust using `.env`: ## Notes for `chat.bhatfamily.in` This repository intentionally does not terminate TLS. Bindings are plain HTTP on host ports and are designed for external reverse proxy + TLS handling (nginx/Caddy/Cloudflare Tunnel). + +Current homelab edge mapping (verified 2026-04-19): +- `https://chat.bhatfamily.in` is served on `443/tcp` by the shared Caddy edge. +- Direct host ports `3000/tcp` (UI) and `8000/tcp` (API) are currently publicly reachable; restrict firewall/NAT exposure if direct internet access is not intended. diff --git a/backend/config/model.env.example b/backend/config/model.env.example index e5d1da6..c150017 100644 --- a/backend/config/model.env.example +++ b/backend/config/model.env.example @@ -1,6 +1,8 @@ +# Generated from .env by scripts/install.sh and scripts/restart.sh +# Keep values in .env as the source of truth. HF_TOKEN=YOUR_HF_TOKEN_HERE VLLM_API_KEY=YOUR_LOCAL_API_KEY_HERE -GEMMA_MODEL_ID=google/gemma-3-1b-it +GEMMA_MODEL_ID=SET_IN_DOT_ENV BACKEND_PORT=8000 HUGGINGFACE_CACHE_DIR=/home/${USER}/.cache/huggingface VLLM_MAX_MODEL_LEN=4096 diff --git a/docker-compose.yml b/docker-compose.yml index f07fda8..760ae9c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,7 @@ services: HSA_OVERRIDE_GFX_VERSION: ${HSA_OVERRIDE_GFX_VERSION:-11.0.0} command: - --model - - ${GEMMA_MODEL_ID:-google/gemma-3-1b-it} + - ${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env} - --host - 0.0.0.0 - --port @@ -69,7 +69,7 @@ services: OPENAI_API_KEY: ${VLLM_API_KEY:-local-dev-key} ENABLE_OPENAI_API: "True" ENABLE_OLLAMA_API: "False" - DEFAULT_MODELS: ${GEMMA_MODEL_ID:-google/gemma-3-1b-it} + DEFAULT_MODELS: ${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env} GLOBAL_LOG_LEVEL: INFO WEBUI_NAME: Gemma 3 via vLLM ports: diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 3f2183b..826e94c 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -83,7 +83,8 @@ Ensure `HF_TOKEN` is set to a valid token with access to Gemma 3. ```bash grep '^GEMMA_MODEL_ID=' .env ``` -Use a valid model, e.g. `google/gemma-3-1b-it`. +Set `GEMMA_MODEL_ID` in `.env` to the model you intend to run and ensure your `HF_TOKEN` has access to it. +After editing `.env`, run `./scripts/restart.sh` so synced service env files and containers pick up the change. 3. ROCm runtime/device issues: ```bash diff --git a/frontend/config/frontend.env.example b/frontend/config/frontend.env.example index 80e0dc3..eefa184 100644 --- a/frontend/config/frontend.env.example +++ b/frontend/config/frontend.env.example @@ -1,5 +1,7 @@ +# Generated from .env by scripts/install.sh and scripts/restart.sh +# Keep values in .env as the source of truth. FRONTEND_PORT=3000 OPENAI_API_BASE_URL=http://gemma3-vllm:8000/v1 VLLM_API_KEY=YOUR_LOCAL_API_KEY_HERE -GEMMA_MODEL_ID=google/gemma-3-1b-it +GEMMA_MODEL_ID=SET_IN_DOT_ENV OPEN_WEBUI_DATA_DIR=./frontend/data/open-webui diff --git a/scripts/install.sh b/scripts/install.sh index ce35fa0..22829d8 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -4,15 +4,49 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +ENV_FILE="${REPO_ROOT}/.env" log() { - printf '[install] %s -' "$*" + printf '[install] %s\n' "$*" } err() { - printf '[install][error] %s -' "$*" >&2 + printf '[install][error] %s\n' "$*" >&2 +} + +load_env_file() { + if [[ ! -f "${ENV_FILE}" ]]; then + return + fi + + set -a + # shellcheck disable=SC1090 + source "${ENV_FILE}" + set +a + + : "${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}" +} + +sync_service_env_files() { + cat > "${REPO_ROOT}/backend/config/model.env" < "${REPO_ROOT}/frontend/config/frontend.env" </dev/null + echo "deb [arch=${arch} signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${codename} stable" \ + | sudo tee /etc/apt/sources.list.d/docker.list >/dev/null sudo apt-get update sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin @@ -89,23 +124,17 @@ check_or_install_docker() { } prepare_env_files() { - if [[ ! -f "${REPO_ROOT}/.env" ]]; then - cp "${REPO_ROOT}/.env.example" "${REPO_ROOT}/.env" + if [[ ! -f "${ENV_FILE}" ]]; then + cp "${REPO_ROOT}/.env.example" "${ENV_FILE}" log "Created .env from .env.example." - err "IMPORTANT: edit .env and set HF_TOKEN (and optionally VLLM_API_KEY) before production use." + err "IMPORTANT: edit .env and set HF_TOKEN, VLLM_API_KEY, GEMMA_MODEL_ID, and other values before production use." fi - if [[ ! -f "${REPO_ROOT}/backend/config/model.env" ]]; then - cp "${REPO_ROOT}/backend/config/model.env.example" "${REPO_ROOT}/backend/config/model.env" - log "Created backend/config/model.env from example." - fi + mkdir -p "${REPO_ROOT}/backend/config" "${REPO_ROOT}/frontend/config" "${REPO_ROOT}/models" "${REPO_ROOT}/frontend/data/open-webui" - if [[ ! -f "${REPO_ROOT}/frontend/config/frontend.env" ]]; then - cp "${REPO_ROOT}/frontend/config/frontend.env.example" "${REPO_ROOT}/frontend/config/frontend.env" - log "Created frontend/config/frontend.env from example." - fi - - mkdir -p "${REPO_ROOT}/models" "${REPO_ROOT}/frontend/data/open-webui" + load_env_file + log "Using GEMMA_MODEL_ID=${GEMMA_MODEL_ID}" + sync_service_env_files } warn_if_rocm_devices_missing() { @@ -117,27 +146,24 @@ warn_if_rocm_devices_missing() { start_stack() { log "Pulling container images." - docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" pull + docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" pull log "Starting containers in detached mode." - docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" up -d + docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" up -d --force-recreate } show_status_and_urls() { local backend_port frontend_port - backend_port="$(grep -E '^BACKEND_PORT=' "${REPO_ROOT}/.env" | tail -n1 | cut -d'=' -f2 || true)" - frontend_port="$(grep -E '^FRONTEND_PORT=' "${REPO_ROOT}/.env" | tail -n1 | cut -d'=' -f2 || true)" - backend_port="${backend_port:-8000}" - frontend_port="${frontend_port:-3000}" + backend_port="${BACKEND_PORT:-8000}" + frontend_port="${FRONTEND_PORT:-3000}" log "Backend status:" - docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" ps gemma3-vllm || true + docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" ps gemma3-vllm || true log "Frontend status:" - docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" ps chat-ui || true + docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" ps chat-ui || true - printf ' -' + printf '\n' log "API endpoint: http://localhost:${backend_port}/v1" log "Chat UI endpoint: http://localhost:${frontend_port}" log "If startup fails, inspect logs with: docker compose logs --tail=200 gemma3-vllm chat-ui" diff --git a/scripts/restart.sh b/scripts/restart.sh index 4a98b48..d0d62a6 100755 --- a/scripts/restart.sh +++ b/scripts/restart.sh @@ -7,19 +7,60 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" ENV_FILE="${REPO_ROOT}/.env" log() { - printf '[restart] %s -' "$*" + printf '[restart] %s\n' "$*" } -if [[ ! -f "${ENV_FILE}" ]]; then - ENV_FILE="${REPO_ROOT}/.env.example" -fi +err() { + printf '[restart][error] %s\n' "$*" >&2 +} + +load_env_file() { + if [[ ! -f "${ENV_FILE}" ]]; then + err ".env file not found. Copy .env.example to .env first." + exit 1 + fi + + set -a + # shellcheck disable=SC1090 + source "${ENV_FILE}" + set +a + + : "${GEMMA_MODEL_ID:?GEMMA_MODEL_ID must be set in .env}" +} + +sync_service_env_files() { + mkdir -p "${REPO_ROOT}/backend/config" "${REPO_ROOT}/frontend/config" + + cat > "${REPO_ROOT}/backend/config/model.env" < "${REPO_ROOT}/frontend/config/frontend.env" < "${payload_file}" <&2 diff --git a/scripts/test_python_client.py b/scripts/test_python_client.py index 66cfe2f..bd870ed 100755 --- a/scripts/test_python_client.py +++ b/scripts/test_python_client.py @@ -16,9 +16,12 @@ def load_dotenv(dotenv_path: Path) -> None: line = raw_line.strip() if not line or line.startswith("#") or "=" not in line: continue + if line.startswith("export "): + line = line[len("export ") :].strip() + key, value = line.split("=", 1) key = key.strip() - value = value.strip().strip('"').strip("'") + value = os.path.expandvars(value.strip().strip('"').strip("'")) os.environ.setdefault(key, value) @@ -27,7 +30,10 @@ def main() -> int: load_dotenv(repo_root / ".env") backend_port = os.getenv("BACKEND_PORT", "8000") - model_id = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it") + model_id = os.getenv("GEMMA_MODEL_ID") + if not model_id: + print("[test_python_client][error] GEMMA_MODEL_ID is not set in .env.", file=sys.stderr) + return 1 api_key = os.getenv("VLLM_API_KEY", "EMPTY") base_url = f"http://localhost:{backend_port}/v1" diff --git a/scripts/test_ui.sh b/scripts/test_ui.sh index efb035e..a0c54c4 100755 --- a/scripts/test_ui.sh +++ b/scripts/test_ui.sh @@ -7,8 +7,10 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" ENV_FILE="${REPO_ROOT}/.env" if [[ -f "${ENV_FILE}" ]]; then + set -a # shellcheck disable=SC1090 source "${ENV_FILE}" + set +a fi FRONTEND_PORT="${FRONTEND_PORT:-3000}" diff --git a/scripts/uninstall.sh b/scripts/uninstall.sh index 736fa8e..95965b6 100755 --- a/scripts/uninstall.sh +++ b/scripts/uninstall.sh @@ -7,13 +7,22 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" PURGE=0 log() { - printf '[uninstall] %s -' "$*" + printf '[uninstall] %s\n' "$*" } err() { - printf '[uninstall][error] %s -' "$*" >&2 + printf '[uninstall][error] %s\n' "$*" >&2 +} + +load_env_file() { + if [[ ! -f "${ENV_FILE}" ]]; then + return + fi + + set -a + # shellcheck disable=SC1090 + source "${ENV_FILE}" + set +a } usage() { @@ -53,6 +62,7 @@ ENV_FILE="${REPO_ROOT}/.env" if [[ ! -f "${ENV_FILE}" ]]; then ENV_FILE="${REPO_ROOT}/.env.example" fi +load_env_file log "Stopping stack and removing containers, networks, and named/anonymous volumes." docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" down -v || true @@ -60,22 +70,20 @@ docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" dow if [[ ${PURGE} -eq 1 ]]; then log "Purge requested. Removing local data directories used by this stack." - huggingface_cache_dir="$(grep -E '^HUGGINGFACE_CACHE_DIR=' "${ENV_FILE}" | tail -n1 | cut -d'=' -f2- || true)" - open_webui_data_dir="$(grep -E '^OPEN_WEBUI_DATA_DIR=' "${ENV_FILE}" | tail -n1 | cut -d'=' -f2- || true)" + huggingface_cache_dir="${HUGGINGFACE_CACHE_DIR:-}" + open_webui_data_dir="${OPEN_WEBUI_DATA_DIR:-./frontend/data/open-webui}" if [[ -n "${huggingface_cache_dir}" ]]; then - # Expand potential variables such as ${USER} - evaluated_hf_dir="$(eval "printf '%s' "${huggingface_cache_dir}"")" - if [[ -d "${evaluated_hf_dir}" ]]; then - log "Removing Hugging Face cache directory: ${evaluated_hf_dir}" - rm -rf "${evaluated_hf_dir}" - else - log "Hugging Face cache directory not found: ${evaluated_hf_dir}" + if [[ "${huggingface_cache_dir}" == ./* ]]; then + huggingface_cache_dir="${REPO_ROOT}/${huggingface_cache_dir#./}" fi - fi - if [[ -z "${open_webui_data_dir}" ]]; then - open_webui_data_dir="./frontend/data/open-webui" + if [[ -d "${huggingface_cache_dir}" ]]; then + log "Removing Hugging Face cache directory: ${huggingface_cache_dir}" + rm -rf "${huggingface_cache_dir}" + else + log "Hugging Face cache directory not found: ${huggingface_cache_dir}" + fi fi if [[ "${open_webui_data_dir}" == ./* ]]; then