Initial production-ready Gemma 3 vLLM ROCm stack

Co-Authored-By: Oz <oz-agent@warp.dev>
This commit is contained in:
Raghav
2026-04-18 22:53:38 +05:30
commit ef8537e923
18 changed files with 988 additions and 0 deletions

155
scripts/install.sh Executable file

@@ -0,0 +1,155 @@
#!/usr/bin/env bash
# Installs prerequisites (if needed), prepares config files, and starts the Gemma 3 + vLLM + chat UI stack.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
log() {
printf '[install] %s\n' "$*"
}
err() {
printf '[install][error] %s\n' "$*" >&2
}
require_linux() {
if [[ "$(uname -s)" != "Linux" ]]; then
err "This script supports Linux only."
exit 1
fi
}
install_docker_ubuntu() {
log "Installing Docker Engine and Compose plugin using official Docker apt repository."
sudo apt-get update
sudo apt-get install -y ca-certificates curl gnupg
sudo install -m 0755 -d /etc/apt/keyrings
if [[ ! -f /etc/apt/keyrings/docker.gpg ]]; then
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
sudo chmod a+r /etc/apt/keyrings/docker.gpg
fi
source /etc/os-release
local arch
arch="$(dpkg --print-architecture)"
local codename
codename="${VERSION_CODENAME:-jammy}"
echo "deb [arch=${arch} signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${codename} stable" | sudo tee /etc/apt/sources.list.d/docker.list >/dev/null
sudo apt-get update
sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
if ! sudo systemctl is-active --quiet docker; then
sudo systemctl enable --now docker
fi
if ! getent group docker >/dev/null; then
sudo groupadd docker
fi
if ! id -nG "${USER}" | grep -qw docker; then
sudo usermod -aG docker "${USER}"
log "Added ${USER} to docker group. You may need to log out and back in for group membership to apply."
fi
}
check_or_install_docker() {
local have_docker=1
local have_compose=1
if ! command -v docker >/dev/null 2>&1; then
have_docker=0
fi
if ! docker compose version >/dev/null 2>&1; then
have_compose=0
fi
if [[ ${have_docker} -eq 1 && ${have_compose} -eq 1 ]]; then
log "Docker and Compose plugin are already available."
return
fi
if [[ -f /etc/os-release ]]; then
source /etc/os-release
if [[ "${ID:-}" == "ubuntu" ]]; then
install_docker_ubuntu
return
fi
fi
err "Docker/Compose missing and automatic installation is implemented for Ubuntu only."
err "See docs/TROUBLESHOOTING.md#docker-and-compose-not-available"
exit 1
}
prepare_env_files() {
if [[ ! -f "${REPO_ROOT}/.env" ]]; then
cp "${REPO_ROOT}/.env.example" "${REPO_ROOT}/.env"
log "Created .env from .env.example."
err "IMPORTANT: edit .env and set HF_TOKEN (and optionally VLLM_API_KEY) before production use."
fi
if [[ ! -f "${REPO_ROOT}/backend/config/model.env" ]]; then
cp "${REPO_ROOT}/backend/config/model.env.example" "${REPO_ROOT}/backend/config/model.env"
log "Created backend/config/model.env from example."
fi
if [[ ! -f "${REPO_ROOT}/frontend/config/frontend.env" ]]; then
cp "${REPO_ROOT}/frontend/config/frontend.env.example" "${REPO_ROOT}/frontend/config/frontend.env"
log "Created frontend/config/frontend.env from example."
fi
mkdir -p "${REPO_ROOT}/models" "${REPO_ROOT}/frontend/data/open-webui"
}
warn_if_rocm_devices_missing() {
if [[ ! -e /dev/kfd || ! -d /dev/dri ]]; then
err "ROCm device files /dev/kfd or /dev/dri are not available."
err "See docs/TROUBLESHOOTING.md#rocm-devices-not-visible-in-host"
fi
}
start_stack() {
log "Pulling container images."
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" pull
log "Starting containers in detached mode."
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" up -d
}
show_status_and_urls() {
local backend_port frontend_port
backend_port="$(grep -E '^BACKEND_PORT=' "${REPO_ROOT}/.env" | tail -n1 | cut -d'=' -f2 || true)"
frontend_port="$(grep -E '^FRONTEND_PORT=' "${REPO_ROOT}/.env" | tail -n1 | cut -d'=' -f2 || true)"
backend_port="${backend_port:-8000}"
frontend_port="${frontend_port:-3000}"
log "Backend status:"
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" ps gemma3-vllm || true
log "Frontend status:"
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" ps chat-ui || true
printf '\n'
log "API endpoint: http://localhost:${backend_port}/v1"
log "Chat UI endpoint: http://localhost:${frontend_port}"
log "If startup fails, inspect logs with: docker compose logs --tail=200 gemma3-vllm chat-ui"
}
main() {
require_linux
check_or_install_docker
prepare_env_files
warn_if_rocm_devices_missing
start_stack
show_status_and_urls
}
main "$@"

25
scripts/restart.sh Executable file

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Restarts the Gemma 3 vLLM stack and shows service status.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"
log() {
printf '[restart] %s\n' "$*"
}
if [[ ! -f "${ENV_FILE}" ]]; then
ENV_FILE="${REPO_ROOT}/.env.example"
fi
log "Stopping stack."
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" down
log "Starting stack."
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" up -d
log "Current status:"
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" ps

54
scripts/test_api.sh Executable file

@@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Tests the local vLLM OpenAI-compatible API with curl and validates the response shape.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"
if [[ ! -f "${ENV_FILE}" ]]; then
echo "[test_api][error] .env file not found. Copy .env.example to .env first." >&2
exit 1
fi
# shellcheck disable=SC1090
source "${ENV_FILE}"
BACKEND_PORT="${BACKEND_PORT:-8000}"
GEMMA_MODEL_ID="${GEMMA_MODEL_ID:-google/gemma-3-1b-it}"
VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
API_URL="http://localhost:${BACKEND_PORT}/v1/chat/completions"
payload_file="$(mktemp)"
response_file="$(mktemp)"
trap 'rm -f "${payload_file}" "${response_file}"' EXIT
cat > "${payload_file}" <<JSON
{
"model": "${GEMMA_MODEL_ID}",
"messages": [
{"role": "system", "content": "You are a concise assistant."},
{"role": "user", "content": "Say hello from Gemma 3 running on vLLM."}
],
"temperature": 0.2,
"max_tokens": 64
}
JSON
http_status="$(curl -sS -o "${response_file}" -w '%{http_code}' -H "Content-Type: application/json" -H "Authorization: Bearer ${VLLM_API_KEY}" -X POST "${API_URL}" --data @"${payload_file}")"
if [[ ! "${http_status}" =~ ^2 ]]; then
echo "[test_api][error] API returned HTTP ${http_status}" >&2
cat "${response_file}" >&2
echo "[test_api][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks" >&2
exit 1
fi
if ! grep -q '"choices"' "${response_file}"; then
echo "[test_api][error] API response did not include expected 'choices' field." >&2
cat "${response_file}" >&2
exit 1
fi
echo "[test_api] Success. API responded with expected structure."
cat "${response_file}"

75
scripts/test_python_client.py Executable file

@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""Tests local vLLM OpenAI-compatible API using openai>=1.x Python client."""
from __future__ import annotations
import os
import sys
from pathlib import Path
def load_dotenv(dotenv_path: Path) -> None:
if not dotenv_path.exists():
return
for raw_line in dotenv_path.read_text(encoding="utf-8").splitlines():
line = raw_line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, value = line.split("=", 1)
key = key.strip()
value = value.strip().strip('"').strip("'")
os.environ.setdefault(key, value)
def main() -> int:
repo_root = Path(__file__).resolve().parent.parent
load_dotenv(repo_root / ".env")
backend_port = os.getenv("BACKEND_PORT", "8000")
model_id = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it")
api_key = os.getenv("VLLM_API_KEY", "EMPTY")
base_url = f"http://localhost:{backend_port}/v1"
try:
from openai import OpenAI
except ImportError:
print("[test_python_client][error] openai package is not installed.", file=sys.stderr)
print("Install it with: python3 -m pip install openai", file=sys.stderr)
return 1
client = OpenAI(api_key=api_key, base_url=base_url)
try:
response = client.chat.completions.create(
model=model_id,
messages=[
{"role": "system", "content": "You are a concise assistant."},
{
"role": "user",
"content": "Say hello from Gemma 3 running on vLLM in one sentence.",
},
],
temperature=0.2,
max_tokens=64,
)
except Exception as exc:
print(f"[test_python_client][error] Request failed: {exc}", file=sys.stderr)
print(
"[test_python_client][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks",
file=sys.stderr,
)
return 1
if not response.choices or not response.choices[0].message:
print("[test_python_client][error] No completion choices returned.", file=sys.stderr)
return 1
content = response.choices[0].message.content or ""
print("[test_python_client] Success. Assistant response:")
print(content.strip())
return 0
if __name__ == "__main__":
raise SystemExit(main())
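
A short usage sketch for the Python test client, assuming python3 and pip are available on the host (commands are illustrative, not part of the commit):

# Illustrative invocation; the script reads BACKEND_PORT, GEMMA_MODEL_ID and VLLM_API_KEY from .env.
python3 -m pip install --upgrade openai
python3 scripts/test_python_client.py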

25
scripts/test_ui.sh Executable file

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Tests whether the chat UI is reachable on the configured frontend port on localhost.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"
if [[ -f "${ENV_FILE}" ]]; then
# shellcheck disable=SC1090
source "${ENV_FILE}"
fi
FRONTEND_PORT="${FRONTEND_PORT:-3000}"
UI_URL="http://localhost:${FRONTEND_PORT}"
http_status="$(curl -sS -o /dev/null -w '%{http_code}' "${UI_URL}")"
if [[ "${http_status}" != "200" && "${http_status}" != "301" && "${http_status}" != "302" ]]; then
echo "[test_ui][error] UI check failed with HTTP status ${http_status} at ${UI_URL}" >&2
echo "[test_ui][hint] See docs/TROUBLESHOOTING.md#ui-loads-but-cannot-reach-vllm-backend" >&2
exit 1
fi
echo "[test_ui] Chat UI is reachable at ${UI_URL} (HTTP ${http_status})."

98
scripts/uninstall.sh Executable file

@@ -0,0 +1,98 @@
#!/usr/bin/env bash
# Stops and removes the Gemma 3 vLLM stack. Optional --purge removes local model/cache data.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
PURGE=0
log() {
printf '[uninstall] %s\n' "$*"
}
err() {
printf '[uninstall][error] %s\n' "$*" >&2
}
usage() {
cat <<'EOF'
Usage: scripts/uninstall.sh [--purge]
Options:
--purge Remove local Hugging Face cache directory and ./models data in addition to containers/volumes.
-h, --help Show this help message.
EOF
}
while [[ $# -gt 0 ]]; do
case "$1" in
--purge)
PURGE=1
;;
-h|--help)
usage
exit 0
;;
*)
err "Unknown argument: $1"
usage
exit 1
;;
esac
shift
done
if [[ ! -f "${REPO_ROOT}/docker-compose.yml" ]]; then
err "docker-compose.yml not found at ${REPO_ROOT}."
exit 1
fi
ENV_FILE="${REPO_ROOT}/.env"
if [[ ! -f "${ENV_FILE}" ]]; then
ENV_FILE="${REPO_ROOT}/.env.example"
fi
log "Stopping stack and removing containers, networks, and named/anonymous volumes."
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" down -v || true
if [[ ${PURGE} -eq 1 ]]; then
log "Purge requested. Removing local data directories used by this stack."
huggingface_cache_dir="$(grep -E '^HUGGINGFACE_CACHE_DIR=' "${ENV_FILE}" | tail -n1 | cut -d'=' -f2- || true)"
open_webui_data_dir="$(grep -E '^OPEN_WEBUI_DATA_DIR=' "${ENV_FILE}" | tail -n1 | cut -d'=' -f2- || true)"
if [[ -n "${huggingface_cache_dir}" ]]; then
# Expand potential variables such as ${USER}
evaluated_hf_dir="$(eval "printf '%s' \"${huggingface_cache_dir}\"")"
if [[ -d "${evaluated_hf_dir}" ]]; then
log "Removing Hugging Face cache directory: ${evaluated_hf_dir}"
rm -rf "${evaluated_hf_dir}"
else
log "Hugging Face cache directory not found: ${evaluated_hf_dir}"
fi
fi
if [[ -z "${open_webui_data_dir}" ]]; then
open_webui_data_dir="./frontend/data/open-webui"
fi
if [[ "${open_webui_data_dir}" == ./* ]]; then
open_webui_data_dir="${REPO_ROOT}/${open_webui_data_dir#./}"
fi
if [[ -d "${open_webui_data_dir}" ]]; then
log "Removing Open WebUI data directory: ${open_webui_data_dir}"
rm -rf "${open_webui_data_dir}"
fi
if [[ -d "${REPO_ROOT}/models" ]]; then
log "Removing local models directory: ${REPO_ROOT}/models"
rm -rf "${REPO_ROOT}/models"
fi
else
log "Safe mode enabled (default). Local model/cache data was preserved."
fi
log "Uninstall complete."