Initial production-ready Gemma 3 vLLM ROCm stack
Co-Authored-By: Oz <oz-agent@warp.dev>
This commit is contained in:

scripts/install.sh (new executable file, 155 lines)
#!/usr/bin/env bash
# Installs prerequisites (if needed), prepares config files, and starts Gemma 3 + vLLM + chat UI stack.
set -euo pipefail

# Resolve the repository root relative to this script so it works from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
|
||||
# Print an informational message to stdout with the script tag prefix.
log() {
  printf '%s\n' "[install] $*"
}
|
||||
|
||||
# Print an error message to stderr with the script error tag prefix.
err() {
  printf '%s\n' "[install][error] $*" >&2
}
|
||||
|
||||
# Abort unless running on Linux; the ROCm and Docker steps below assume it.
require_linux() {
  local kernel_name
  kernel_name="$(uname -s)"
  if [[ "${kernel_name}" != "Linux" ]]; then
    err "This script supports Linux only."
    exit 1
  fi
}
|
||||
|
||||
# Installs Docker Engine + the Compose v2 plugin from Docker's official apt
# repository, ensures the daemon is enabled and running, and adds the invoking
# user to the docker group. Requires sudo; mutates apt sources, the keyring,
# and group membership. Ubuntu-only (called from check_or_install_docker).
install_docker_ubuntu() {
  log "Installing Docker Engine and Compose plugin using official Docker apt repository."
  sudo apt-get update
  sudo apt-get install -y ca-certificates curl gnupg
  sudo install -m 0755 -d /etc/apt/keyrings

  # Import Docker's signing key only once; reruns reuse the existing key file.
  if [[ ! -f /etc/apt/keyrings/docker.gpg ]]; then
    curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
    sudo chmod a+r /etc/apt/keyrings/docker.gpg
  fi

  # VERSION_CODENAME comes from /etc/os-release; fall back to jammy when the
  # file does not define it (e.g. minimal images).
  source /etc/os-release
  local arch
  arch="$(dpkg --print-architecture)"
  local codename
  codename="${VERSION_CODENAME:-jammy}"

  echo "deb [arch=${arch} signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${codename} stable" | sudo tee /etc/apt/sources.list.d/docker.list >/dev/null

  sudo apt-get update
  sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

  # Make sure the daemon is running now and starts on boot.
  if ! sudo systemctl is-active --quiet docker; then
    sudo systemctl enable --now docker
  fi

  # Older installs may lack the docker group entirely.
  if ! getent group docker >/dev/null; then
    sudo groupadd docker
  fi

  # Group membership only takes effect in new login sessions.
  if ! id -nG "${USER}" | grep -qw docker; then
    sudo usermod -aG docker "${USER}"
    log "Added ${USER} to docker group. You may need to log out and back in for group membership to apply."
  fi
}
|
||||
|
||||
# Verify docker + the Compose v2 plugin are available; on Ubuntu, install them
# automatically when missing. Exits non-zero on non-Ubuntu systems without
# docker, pointing at the troubleshooting doc.
check_or_install_docker() {
  local have_docker=0
  local have_compose=0

  if command -v docker >/dev/null 2>&1; then
    have_docker=1
    # Only probe the compose plugin when the docker CLI itself exists;
    # otherwise "docker compose version" can only fail with a noisy
    # command-not-found error anyway.
    if docker compose version >/dev/null 2>&1; then
      have_compose=1
    fi
  fi

  if [[ ${have_docker} -eq 1 && ${have_compose} -eq 1 ]]; then
    log "Docker and Compose plugin are already available."
    return
  fi

  if [[ -f /etc/os-release ]]; then
    # shellcheck disable=SC1091
    source /etc/os-release
    if [[ "${ID:-}" == "ubuntu" ]]; then
      install_docker_ubuntu
      return
    fi
  fi

  err "Docker/Compose missing and automatic installation is implemented for Ubuntu only."
  err "See docs/TROUBLESHOOTING.md#docker-and-compose-not-available"
  exit 1
}
|
||||
|
||||
# Seed runtime config files from their checked-in examples on first run and
# create the data directories the compose stack bind-mounts. Existing files
# are never overwritten.
prepare_env_files() {
  # .env carries secrets; warn loudly when it is freshly created from the example.
  if [[ ! -f "${REPO_ROOT}/.env" ]]; then
    cp "${REPO_ROOT}/.env.example" "${REPO_ROOT}/.env"
    log "Created .env from .env.example."
    err "IMPORTANT: edit .env and set HF_TOKEN (and optionally VLLM_API_KEY) before production use."
  fi

  # The two service config files follow the same seed-from-example pattern.
  local rel
  for rel in backend/config/model.env frontend/config/frontend.env; do
    if [[ ! -f "${REPO_ROOT}/${rel}" ]]; then
      cp "${REPO_ROOT}/${rel}.example" "${REPO_ROOT}/${rel}"
      log "Created ${rel} from example."
    fi
  done

  mkdir -p "${REPO_ROOT}/models" "${REPO_ROOT}/frontend/data/open-webui"
}
|
||||
|
||||
# Warn (without aborting) when the ROCm device nodes are absent on the host;
# the stack will still start, but GPU access will fail inside the container.
warn_if_rocm_devices_missing() {
  if [[ -e /dev/kfd && -d /dev/dri ]]; then
    return
  fi
  err "ROCm device files /dev/kfd or /dev/dri are not available."
  err "See docs/TROUBLESHOOTING.md#rocm-devices-not-visible-in-host"
}
|
||||
|
||||
# Pull the container images, then start the stack detached. Both steps use the
# repo's compose file with the repo .env for variable interpolation.
start_stack() {
  local compose_cmd=(docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env")

  log "Pulling container images."
  "${compose_cmd[@]}" pull

  log "Starting containers in detached mode."
  "${compose_cmd[@]}" up -d
}
|
||||
|
||||
# Print compose service status for both services plus the local API/UI URLs.
# Ports are read from .env (last assignment wins), surrounding quotes are
# stripped, and 8000/3000 are used as fallbacks when unset. docker compose
# failures are tolerated so the URLs are always printed.
show_status_and_urls() {
  local backend_port frontend_port
  # -f2- (not -f2) keeps any '=' characters inside the value intact.
  backend_port="$(grep -E '^BACKEND_PORT=' "${REPO_ROOT}/.env" | tail -n1 | cut -d'=' -f2- || true)"
  frontend_port="$(grep -E '^FRONTEND_PORT=' "${REPO_ROOT}/.env" | tail -n1 | cut -d'=' -f2- || true)"
  # .env values may be quoted (PORT="8000"); drop quote characters so they do
  # not leak into the printed URLs.
  backend_port="${backend_port//[\"\']/}"
  frontend_port="${frontend_port//[\"\']/}"
  backend_port="${backend_port:-8000}"
  frontend_port="${frontend_port:-3000}"

  log "Backend status:"
  docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" ps gemma3-vllm || true

  log "Frontend status:"
  docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${REPO_ROOT}/.env" ps chat-ui || true

  printf '\n'
  log "API endpoint: http://localhost:${backend_port}/v1"
  log "Chat UI endpoint: http://localhost:${frontend_port}"
  log "If startup fails, inspect logs with: docker compose logs --tail=200 gemma3-vllm chat-ui"
}
|
||||
|
||||
# Orchestrates the full install flow. Steps run in dependency order; under
# set -e any unchecked failure aborts the script.
main() {
  require_linux                 # Linux-only: Docker Engine + ROCm assumptions below.
  check_or_install_docker       # Installs Docker/Compose on Ubuntu when missing.
  prepare_env_files             # Seeds .env/config files from examples on first run.
  warn_if_rocm_devices_missing  # Non-fatal warning when /dev/kfd or /dev/dri is absent.
  start_stack                   # docker compose pull + up -d.
  show_status_and_urls          # Prints service status and the local endpoints.
}

main "$@"
|
||||
scripts/restart.sh (new executable file, 25 lines)
|
||||
#!/usr/bin/env bash
# Restarts the Gemma 3 vLLM stack and shows service status.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"

log() {
  printf '%s\n' "[restart] $*"
}

# Fall back to the checked-in example when no real .env exists yet.
[[ -f "${ENV_FILE}" ]] || ENV_FILE="${REPO_ROOT}/.env.example"

# Shared compose invocation: same compose file and env file for every command.
compose() {
  docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" "$@"
}

log "Stopping stack."
compose down

log "Starting stack."
compose up -d

log "Current status:"
compose ps
||||
scripts/test_api.sh (new executable file, 54 lines)
|
||||
#!/usr/bin/env bash
# Tests local vLLM OpenAI-compatible API using curl and validates response shape.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"

# A real .env is required here (no .env.example fallback) because the API key
# and model id must match what the running backend was started with.
if [[ ! -f "${ENV_FILE}" ]]; then
  echo "[test_api][error] .env file not found. Copy .env.example to .env first." >&2
  exit 1
fi

# shellcheck disable=SC1090
source "${ENV_FILE}"

# Defaults mirror the stack's compose defaults when .env leaves these unset.
BACKEND_PORT="${BACKEND_PORT:-8000}"
GEMMA_MODEL_ID="${GEMMA_MODEL_ID:-google/gemma-3-1b-it}"
VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
API_URL="http://localhost:${BACKEND_PORT}/v1/chat/completions"

# Temp files for request/response bodies; the trap guarantees cleanup on any
# exit path, including failures under set -e.
payload_file="$(mktemp)"
response_file="$(mktemp)"
trap 'rm -f "${payload_file}" "${response_file}"' EXIT

# Unquoted heredoc delimiter on purpose: ${GEMMA_MODEL_ID} is expanded into
# the JSON request body.
cat > "${payload_file}" <<JSON
{
  "model": "${GEMMA_MODEL_ID}",
  "messages": [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Say hello from Gemma 3 running on vLLM."}
  ],
  "temperature": 0.2,
  "max_tokens": 64
}
JSON

# -w '%{http_code}' puts only the status code on stdout; the body goes to the file.
http_status="$(curl -sS -o "${response_file}" -w '%{http_code}' -H "Content-Type: application/json" -H "Authorization: Bearer ${VLLM_API_KEY}" -X POST "${API_URL}" --data @"${payload_file}")"

# Accept any 2xx status.
if [[ ! "${http_status}" =~ ^2 ]]; then
  echo "[test_api][error] API returned HTTP ${http_status}" >&2
  cat "${response_file}" >&2
  echo "[test_api][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks" >&2
  exit 1
fi

# Shallow shape check: an OpenAI-style completion response always carries a
# "choices" array. Avoids a jq dependency.
if ! grep -q '"choices"' "${response_file}"; then
  echo "[test_api][error] API response did not include expected 'choices' field." >&2
  cat "${response_file}" >&2
  exit 1
fi

echo "[test_api] Success. API responded with expected structure."
cat "${response_file}"
|
||||
scripts/test_python_client.py (new executable file, 75 lines)
|
||||
#!/usr/bin/env python3
"""Tests local vLLM OpenAI-compatible API using openai>=1.x Python client."""

from __future__ import annotations

# Standard library only at import time; the openai client is imported lazily
# inside main() so a missing package yields an actionable hint, not a traceback.
import os
import sys
from pathlib import Path
|
||||
|
||||
|
||||
def load_dotenv(dotenv_path: Path) -> None:
    """Load KEY=VALUE pairs from ``dotenv_path`` into ``os.environ``.

    Variables already present in the environment win (``setdefault``).
    Blank lines, ``#`` comment lines, and lines without ``=`` are skipped;
    one layer of surrounding double then single quotes is stripped from values.
    Missing files are silently ignored.
    """
    if not dotenv_path.exists():
        return

    for entry in dotenv_path.read_text(encoding="utf-8").splitlines():
        stripped = entry.strip()
        if not stripped or stripped.startswith("#") or "=" not in stripped:
            continue
        name, _, raw_value = stripped.partition("=")
        cleaned = raw_value.strip().strip('"').strip("'")
        os.environ.setdefault(name.strip(), cleaned)
|
||||
|
||||
|
||||
def main() -> int:
    """Send one chat completion to the local vLLM server and print the reply.

    Returns a process exit code: 0 on success, 1 on any failure (missing
    openai package, request/transport error, or empty/missing choices).
    """
    # .env lives at the repo root, one level above scripts/.
    repo_root = Path(__file__).resolve().parent.parent
    load_dotenv(repo_root / ".env")

    # Defaults mirror the stack's compose defaults when .env leaves these unset.
    backend_port = os.getenv("BACKEND_PORT", "8000")
    model_id = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it")
    api_key = os.getenv("VLLM_API_KEY", "EMPTY")
    base_url = f"http://localhost:{backend_port}/v1"

    # Lazy import so a missing client library produces an actionable hint
    # instead of a bare ImportError traceback.
    try:
        from openai import OpenAI
    except ImportError:
        print("[test_python_client][error] openai package is not installed.", file=sys.stderr)
        print("Install it with: python3 -m pip install openai", file=sys.stderr)
        return 1

    client = OpenAI(api_key=api_key, base_url=base_url)

    try:
        response = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": "You are a concise assistant."},
                {
                    "role": "user",
                    "content": "Say hello from Gemma 3 running on vLLM in one sentence.",
                },
            ],
            temperature=0.2,
            max_tokens=64,
        )
    except Exception as exc:  # broad on purpose: any client/transport failure is fatal here
        print(f"[test_python_client][error] Request failed: {exc}", file=sys.stderr)
        print(
            "[test_python_client][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks",
            file=sys.stderr,
        )
        return 1

    # Validate the response shape before dereferencing into it.
    if not response.choices or not response.choices[0].message:
        print("[test_python_client][error] No completion choices returned.", file=sys.stderr)
        return 1

    content = response.choices[0].message.content or ""
    print("[test_python_client] Success. Assistant response:")
    print(content.strip())
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
scripts/test_ui.sh (new executable file, 25 lines)
|
||||
#!/usr/bin/env bash
# Tests whether the chat UI is reachable on localhost frontend port.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"

# Pull FRONTEND_PORT from .env when present; otherwise rely on the default below.
if [[ -f "${ENV_FILE}" ]]; then
  # shellcheck disable=SC1090
  source "${ENV_FILE}"
fi

FRONTEND_PORT="${FRONTEND_PORT:-3000}"
UI_URL="http://localhost:${FRONTEND_PORT}"

http_status="$(curl -sS -o /dev/null -w '%{http_code}' "${UI_URL}")"

# Accept a plain 200 as well as the common redirect responses.
case "${http_status}" in
  200|301|302)
    ;;
  *)
    echo "[test_ui][error] UI check failed with HTTP status ${http_status} at ${UI_URL}" >&2
    echo "[test_ui][hint] See docs/TROUBLESHOOTING.md#ui-loads-but-cannot-reach-vllm-backend" >&2
    exit 1
    ;;
esac

echo "[test_ui] Chat UI is reachable at ${UI_URL} (HTTP ${http_status})."
|
||||
scripts/uninstall.sh (new executable file, 98 lines)
|
||||
#!/usr/bin/env bash
# Stops and removes the Gemma 3 vLLM stack. Optional --purge removes local model/cache data.
set -euo pipefail

# Resolve the repository root relative to this script so it works from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Set to 1 by --purge; 0 is the safe, data-preserving default.
PURGE=0
|
||||
|
||||
# Print an informational message to stdout with the script tag prefix.
log() {
  printf '%s\n' "[uninstall] $*"
}
|
||||
|
||||
# Print an error message to stderr with the script error tag prefix.
err() {
  printf '%s\n' "[uninstall][error] $*" >&2
}
|
||||
|
||||
# Print CLI usage to stdout. The quoted heredoc delimiter ('EOF') keeps the
# text literal — no variable expansion occurs inside it.
usage() {
  cat <<'EOF'
Usage: scripts/uninstall.sh [--purge]

Options:
  --purge     Remove local Hugging Face cache directory and ./models data in addition to containers/volumes.
  -h, --help  Show this help message.
EOF
}
|
||||
|
||||
# Parse command-line flags; only --purge and -h/--help are recognized.
# Anything else prints usage and exits non-zero.
for arg in "$@"; do
  case "${arg}" in
    --purge)
      PURGE=1
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      err "Unknown argument: ${arg}"
      usage
      exit 1
      ;;
  esac
done
|
||||
|
||||
# --- Main uninstall flow -----------------------------------------------------

if [[ ! -f "${REPO_ROOT}/docker-compose.yml" ]]; then
  err "docker-compose.yml not found at ${REPO_ROOT}."
  exit 1
fi

# Prefer the user's .env; fall back to the example so compose interpolation
# still resolves when the stack was never configured.
ENV_FILE="${REPO_ROOT}/.env"
if [[ ! -f "${ENV_FILE}" ]]; then
  ENV_FILE="${REPO_ROOT}/.env.example"
fi

log "Stopping stack and removing containers, networks, and named/anonymous volumes."
docker compose -f "${REPO_ROOT}/docker-compose.yml" --env-file "${ENV_FILE}" down -v || true

if [[ ${PURGE} -eq 1 ]]; then
  log "Purge requested. Removing local data directories used by this stack."

  # Last assignment wins; -f2- keeps values that themselves contain '='.
  huggingface_cache_dir="$(grep -E '^HUGGINGFACE_CACHE_DIR=' "${ENV_FILE}" | tail -n1 | cut -d'=' -f2- || true)"
  open_webui_data_dir="$(grep -E '^OPEN_WEBUI_DATA_DIR=' "${ENV_FILE}" | tail -n1 | cut -d'=' -f2- || true)"

  if [[ -n "${huggingface_cache_dir}" ]]; then
    # Expand variables such as ${USER} embedded in the .env value. The value
    # is wrapped in escaped double quotes so paths containing spaces survive
    # the eval as a single word (the previous quoting broke on spaces).
    # NOTE(review): eval on .env content can execute embedded command
    # substitutions — only run --purge against a trusted .env file.
    evaluated_hf_dir="$(eval "printf '%s' \"${huggingface_cache_dir}\"")"
    if [[ -d "${evaluated_hf_dir}" ]]; then
      log "Removing Hugging Face cache directory: ${evaluated_hf_dir}"
      # :? aborts if the path evaluated to empty; -- stops option parsing.
      rm -rf -- "${evaluated_hf_dir:?}"
    else
      log "Hugging Face cache directory not found: ${evaluated_hf_dir}"
    fi
  fi

  if [[ -z "${open_webui_data_dir}" ]]; then
    open_webui_data_dir="./frontend/data/open-webui"
  fi

  # Resolve repo-relative paths against REPO_ROOT instead of the caller's CWD.
  if [[ "${open_webui_data_dir}" == ./* ]]; then
    open_webui_data_dir="${REPO_ROOT}/${open_webui_data_dir#./}"
  fi

  if [[ -d "${open_webui_data_dir}" ]]; then
    log "Removing Open WebUI data directory: ${open_webui_data_dir}"
    rm -rf -- "${open_webui_data_dir:?}"
  fi

  if [[ -d "${REPO_ROOT}/models" ]]; then
    log "Removing local models directory: ${REPO_ROOT}/models"
    rm -rf -- "${REPO_ROOT}/models"
  fi
else
  log "Safe mode enabled (default). Local model/cache data was preserved."
fi

log "Uninstall complete."
|
||||
Reference in New Issue
Block a user