Initial production-ready Gemma 3 vLLM ROCm stack
Co-Authored-By: Oz <oz-agent@warp.dev>
This commit is contained in:
scripts/test_python_client.py — new executable file, 75 lines added
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests local vLLM OpenAI-compatible API using openai>=1.x Python client."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_dotenv(dotenv_path: Path) -> None:
    """Populate ``os.environ`` from a dotenv file without overriding existing vars.

    Blank lines, ``#`` comment lines, and lines lacking an ``=`` are skipped.
    A single layer of surrounding double and single quotes is trimmed from
    values. A missing file is silently ignored.
    """
    if not dotenv_path.exists():
        return

    stripped_lines = (
        raw.strip()
        for raw in dotenv_path.read_text(encoding="utf-8").splitlines()
    )
    for entry in stripped_lines:
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        name, _, rhs = entry.partition("=")
        cleaned_value = rhs.strip().strip('"').strip("'")
        # setdefault: values already present in the real environment win
        # over the dotenv file.
        os.environ.setdefault(name.strip(), cleaned_value)
|
||||
|
||||
|
||||
def main() -> int:
    """Smoke-test the local vLLM OpenAI-compatible endpoint.

    Loads configuration from the repo-root ``.env`` (if present), sends a
    single chat-completion request to ``http://localhost:<BACKEND_PORT>/v1``,
    and prints the assistant reply.

    Returns:
        0 on success; 1 when the openai client is not installed, the
        request fails, or no completion choice is returned.
    """
    project_root = Path(__file__).resolve().parent.parent
    load_dotenv(project_root / ".env")

    # Connection settings, all overridable via the environment / .env.
    port = os.getenv("BACKEND_PORT", "8000")
    model = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it")
    key = os.getenv("VLLM_API_KEY", "EMPTY")
    endpoint = f"http://localhost:{port}/v1"

    try:
        from openai import OpenAI
    except ImportError:
        print("[test_python_client][error] openai package is not installed.", file=sys.stderr)
        print("Install it with: python3 -m pip install openai", file=sys.stderr)
        return 1

    client = OpenAI(api_key=key, base_url=endpoint)

    chat_messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {
            "role": "user",
            "content": "Say hello from Gemma 3 running on vLLM in one sentence.",
        },
    ]
    try:
        response = client.chat.completions.create(
            model=model,
            messages=chat_messages,
            temperature=0.2,
            max_tokens=64,
        )
    except Exception as exc:  # boundary: surface any client/network failure
        print(f"[test_python_client][error] Request failed: {exc}", file=sys.stderr)
        print(
            "[test_python_client][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks",
            file=sys.stderr,
        )
        return 1

    if not response.choices or not response.choices[0].message:
        print("[test_python_client][error] No completion choices returned.", file=sys.stderr)
        return 1

    reply = response.choices[0].message.content or ""
    print("[test_python_client] Success. Assistant response:")
    print(reply.strip())
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s status code to the shell (sys.exit raises SystemExit).
    sys.exit(main())
|
||||
Reference in New Issue
Block a user