Initial production-ready Gemma 3 vLLM ROCm stack
Co-Authored-By: Oz <oz-agent@warp.dev>
This commit is contained in:
scripts/test_python_client.py — new executable file, 75 lines added
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests local vLLM OpenAI-compatible API using openai>=1.x Python client."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_dotenv(dotenv_path: Path) -> None:
    """Populate ``os.environ`` from a dotenv file without overriding existing vars.

    Blank lines, ``#`` comment lines, and lines lacking an ``=`` are skipped.
    A single layer of surrounding double and single quotes is trimmed from
    values. A missing file is silently ignored.
    """
    if not dotenv_path.exists():
        return

    stripped_lines = (
        raw.strip()
        for raw in dotenv_path.read_text(encoding="utf-8").splitlines()
    )
    for entry in stripped_lines:
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        name, _, rhs = entry.partition("=")
        cleaned_value = rhs.strip().strip('"').strip("'")
        # setdefault: values already present in the real environment win
        # over the dotenv file.
        os.environ.setdefault(name.strip(), cleaned_value)
|
||||
|
||||
|
||||
def main() -> int:
    """Smoke-test the local vLLM OpenAI-compatible endpoint.

    Loads configuration from the repo-root ``.env`` (if present), sends a
    single chat-completion request to ``http://localhost:<BACKEND_PORT>/v1``,
    and prints the assistant reply.

    Returns:
        0 on success; 1 when the openai client is not installed, the
        request fails, or no completion choice is returned.
    """
    project_root = Path(__file__).resolve().parent.parent
    load_dotenv(project_root / ".env")

    # Connection settings, all overridable via the environment / .env.
    port = os.getenv("BACKEND_PORT", "8000")
    model = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it")
    key = os.getenv("VLLM_API_KEY", "EMPTY")
    endpoint = f"http://localhost:{port}/v1"

    try:
        from openai import OpenAI
    except ImportError:
        print("[test_python_client][error] openai package is not installed.", file=sys.stderr)
        print("Install it with: python3 -m pip install openai", file=sys.stderr)
        return 1

    client = OpenAI(api_key=key, base_url=endpoint)

    chat_messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {
            "role": "user",
            "content": "Say hello from Gemma 3 running on vLLM in one sentence.",
        },
    ]
    try:
        response = client.chat.completions.create(
            model=model,
            messages=chat_messages,
            temperature=0.2,
            max_tokens=64,
        )
    except Exception as exc:  # boundary: surface any client/network failure
        print(f"[test_python_client][error] Request failed: {exc}", file=sys.stderr)
        print(
            "[test_python_client][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks",
            file=sys.stderr,
        )
        return 1

    if not response.choices or not response.choices[0].message:
        print("[test_python_client][error] No completion choices returned.", file=sys.stderr)
        return 1

    reply = response.choices[0].message.content or ""
    print("[test_python_client] Success. Assistant response:")
    print(reply.strip())
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s status code to the shell (sys.exit raises SystemExit).
    sys.exit(main())
|
||||
Reference in New Issue
Block a user