Files
gemma3-vllm-stack/scripts/test_python_client.py
2026-04-18 22:53:46 +05:30

76 lines
2.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""Tests local vLLM OpenAI-compatible API using openai>=1.x Python client."""
from __future__ import annotations
import os
import sys
from pathlib import Path
def load_dotenv(dotenv_path: Path) -> None:
    """Merge KEY=VALUE pairs from *dotenv_path* into ``os.environ``.

    A missing file is a silent no-op. Blank lines, ``#`` comments, and
    lines without ``=`` are skipped. Surrounding quotes are stripped from
    values, and variables already present in the environment are never
    overwritten (``setdefault`` semantics).
    """
    if not dotenv_path.exists():
        return
    for entry in dotenv_path.read_text(encoding="utf-8").splitlines():
        stripped = entry.strip()
        # Ignore anything that is not a plausible KEY=VALUE assignment.
        if not stripped or stripped.startswith("#") or "=" not in stripped:
            continue
        name, _, raw_value = stripped.partition("=")
        cleaned = raw_value.strip().strip('"').strip("'")
        os.environ.setdefault(name.strip(), cleaned)
def main() -> int:
    """Smoke-test the local vLLM OpenAI-compatible endpoint.

    Loads connection settings from the repo-level ``.env`` (real environment
    variables win), sends one chat completion to the local server, and
    returns a shell-style exit code: 0 when a completion comes back,
    1 on any failure (missing client library, request error, empty choices).
    """
    project_root = Path(__file__).resolve().parent.parent
    load_dotenv(project_root / ".env")

    # Connection settings, all overridable via environment / .env.
    port = os.getenv("BACKEND_PORT", "8000")
    model = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it")
    key = os.getenv("VLLM_API_KEY", "EMPTY")
    endpoint = f"http://localhost:{port}/v1"

    # Import lazily so the script can print an actionable hint when the
    # openai client is missing instead of a raw traceback.
    try:
        from openai import OpenAI
    except ImportError:
        print("[test_python_client][error] openai package is not installed.", file=sys.stderr)
        print("Install it with: python3 -m pip install openai", file=sys.stderr)
        return 1

    client = OpenAI(api_key=key, base_url=endpoint)
    chat_messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {
            "role": "user",
            "content": "Say hello from Gemma 3 running on vLLM in one sentence.",
        },
    ]
    try:
        response = client.chat.completions.create(
            model=model,
            messages=chat_messages,
            temperature=0.2,
            max_tokens=64,
        )
    except Exception as err:  # broad on purpose: any transport/server error is fatal here
        print(f"[test_python_client][error] Request failed: {err}", file=sys.stderr)
        print(
            "[test_python_client][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks",
            file=sys.stderr,
        )
        return 1

    if not response.choices or not response.choices[0].message:
        print("[test_python_client][error] No completion choices returned.", file=sys.stderr)
        return 1

    reply = response.choices[0].message.content or ""
    print("[test_python_client] Success. Assistant response:")
    print(reply.strip())
    return 0
if __name__ == "__main__":
raise SystemExit(main())