76 lines
2.4 KiB
Python
Executable File
76 lines
2.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Tests local vLLM OpenAI-compatible API using openai>=1.x Python client."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def load_dotenv(dotenv_path: Path) -> None:
    """Populate ``os.environ`` from KEY=VALUE pairs in *dotenv_path*.

    A missing file is silently ignored. Blank lines, ``#`` comments, and
    lines without ``=`` are skipped. Variables already present in the
    environment are never overwritten (``setdefault`` semantics).
    """
    if not dotenv_path.exists():
        return

    for raw in dotenv_path.read_text(encoding="utf-8").splitlines():
        entry = raw.strip()
        # Ignore blanks, comments, and anything that is not an assignment.
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        name, _, raw_value = entry.partition("=")
        # Drop surrounding double or single quotes from the value.
        cleaned = raw_value.strip().strip('"').strip("'")
        os.environ.setdefault(name.strip(), cleaned)
|
|
|
|
|
|
def main() -> int:
    """Send one chat completion to the local vLLM OpenAI-compatible API.

    Loads ``.env`` from the repository root, builds the client from
    ``BACKEND_PORT`` / ``GEMMA_MODEL_ID`` / ``VLLM_API_KEY`` (with defaults),
    and prints the assistant's reply.

    Returns:
        0 on success; 1 if the ``openai`` package is missing, the request
        fails, or no completion choices come back.
    """
    repo_root = Path(__file__).resolve().parent.parent
    load_dotenv(repo_root / ".env")

    port = os.getenv("BACKEND_PORT", "8000")
    model = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it")
    key = os.getenv("VLLM_API_KEY", "EMPTY")
    endpoint = f"http://localhost:{port}/v1"

    # The openai package is an optional runtime dependency; fail with a hint
    # rather than a traceback when it is absent.
    try:
        from openai import OpenAI
    except ImportError:
        print("[test_python_client][error] openai package is not installed.", file=sys.stderr)
        print("Install it with: python3 -m pip install openai", file=sys.stderr)
        return 1

    client = OpenAI(api_key=key, base_url=endpoint)

    conversation = [
        {"role": "system", "content": "You are a concise assistant."},
        {
            "role": "user",
            "content": "Say hello from Gemma 3 running on vLLM in one sentence.",
        },
    ]

    # Top-level boundary: any request failure is reported, not raised.
    try:
        response = client.chat.completions.create(
            model=model,
            messages=conversation,
            temperature=0.2,
            max_tokens=64,
        )
    except Exception as exc:
        print(f"[test_python_client][error] Request failed: {exc}", file=sys.stderr)
        print(
            "[test_python_client][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks",
            file=sys.stderr,
        )
        return 1

    if not response.choices or not response.choices[0].message:
        print("[test_python_client][error] No completion choices returned.", file=sys.stderr)
        return 1

    reply = response.choices[0].message.content or ""
    print("[test_python_client] Success. Assistant response:")
    print(reply.strip())
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # sys.exit raises SystemExit with main()'s return code as the status.
    sys.exit(main())
|