76 lines
2.4 KiB
Python
Executable File
76 lines
2.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Tests local vLLM OpenAI-compatible API using openai>=1.x Python client."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def load_dotenv(dotenv_path: Path) -> None:
    """Populate ``os.environ`` from KEY=VALUE pairs in *dotenv_path*.

    A missing file is silently ignored. Blank lines, ``#`` comments, and
    lines without ``=`` are skipped. Variables already present in the
    environment are never overwritten (``setdefault`` semantics).
    """
    if not dotenv_path.exists():
        return

    for raw in dotenv_path.read_text(encoding="utf-8").splitlines():
        entry = raw.strip()
        # Ignore blanks, comments, and anything that is not an assignment.
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        name, _, raw_value = entry.partition("=")
        # Drop surrounding double or single quotes from the value.
        cleaned = raw_value.strip().strip('"').strip("'")
        os.environ.setdefault(name.strip(), cleaned)
|
|
|
|
|
|
def main() -> int:
    """Send one chat completion to the local vLLM OpenAI-compatible API.

    Loads ``.env`` from the repository root, builds the client from
    ``BACKEND_PORT`` / ``GEMMA_MODEL_ID`` / ``VLLM_API_KEY`` (with defaults),
    and prints the assistant's reply.

    Returns:
        0 on success; 1 if the ``openai`` package is missing, the request
        fails, or no completion choices come back.
    """
    repo_root = Path(__file__).resolve().parent.parent
    load_dotenv(repo_root / ".env")

    port = os.getenv("BACKEND_PORT", "8000")
    model = os.getenv("GEMMA_MODEL_ID", "google/gemma-3-1b-it")
    key = os.getenv("VLLM_API_KEY", "EMPTY")
    endpoint = f"http://localhost:{port}/v1"

    # The openai package is an optional runtime dependency; fail with a hint
    # rather than a traceback when it is absent.
    try:
        from openai import OpenAI
    except ImportError:
        print("[test_python_client][error] openai package is not installed.", file=sys.stderr)
        print("Install it with: python3 -m pip install openai", file=sys.stderr)
        return 1

    client = OpenAI(api_key=key, base_url=endpoint)

    conversation = [
        {"role": "system", "content": "You are a concise assistant."},
        {
            "role": "user",
            "content": "Say hello from Gemma 3 running on vLLM in one sentence.",
        },
    ]

    # Top-level boundary: any request failure is reported, not raised.
    try:
        response = client.chat.completions.create(
            model=model,
            messages=conversation,
            temperature=0.2,
            max_tokens=64,
        )
    except Exception as exc:
        print(f"[test_python_client][error] Request failed: {exc}", file=sys.stderr)
        print(
            "[test_python_client][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks",
            file=sys.stderr,
        )
        return 1

    if not response.choices or not response.choices[0].message:
        print("[test_python_client][error] No completion choices returned.", file=sys.stderr)
        return 1

    reply = response.choices[0].message.content or ""
    print("[test_python_client] Success. Assistant response:")
    print(reply.strip())
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # sys.exit raises SystemExit with main()'s return code as the status.
    sys.exit(main())
|