#!/usr/bin/env python3
"""Tests local vLLM OpenAI-compatible API using openai>=1.x Python client."""

from __future__ import annotations

import os
import sys
from pathlib import Path


def _parse_env_line(raw_line: str) -> tuple[str, str] | None:
    """Parse one ``.env`` line into a ``(key, value)`` pair.

    Args:
        raw_line: A single line of the file, without its line terminator.

    Returns:
        ``(key, value)`` with environment references in the value expanded,
        or ``None`` for blank lines, ``#`` comments, lines without ``=`` and
        lines whose key is empty.
    """
    line = raw_line.strip()
    if not line or line.startswith("#") or "=" not in line:
        return None
    if line.startswith("export "):
        # Accept shell-style "export KEY=value" lines.
        line = line[len("export "):].strip()

    key, _, value = line.partition("=")
    key = key.strip()
    if not key:
        # os.environ rejects an empty variable name with ValueError, which
        # would abort the whole load; treat "=value" as a malformed line.
        return None

    value = value.strip()
    # Remove one pair of matching surrounding quotes only. Stripping quote
    # characters unconditionally would corrupt values such as: say "hi"
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
        value = value[1:-1]
    return key, os.path.expandvars(value)


def load_dotenv(dotenv_path: Path) -> None:
    """Load ``KEY=value`` pairs from a dotenv file into ``os.environ``.

    Variables that are already set in the environment are left untouched.
    Nothing happens when ``dotenv_path`` is missing or is not a regular file.

    Args:
        dotenv_path: Location of the dotenv file to read.
    """
    if not dotenv_path.is_file():
        return

    # "utf-8-sig" drops a leading byte-order mark, which would otherwise
    # become part of the first key; files without a BOM decode unchanged.
    text = dotenv_path.read_text(encoding="utf-8-sig")
    for raw_line in text.splitlines():
        parsed = _parse_env_line(raw_line)
        if parsed is None:
            continue
        key, value = parsed
        os.environ.setdefault(key, value)


def main() -> int:
    """Send one chat completion request to the local server and print the reply.

    Settings are read from the environment after loading ``.env`` from the
    parent of this script's directory: ``GEMMA_MODEL_ID`` (required),
    ``BACKEND_PORT`` (default ``8000``) and ``VLLM_API_KEY`` (default
    ``EMPTY``).

    Returns:
        ``0`` when a completion was received, ``1`` on any failure.
    """
    repo_root = Path(__file__).resolve().parent.parent
    load_dotenv(repo_root / ".env")

    backend_port = os.getenv("BACKEND_PORT", "8000")
    model_id = os.getenv("GEMMA_MODEL_ID")
    if not model_id:
        print("[test_python_client][error] GEMMA_MODEL_ID is not set in .env.", file=sys.stderr)
        return 1

    api_key = os.getenv("VLLM_API_KEY", "EMPTY")
    base_url = f"http://localhost:{backend_port}/v1"

    # Imported here rather than at module level so a missing package yields
    # the install hint below instead of a traceback.
    try:
        from openai import OpenAI
    except ImportError:
        print("[test_python_client][error] openai package is not installed.", file=sys.stderr)
        print("Install it with: python3 -m pip install openai", file=sys.stderr)
        return 1

    client = OpenAI(api_key=api_key, base_url=base_url)

    try:
        response = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": "You are a concise assistant."},
                {
                    "role": "user",
                    "content": "Say hello from Gemma 3 running on vLLM in one sentence.",
                },
            ],
            temperature=0.2,
            max_tokens=64,
        )
    except Exception as exc:
        # Top-level boundary of a smoke test: report any failure and exit
        # with a non-zero status rather than showing a traceback.
        print(f"[test_python_client][error] Request failed: {exc}", file=sys.stderr)
        print(
            "[test_python_client][hint] See docs/TROUBLESHOOTING.md#vllm-container-exits-or-fails-healthchecks",
            file=sys.stderr,
        )
        return 1

    if not response.choices or not response.choices[0].message:
        print("[test_python_client][error] No completion choices returned.", file=sys.stderr)
        return 1

    # A missing (None) content is printed as an empty line.
    content = response.choices[0].message.content or ""
    print("[test_python_client] Success. Assistant response:")
    print(content.strip())
    return 0


if __name__ == "__main__":
    raise SystemExit(main())