"""Show how to interact with an OpenAI-compatible server from a client.

The script sends one chat-completion request to the configured endpoint and
prints the generated text. The API key is read from the ``modal_api``
environment variable.
"""

import os

from openai import OpenAI

api_key = os.getenv("modal_api")
if api_key is None:
    # Passing api_key=None would let the OpenAI client fall back to the
    # OPENAI_API_KEY environment variable and send that credential to this
    # third-party endpoint, so fail early with a clear message instead.
    raise SystemExit(
        "Environment variable 'modal_api' is not set; "
        "export it with the API key for the server before running."
    )

# Point the client at the OpenAI-compatible endpoint at construction time
# rather than mutating base_url afterwards.
client = OpenAI(
    api_key=api_key,
    base_url="https://alexprincecursor--example-vllm-openai-compatible-serve.modal.run/v1/",
)

response = client.chat.completions.create(
    model="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
    messages=[
        {
            "role": "system",
            "content": "You are a rockstar lyric generator. You are given a song and you need to generate a lyric for it.",
        },
        {"role": "user", "content": "The song is 'Bohemian Rhapsody' by Queen."},
    ],
    max_tokens=512,
    temperature=0.7,
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)