Spaces:
Sleeping
Sleeping
File size: 2,573 Bytes
f7c0abb 19fe1fe fa8e2ce 19fe1fe d0fc55f 19fe1fe f7c0abb 19fe1fe fa8e2ce 19fe1fe 6025f1c 2372d93 6025f1c 19fe1fe d0fc55f f7c0abb 19fe1fe f7c0abb 9ab6d04 6025f1c d0fc55f f7c0abb d0fc55f 045ef7e f7c0abb 045ef7e 19fe1fe f7c0abb 19fe1fe b9e465f 9ab6d04 19fe1fe fa8e2ce 19fe1fe 93c4b1f 7a83ce6 20d0b59 387e225 19fe1fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import os
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from openai import AsyncOpenAI
from typing import Optional
# Application instance: the /generate route below is registered on it and
# get_app() returns it.
app = FastAPI()
class GenerateRequest(BaseModel):
    """Optional request body accepted by the /generate endpoint."""

    # Prompt text to forward to the model when no query-string prompt is given.
    prompt: str
async def generate_ai_response(prompt: str, model: str):
    """Stream the model's reply to *prompt* as plain-text chunks.

    This is an async generator: nothing runs until it is first iterated.

    Args:
        prompt: User message sent to the chat-completions endpoint.
        model: Model identifier passed through to the endpoint.

    Yields:
        Each non-empty ``delta.content`` string from the streamed completion.
        If the upstream call fails, a single ``"Error: ..."`` chunk is yielded
        and the stream ends.

    Raises:
        HTTPException: 500 on first iteration if ``GITHUB_TOKEN`` is not set.
    """
    import logging

    # Configuration for AI endpoint
    token = os.getenv("GITHUB_TOKEN")
    endpoint = os.getenv("AI_SERVER_URL", "https://models.github.ai/inference")  # Default fallback
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")

    client = AsyncOpenAI(base_url=endpoint, api_key=token)
    try:
        stream = await client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant named Orion, created by Abdullah Ali"},
                {"role": "user", "content": prompt},
            ],
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True,
        )
        async for chunk in stream:
            # Some chunks carry no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
    except Exception as err:
        # By the time this runs the caller is already consuming the stream, so
        # raising an HTTPException here could not change the HTTP status.
        # Record the failure server-side and report it in-band instead.
        logging.getLogger(__name__).exception("AI generation failed")
        yield f"Error: {str(err)}"
    finally:
        # A client is created per call; release its HTTP connections.
        await client.close()
@app.post("/generate", summary="Generate AI response", response_description="Streaming AI response")
async def generate_response(
    model: str = Query("default-model", description="The AI model to use"),
    prompt: Optional[str] = Query(None, description="The input text prompt for the AI"),
    request: Optional[GenerateRequest] = None,
):
    """
    Generate a streaming AI response based on the provided prompt and model.

    - **model**: The AI model to use (specified as a query parameter, defaults to default-model)
    - **prompt**: The input text prompt for the AI (can be in query parameter or request body)

    Responds with 400 when the prompt or model is empty, and with 500 when the
    server has no GitHub token configured.
    """
    # Determine prompt source: the query parameter takes precedence over the
    # request body.
    if prompt is not None:
        final_prompt = prompt
    elif request is not None:
        final_prompt = request.prompt
    else:
        final_prompt = None

    if not final_prompt or not final_prompt.strip():
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    if not model or not model.strip():
        raise HTTPException(status_code=400, detail="Model cannot be empty")

    # Check the token before streaming starts. generate_ai_response is a lazy
    # async generator, so its own check only fires once the response is already
    # being sent, too late to produce a proper 500 for the client.
    if not os.getenv("GITHUB_TOKEN"):
        raise HTTPException(status_code=500, detail="GitHub token not configured")

    return StreamingResponse(
        generate_ai_response(final_prompt, model),
        media_type="text/event-stream",
    )
def get_app():
    """Return the module-level FastAPI application instance."""
    return app
|