import os
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI
app = FastAPI()
# Define available models (you can expand this list)
AVAILABLE_MODELS = {
    "openai/gpt-4.1-mini": "OpenAI GPT-4.1 Mini",
    "deepseek/DeepSeek-V3-0324": "DeepSeek V3 0324",
    # Add more models as needed
}
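
# Optional discovery endpoint (a sketch, not part of the original service):
# lets clients list the model ids accepted by /generate.
@app.get("/models")
async def list_models():
    # Map of model id -> human-readable name
    return AVAILABLE_MODELS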

async def generate_ai_response(prompt: str, model: str):
    # Configuration for the GitHub Models inference endpoint (OpenAI-compatible API).
    # Token and model validation happen in the /generate endpoint below, before the
    # stream starts and the response status code is locked in.
    token = os.getenv("GITHUB_TOKEN")
    endpoint = "https://models.github.ai/inference"
    client = AsyncOpenAI(base_url=endpoint, api_key=token)
    try:
        stream = await client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant named Orion and made by Abdullah Ali"},
                {"role": "user", "content": prompt}
            ],
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True
        )
        # Forward content tokens to the client as they arrive
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
    except Exception as err:
        # The response has already started streaming, so raising HTTPException
        # here cannot set a status code; report the failure in-band instead.
        yield f"Error: {err}"
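
# Optional SSE framing (a sketch, not used by the original service):
# /generate below advertises media_type="text/event-stream", but the
# generator yields raw text chunks. Strict SSE clients (e.g. the browser
# EventSource API) expect "data: ...\n\n" framing, which this hypothetical
# wrapper adds; swap it in with
# StreamingResponse(as_sse(generate_ai_response(prompt, model)), ...).
async def as_sse(chunks):
    # Wrap an async iterator of text chunks in minimal SSE framing
    async for chunk in chunks:
        yield f"data: {chunk}\n\n"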

@app.post("/generate")
async def generate_response(
    prompt: str = Query(..., description="The prompt for the AI"),
    model: str = Query("openai/gpt-4.1-mini", description="The model to use for generation")
):
    # Validate up front: once the stream starts, the status code cannot change
    if not prompt.strip():
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    if model not in AVAILABLE_MODELS:
        raise HTTPException(status_code=400, detail=f"Model not available. Choose from: {', '.join(AVAILABLE_MODELS.keys())}")
    if not os.getenv("GITHUB_TOKEN"):
        raise HTTPException(status_code=500, detail="GitHub token not configured")
    return StreamingResponse(
        generate_ai_response(prompt, model),
        media_type="text/event-stream"
    )

def get_app():
    return app
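
# Running locally (a sketch; assumes this file is app.py, the uvicorn
# package is installed, and GITHUB_TOKEN is set in the environment):
#
#   uvicorn app:app --host 0.0.0.0 --port 8000
#
# Example request, streaming tokens back as they are generated (-N turns
# off curl's output buffering):
#
#   curl -N -X POST "http://localhost:8000/generate?prompt=Hello&model=openai/gpt-4.1-mini"
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)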