import os
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from openai import AsyncOpenAI
from typing import Optional

app = FastAPI()

class GenerateRequest(BaseModel):
    prompt: str

async def generate_ai_response(prompt: str, model: str):
    # Configuration for AI endpoint
    token = os.getenv("GITHUB_TOKEN")
    endpoint = os.getenv("AI_SERVER_URL", "https://models.github.ai/inference")  # Default fallback
    
    if not token:
        # The response has already begun streaming by the time this generator
        # runs, so an HTTPException cannot change the status code here; report
        # the problem in the stream body instead.
        yield "Error: GitHub token not configured"
        return

    client = AsyncOpenAI(base_url=endpoint, api_key=token)

    try:
        stream = await client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant named Orion, created by Abdullah Ali"},
                {"role": "user", "content": prompt}
            ],
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True
        )

        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content

    except Exception as err:
        # Headers have already been sent, so raising HTTPException here would
        # not produce a proper 500; surface the error in the stream instead.
        yield f"Error: {str(err)}"

@app.post("/generate", summary="Generate AI response", response_description="Streaming AI response")
async def generate_response(
    model: str = Query("default-model", description="The AI model to use"),
    prompt: Optional[str] = Query(None, description="The input text prompt for the AI"),
    request: Optional[GenerateRequest] = None
):
    """
    Generate a streaming AI response based on the provided prompt and model.
    
    - **model**: The AI model to use (specified as a query parameter, defaults to default-model)
    - **prompt**: The input text prompt for the AI (can be in query parameter or request body)
    """
    # Determine prompt source: query parameter or request body
    final_prompt = prompt if prompt is not None else (request.prompt if request is not None else None)
    
    if not final_prompt or not final_prompt.strip():
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    
    if not model or not model.strip():
        raise HTTPException(status_code=400, detail="Model cannot be empty")
    
    return StreamingResponse(
        generate_ai_response(final_prompt, model),
        media_type="text/event-stream"
    )

def get_app():
    # Application factory accessor (e.g. for servers that expect a callable
    # returning the ASGI app).
    return app
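
# A minimal client-side sketch for exercising the streaming endpoint, assuming
# the app is served locally (e.g. `uvicorn main:app --port 8000`, module name
# assumed) and that the `httpx` package is installed; the host, port, and
# model name below are illustrative placeholders:
#
#     import asyncio
#     import httpx
#
#     async def demo():
#         async with httpx.AsyncClient(timeout=None) as client:
#             async with client.stream(
#                 "POST",
#                 "http://localhost:8000/generate",
#                 params={"model": "openai/gpt-4o-mini", "prompt": "Say hello"},
#             ) as response:
#                 async for chunk in response.aiter_text():
#                     print(chunk, end="", flush=True)
#
#     asyncio.run(demo())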