# aiapp/main.py
from fastapi import FastAPI
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
import httpx
import asyncio
import json
# FastAPI app
app = FastAPI()
# CORS Middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Request body model
class Question(BaseModel):
question: str
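# Example request body for POST /ask (matches the model above):
#   {"question": "What is FastAPI?"}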
# Your own hosted Hugging Face Space URL
YOUR_SPACE_URL = "https://abdullahalioo-aiapp.hf.space"  # 🔥 change this!
async def generate_response_chunks(prompt: str):
payload = {
"messages": [
{"role": "system", "content": "You are an Orion AI assistant created by Abdullah Ali who is very intelligent, 13 years old, and lives in Lahore."},
{"role": "user", "content": prompt}
],
"temperature": 0.7,
"max_tokens": 512,
"stream": True # Tell your server to stream output
}
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", f"{YOUR_SPACE_URL}/v1/chat/completions", json=payload) as response:
            async for line in response.aiter_lines():
                if not line.strip():
                    continue
                # OpenAI-compatible servers stream SSE lines prefixed with "data: "
                if line.startswith("data: "):
                    line = line[len("data: "):]
                # The stream ends with a literal "[DONE]" marker
                if line.strip() == "[DONE]":
                    break
                try:
                    # Decode one stream chunk and pull out the delta text, if any
                    data = json.loads(line)
                    content = data["choices"][0]["delta"].get("content")
                    if content:
                        for letter in content:
                            yield letter
                            await asyncio.sleep(0.01)  # simulate typing
                except Exception as e:
                    yield f"Error decoding stream: {e}"
@app.post("/ask")
async def ask(question: Question):
return StreamingResponse(
generate_response_chunks(question.question),
media_type="text/plain"
)
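
# A minimal local entrypoint and client sketch (not in the original file):
# the port 7860 and the localhost URL below are assumptions for local testing,
# not part of the deployed Space configuration.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example client usage (assumes the server above is running locally):
#
#   import asyncio
#   import httpx
#
#   async def main():
#       async with httpx.AsyncClient(timeout=None) as client:
#           async with client.stream(
#               "POST", "http://localhost:7860/ask", json={"question": "Hello!"}
#           ) as resp:
#               async for chunk in resp.aiter_text():
#                   print(chunk, end="", flush=True)
#
#   asyncio.run(main())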