from fastapi import FastAPI
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
import httpx
import asyncio
import json

# FastAPI app
app = FastAPI()

# CORS Middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request body model
class Question(BaseModel):
    question: str

# Your OWN hosted HuggingFace Space URL
YOUR_SPACE_URL = "https://abdullahalioo-aiapp.hf.space"  # change this!
async def generate_response_chunks(prompt: str):
    payload = {
        "messages": [
            {"role": "system", "content": "You are an Orion AI assistant created by Abdullah Ali who is very intelligent, 13 years old, and lives in Lahore."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7,
        "max_tokens": 512,
        "stream": True  # Tell your server to stream output
    }

    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", f"{YOUR_SPACE_URL}/v1/chat/completions", json=payload) as response:
            async for line in response.aiter_lines():
                line = line.strip()
                if not line:
                    continue
                # OpenAI-style streams prefix each chunk with "data: " and end with "[DONE]"
                if line.startswith("data:"):
                    line = line[len("data:"):].strip()
                if line == "[DONE]":
                    break
                try:
                    # The server sends stream chunks, decode them
                    data = json.loads(line)
                    content = data["choices"][0]["delta"].get("content")
                    if content:
                        for letter in content:
                            yield letter
                            await asyncio.sleep(0.01)  # simulate typing
                except Exception as e:
                    yield f"Error decoding stream: {e}"
# Chat endpoint (the /ask route path is assumed; point your frontend at the same path)
@app.post("/ask")
async def ask(question: Question):
    return StreamingResponse(
        generate_response_chunks(question.question),
        media_type="text/plain"
    )