from fastapi import FastAPI
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
import httpx
import asyncio
import json

# FastAPI app
app = FastAPI()

# CORS middleware: wide-open so a browser frontend on any origin can call /ask.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class Question(BaseModel):
    """Request body for POST /ask: a single user question."""
    question: str


# Base URL of the self-hosted OpenAI-compatible chat server (change as needed).
YOUR_SPACE_URL = "https://abdullahalioo-aiapp.hf.space"


async def generate_response_chunks(prompt: str):
    """Stream the assistant's reply to *prompt* one character at a time.

    Calls the upstream OpenAI-compatible ``/v1/chat/completions`` endpoint
    with ``stream: True`` and yields each character of every ``delta.content``
    chunk, sleeping briefly between characters to simulate typing.

    Parameters:
        prompt: The user's question, forwarded as the user message.

    Yields:
        Single characters of the streamed reply, or a single error string
        if the upstream server cannot be reached.
    """
    payload = {
        "messages": [
            {"role": "system", "content": "You are an Orion AI assistant created by Abdullah Ali who is very intelligent, 13 years old, and lives in Lahore."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7,
        "max_tokens": 512,
        "stream": True  # tell the upstream server to stream its output
    }

    try:
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream(
                "POST", f"{YOUR_SPACE_URL}/v1/chat/completions", json=payload
            ) as response:
                # Fail fast on 4xx/5xx instead of parsing an error body as a stream.
                response.raise_for_status()
                async for raw_line in response.aiter_lines():
                    line = raw_line.strip()
                    if not line:
                        continue
                    # OpenAI-style SSE chunks arrive as "data: {...}" lines;
                    # strip the prefix before JSON-decoding.
                    if line.startswith("data:"):
                        line = line[len("data:"):].strip()
                    # "[DONE]" is the end-of-stream sentinel, not JSON.
                    if line == "[DONE]":
                        break
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip keep-alives / non-JSON lines rather than
                        # injecting error text into the user-visible stream.
                        continue
                    # Chunks without a content delta (role-only first chunk,
                    # finish chunk) are expected; treat missing keys as empty.
                    content = (
                        data.get("choices", [{}])[0]
                        .get("delta", {})
                        .get("content")
                    )
                    if content:
                        for letter in content:
                            yield letter
                            await asyncio.sleep(0.01)  # simulate typing
    except httpx.HTTPError as e:
        # Network / status errors: surface one clear message, then stop.
        yield f"Error contacting model server: {e}"


@app.post("/ask")
async def ask(question: Question):
    """Stream a plain-text answer to the posted question."""
    return StreamingResponse(
        generate_response_chunks(question.question),
        media_type="text/plain",
    )