Spaces:

abdullahalioo
/

aiapp

Sleeping

File size: 1,964 Bytes

2c97dd8
d6be5f7
 
 
2c97dd8
d6be5f7
2c97dd8
d6be5f7
 
 
 
2c97dd8
d6be5f7
 
2c97dd8
d6be5f7
 
 
 
 
 
 
 
 
2c97dd8
1eaf71e
d6be5f7
 
2c97dd8
 
1eaf71e
d6be5f7
2c97dd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6be5f7
 
 
 
 
 
a6a8da7

import asyncio
import json
import os

import httpx
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

# FastAPI app
app = FastAPI()

# CORS Middleware
# Any origin may call this API. Credentials (cookies / HTTP auth) are
# deliberately NOT allowed: combining allow_credentials=True with wildcard
# origins is disallowed by the CORS rules and would let any website make
# credentialed requests on a visitor's behalf. Nothing in this module reads
# cookies or auth headers, so credentials are not needed.
# If credentialed requests are ever required, list the trusted origins
# explicitly in allow_origins before turning allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request body model
# JSON body accepted by the POST /ask endpoint: {"question": "<prompt text>"}.
class Question(BaseModel):
    question: str  # prompt text; forwarded upstream as the "user" message

# Base URL of your own hosted Hugging Face Space that serves the
# chat-completions API. Override it per deployment with the
# UPSTREAM_SPACE_URL environment variable instead of editing this file.
# A trailing slash is dropped so that appending "/v1/..." never yields "//".
YOUR_SPACE_URL = os.environ.get(
    "UPSTREAM_SPACE_URL", "https://abdullahalioo-aiapp.hf.space"
).rstrip("/")

def _extract_delta_text(line: str) -> str:
    """Return the text fragment carried by one line of the upstream stream.

    Accepts both server-sent-event framing (``data: {...}``) and a bare JSON
    object per line. Returns ``""`` for lines that carry no answer text:
    blank lines, SSE comments and metadata fields, the ``[DONE]`` sentinel,
    and chunks whose ``delta`` has no ``content`` (for example a role-only
    first chunk or an empty closing chunk).

    Raises:
        ValueError: the payload is not valid JSON.
        AttributeError, TypeError, LookupError: the JSON does not have the
            expected ``choices[0].delta`` object shape.
    """
    body = line.strip()
    # SSE comment lines (":keep-alive") and non-data fields carry no payload.
    if body.startswith((":", "event:", "id:", "retry:")):
        return ""
    if body.startswith("data:"):
        body = body[len("data:"):].strip()
    if not body or body == "[DONE]":
        return ""

    data = json.loads(body)
    choices = data.get("choices") or []
    if not choices:
        return ""
    delta = choices[0].get("delta") or {}
    return delta.get("content") or ""


async def generate_response_chunks(prompt: str):
    """Stream the model's answer to *prompt* one character at a time.

    Sends a streaming chat-completions request to ``YOUR_SPACE_URL`` and
    yields each character of the generated text, pausing briefly between
    characters to simulate typing.

    Args:
        prompt: The user's question, sent as the "user" message.

    Yields:
        Single-character strings of the answer. If the upstream server
        answers with an HTTP error status, or a line of the stream cannot be
        decoded, a human-readable error string is yielded instead of raising,
        because the HTTP response to the caller has already started.
    """
    payload = {
        "messages": [
            {"role": "system", "content": "You are an Orion AI assistant created by Abdullah Ali who is very intelligent, 13 years old, and lives in Lahore."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7,
        "max_tokens": 512,
        "stream": True  # Tell your server to stream output
    }
    url = f"{YOUR_SPACE_URL}/v1/chat/completions"

    # timeout=None: generation may pause for a long time between chunks, so
    # no read timeout is imposed on the upstream stream.
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=payload) as response:
            # An error body is not a completion stream; report the status
            # instead of trying to parse it chunk by chunk.
            if response.status_code >= 400:
                yield f"Error: upstream returned HTTP {response.status_code}"
                return

            async for line in response.aiter_lines():
                try:
                    text = _extract_delta_text(line)
                except (ValueError, LookupError, AttributeError, TypeError) as e:
                    # Best effort: surface the problem and keep streaming.
                    yield f"Error decoding stream: {e}"
                    continue
                for letter in text:
                    yield letter
                    await asyncio.sleep(0.01)  # simulate typing

@app.post("/ask")
async def ask(question: Question):
    # Hand the caller's question to the chunk generator and relay whatever it
    # yields back to the client as a plain-text streaming response.
    answer_stream = generate_response_chunks(question.question)
    return StreamingResponse(answer_stream, media_type="text/plain")