from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse, StreamingResponse
import os
import json
import random

import requests

app = FastAPI()
# Configuration: MODEL is fixed; everything else comes from environment variables
MODEL = "gpt-4o-mini"
API_URL = os.getenv("API_URL")
DISABLED = os.getenv("DISABLED") == "True"
OPENAI_API_KEYS = os.getenv("OPENAI_API_KEYS", "").split(",")
NUM_THREADS = int(os.getenv("NUM_THREADS", "1"))
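# Example environment setup. Variable names are taken from the getenv calls
# above; the API_URL value is an assumption (any OpenAI-compatible
# chat-completions endpoint that supports streaming should work):
#   export API_URL="https://api.openai.com/v1/chat/completions"
#   export OPENAI_API_KEYS="sk-...,sk-..."
#   export DISABLED="False"
#   export NUM_THREADS="1"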
# Minimal HTML shell; the original page embedded CSS and JS for the chat UI,
# which were stripped and are not reconstructed here.
HTML_CONTENT = """
<!DOCTYPE html>
<html>
<head><title>GPT-4o Mini Chat</title></head>
<body><h1>GPT-4o Mini: Research Preview</h1></body>
</html>
"""
@app.get("/", response_class=HTMLResponse)
async def home():
    if DISABLED:
        return "This app has reached OpenAI's usage limit. Please check back tomorrow."
    return HTML_CONTENT
@app.post("/chat")
async def chat(input: str = Form(...), top_p: float = Form(1.0), temperature: float = Form(1.0)):
    # `input` shadows the builtin, but it is kept to match the form field name
    # the frontend submits.
    if DISABLED:
        return StreamingResponse(iter(["Usage limit reached."]), media_type="text/plain")
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": input}],
        "temperature": temperature,
        "top_p": top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    # Rotate randomly across the configured keys to spread usage limits.
    OPENAI_API_KEY = random.choice(OPENAI_API_KEYS)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }

    def stream_response():
        try:
            response = requests.post(API_URL, headers=headers, json=payload, stream=True)
            response.raise_for_status()
            # The API streams server-sent events: one JSON object per
            # `data: {...}` line, terminated by a literal `data: [DONE]`.
            for chunk in response.iter_lines():
                if not chunk:
                    continue
                chunk_data = chunk.decode("utf-8")
                if not chunk_data.startswith("data: "):
                    continue
                data = chunk_data[6:]
                if data.strip() == "[DONE]":
                    break
                chunk_json = json.loads(data)
                choices = chunk_json.get("choices") or []
                if choices:
                    delta = choices[0].get("delta", {})
                    if "content" in delta:
                        yield delta["content"]
        except Exception as e:
            yield f"Error: {str(e)}"

    return StreamingResponse(stream_response(), media_type="text/plain")
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
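# Example request once the server is up (port 7860, as configured above):
#   curl -N -X POST http://localhost:7860/chat \
#        -d "input=Hello" -d "temperature=0.7" -d "top_p=1.0"
# The -N flag disables curl's output buffering so tokens print as they stream.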