import os

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from openai import AsyncOpenAI

app = FastAPI()

# Initialize the OpenAI client once when the app starts
client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_API_KEY")  # Read the API key from an environment variable
)


async def generate_ai_response(prompt: str):
    try:
        # Create a streaming chat completion
        stream = await client.chat.completions.create(
            model="gpt-4",  # or "gpt-3.5-turbo" for faster/cheaper responses
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.7,  # Slightly less random than 1.0 for better coherence
            top_p=1.0,
            stream=True,
        )

        # Forward each token to the caller as it arrives
        async for chunk in stream:
            if chunk.choices and len(chunk.choices) > 0:
                content = chunk.choices[0].delta.content or ""
                yield content
    except Exception as err:
        yield f"Error generating response: {str(err)}"


@app.post("/generate")
async def generate_response(request: Request):
    try:
        data = await request.json()
        prompt = data.get("prompt", "")  # Empty string if no prompt provided

        if not prompt:
            # FastAPI ignores Flask-style `body, status` tuples, so return an
            # explicit JSONResponse to set the 400 status code
            return JSONResponse(status_code=400, content={"error": "No prompt provided"})

        return StreamingResponse(
            generate_ai_response(prompt),
            media_type="text/event-stream",
        )
    except Exception as e:
        return JSONResponse(status_code=400, content={"error": f"Invalid request: {str(e)}"})
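
# --- Usage sketch (illustrative, not part of the original snippet) ---
# A minimal async client for this endpoint, assuming the app above is saved
# as `main.py` and served with `uvicorn main:app` on localhost:8000 (the file
# name, host, port, and prompt are assumptions for the example). It uses
# httpx's streaming API to print tokens as the server flushes them; run it in
# a separate file/process from the server:
#
#   import asyncio
#   import httpx
#
#   async def main() -> None:
#       async with httpx.AsyncClient(timeout=None) as http:
#           async with http.stream(
#               "POST",
#               "http://localhost:8000/generate",
#               json={"prompt": "Write a haiku about rivers"},
#           ) as response:
#               # aiter_text() yields decoded chunks as they arrive
#               async for text in response.aiter_text():
#                   print(text, end="", flush=True)
#
#   asyncio.run(main())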