import io  # 1. For byte stream handling (file uploads)
from fastapi import FastAPI, File, UploadFile, Form  # 2. FastAPI imports for API endpoints and file handling
from fastapi.responses import JSONResponse  # 3. Used to return errors as JSON
# from transformers import BlipProcessor, BlipForConditionalGeneration  # 4. BLIP for image captioning
from PIL import Image  # 5. Pillow for image processing
import openai  # 6. OpenAI library for DALL·E API calls
import os  # 7. OS for environment variables
from face_to_prompt import extract_face_prompt
from desc import describe_image_with_gpt4o

# 8. Create the FastAPI app
app = FastAPI()

# 9. Load BLIP processor and model at startup to avoid reloading on every request
# processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
# model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# 10. Get the OpenAI API key from an environment variable
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Cache of the most recent generation, served by the /latest endpoint
last_image_data = {"caption": None, "prompt": None, "image_url": None}

# 11. Define the /generate POST endpoint
@app.post("/generate")
async def generate(
    image: UploadFile = File(...),  # 12. The uploaded image file
    style: str = Form("chibi"),  # 13. The desired style (chibi/anime/cartoon), defaults to "chibi"
):
    # 14. Load and convert the uploaded image to RGB
    # img_bytes = await image.read()
    # img = Image.open(io.BytesIO(img_bytes)).convert("RGB")

    # 15. Caption the image using BLIP
    # inputs = processor(img, return_tensors="pt")
    # out = model.generate(**inputs)
    # caption = processor.decode(out[0], skip_special_tokens=True)

    # Save the upload to a temp file, then caption it with GPT-4o
    with open("/tmp/temp_input.jpg", "wb") as f:
        f.write(await image.read())
    # caption = extract_face_prompt("/tmp/temp_input.jpg")
    caption = describe_image_with_gpt4o("/tmp/temp_input.jpg", OPENAI_API_KEY)

    # 16. Construct the DALL·E prompt using the style and the caption
    prompt = (
        f"A set of twelve {style}-style digital stickers of {caption}, "
        "each with a different expression: laughing, angry, crying, sulking, "
        "thinking, sleepy, blowing a kiss, winking, surprised, happy, sad, and confused. "
        "Each sticker has a bold black outline and a transparent background, "
        "in a playful, close-up cartoon style."
    )

    # 17. Set the OpenAI API key
    openai.api_key = OPENAI_API_KEY
    try:
        # 18. Call DALL·E 3 to generate the image
        response = openai.images.generate(
            model="dall-e-3",
            prompt=prompt,
            n=1,
            size="1024x1024",
        )
        image_url = response.data[0].url  # 19. Get the image URL from the response
    except Exception as e:
        import traceback
        print("Error in /generate:", traceback.format_exc())
        # 20. Return a JSON error message if the API call fails
        return JSONResponse(content={"error": str(e)}, status_code=500)

    # Cache the result so the /latest endpoint can serve it
    last_image_data.update({"caption": caption, "prompt": prompt, "image_url": image_url})

    # 21. Return the caption, the constructed prompt, and the generated image URL
    return {"caption": caption, "prompt": prompt, "image_url": image_url}


# Return the most recently generated result, or an error if nothing has been generated yet
@app.get("/latest")
def get_latest():
    if last_image_data["image_url"]:
        return last_image_data
    return {"error": "No image generated yet."}


# Simple health check
@app.get("/")
def root():
    return {"status": "ok"}
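
# The captioning helper lives in desc.py, which is not shown here. Purely as a
# hedged illustration, the sketch below shows what such a helper *might* look
# like using the openai 1.x vision API; the function name (prefixed to avoid
# shadowing the real import), model choice, and prompt text are assumptions,
# not the actual implementation of desc.describe_image_with_gpt4o.
def _describe_image_with_gpt4o_sketch(image_path: str, api_key: str) -> str:
    import base64
    from openai import OpenAI

    client = OpenAI(api_key=api_key)
    # Encode the image as a base64 data URL, the inline format GPT-4o vision accepts
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": "Briefly describe the person in this photo for an illustrator."},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
            ],
        }],
    )
    return response.choices[0].message.content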
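
# --- Usage (a sketch; assumes this file is saved as main.py) ---
# Start the server:
#   uvicorn main:app --reload
# Request a sticker sheet (multipart form matching the File/Form parameters above):
#   curl -X POST http://localhost:8000/generate -F "image=@face.jpg" -F "style=anime"
# Fetch the most recent result:
#   curl http://localhost:8000/latest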