# StickerGen / app.py
import io # 1. For byte stream handling (file uploads)
from fastapi import FastAPI, File, UploadFile, Form # 2. FastAPI imports for API endpoints and file handling
from fastapi.responses import JSONResponse # 3. Used to return errors as JSON
from transformers import BlipProcessor, BlipForConditionalGeneration # 4. BLIP for image captioning
from PIL import Image # 5. Pillow for image processing
import openai # 6. OpenAI library for DALL·E API calls
import os # 7. OS for environment variables
# 8. Create the FastAPI app
app = FastAPI()
# 9. Load BLIP processor and model at startup to avoid reloading on every request
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
# 10. Get the OpenAI API key from environment variable
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
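# (The key is typically provided as a secret in the Space settings, or locally via
#  `export OPENAI_API_KEY=...`; if it is unset, the DALL·E call below will fail.)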
# 11. Define the /generate POST endpoint
@app.post("/generate")
async def generate(
    image: UploadFile = File(...),  # 12. The uploaded image file
    style: str = Form("chibi"),  # 13. The desired style (chibi/anime/cartoon), defaults to "chibi"
):
    # 14. Load and convert the uploaded image to RGB
    img_bytes = await image.read()
    img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    # 15. Caption the image using BLIP
    inputs = processor(img, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    # 16. Construct the DALL·E prompt using the style and the caption
    prompt = (
        f"A set of twelve {style}-style digital stickers of {caption}, "
        "each with a different expression: laughing, angry, crying, sulking, thinking, sleepy, blowing a kiss, winking, surprised, happy, sad, and confused. "
        "Each sticker has a bold black outline and a transparent background, in a playful, close-up cartoon style."
    )
    # 17. Set the OpenAI API key
    openai.api_key = OPENAI_API_KEY
    try:
        # 18. Call DALL·E 3 to generate the image
        response = openai.images.generate(
            model="dall-e-3",
            prompt=prompt,
            n=1,
            size="1024x1024"
        )
        image_url = response.data[0].url  # 19. Get the image URL from the response
    except Exception as e:
        # 20. Return a JSON error message if the API call fails
        return JSONResponse(content={"error": str(e)}, status_code=500)
    # 21. Return the BLIP caption, the constructed prompt, and the generated image URL
    return {"caption": caption, "prompt": prompt, "image_url": image_url}