import io  # 1. For byte stream handling (file uploads)
from fastapi import FastAPI, File, UploadFile, Form  # 2. FastAPI imports for API endpoints and file handling
from fastapi.responses import JSONResponse  # 3. Used to return errors as JSON
# from transformers import BlipProcessor, BlipForConditionalGeneration  # 4. BLIP for image captioning
from PIL import Image  # 5. Pillow for image processing
import openai  # 6. OpenAI library for DALL·E API calls
import os  # 7. OS for environment variables
from face_to_prompt import extract_face_prompt
from desc import describe_image_with_gpt4o
# 8. Create the FastAPI app
app = FastAPI()

# Module-level cache of the most recent generation, read back by get_latest() below
last_image_data = {"caption": None, "prompt": None, "image_url": None}

# 9. Load BLIP processor and model at startup to avoid reloading on every request
# processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
# model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# 10. Get the OpenAI API key from the environment
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
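# Hypothetical shell setup for a local run (the variable name matches the lookup above):
#   export OPENAI_API_KEY="sk-..."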
# 11. Define the /generate POST endpoint
@app.post("/generate")
async def generate(
    image: UploadFile = File(...),  # 12. The uploaded image file
    style: str = Form("chibi"),  # 13. The desired style (chibi/anime/cartoon), defaults to "chibi"
):
    # 14. Save the uploaded image to a temp file for the captioning helper.
    # Earlier in-memory BLIP path, kept for reference:
    # img_bytes = await image.read()
    # img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    # # 15. Caption the image using BLIP
    # inputs = processor(img, return_tensors="pt")
    # out = model.generate(**inputs)
    # caption = processor.decode(out[0], skip_special_tokens=True)
    with open("/tmp/temp_input.jpg", "wb") as f:
        f.write(await image.read())
    # Caption the image with the GPT-4o helper (the face-prompt helper remains as an alternative):
    # caption = extract_face_prompt("/tmp/temp_input.jpg")
    caption = describe_image_with_gpt4o("/tmp/temp_input.jpg", OPENAI_API_KEY)
    # 16. Construct the DALL·E prompt using the style and the caption
    prompt = (
        f"A set of twelve {style}-style digital stickers of {caption}, "
        "each with a different expression: laughing, angry, crying, sulking, thinking, sleepy, blowing a kiss, winking, surprised, happy, sad, and confused. "
        "Each sticker has a bold black outline and a transparent background, in a playful, close-up cartoon style."
    )
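    # Illustrative only: with style="chibi" and a hypothetical caption of
    # "a smiling person with short dark hair and glasses", the template resolves to
    # "A set of twelve chibi-style digital stickers of a smiling person with short dark hair and glasses, ..."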
    # 17. Set the OpenAI API key
    openai.api_key = OPENAI_API_KEY
    try:
        # 18. Call DALL·E 3 to generate the image
        response = openai.images.generate(
            model="dall-e-3",
            prompt=prompt,
            n=1,
            size="1024x1024",
        )
        image_url = response.data[0].url  # 19. Get the image URL from the response
    except Exception as e:
        import traceback
        print("Error in /generate:", traceback.format_exc())
        # 20. Return a JSON error message if the API call fails
        return JSONResponse(content={"error": str(e)}, status_code=500)
    # 21. Cache the result for get_latest(), then return the caption, prompt, and image URL
    last_image_data.update({"caption": caption, "prompt": prompt, "image_url": image_url})
    return {"caption": caption, "prompt": prompt, "image_url": image_url}
# 22. Return the most recent generation (route path is an assumption; no decorator survived in the source)
@app.get("/latest")
def get_latest():
    if last_image_data["image_url"]:
        return last_image_data
    else:
        return {"error": "No image generated yet."}
# 23. Health-check endpoint at the root path
@app.get("/")
def root():
    return {"status": "ok"}