KarthikAI committed
Commit 54e8ea7 · verified · 1 Parent(s): 90821c3

Upload 4 files

Files changed (4)
  1. Dockerfile +7 -0
  2. README.md +39 -10
  3. app.py +57 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,7 @@
+ # Minimal Dockerfile for Hugging Face Spaces or local deployment
+ FROM python:3.10-slim
+ WORKDIR /code
+ COPY . .
+ RUN pip install --upgrade pip && pip install -r requirements.txt
+ EXPOSE 7860
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
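+
+ # A hypothetical local build-and-run sketch (the image tag is illustrative,
+ # and the key value is a placeholder):
+ #   docker build -t stickergen .
+ #   docker run -p 7860:7860 -e OPENAI_API_KEY=sk-... stickergen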
README.md CHANGED
@@ -1,10 +1,39 @@
- ---
- title: StickerGen
- emoji: 😻
- colorFrom: indigo
- colorTo: yellow
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Hugging Face Space: Image to Chibi/Anime/Cartoon Sticker Generator
+
+ This Space provides an API endpoint `/generate` that:
+ 1. Accepts an image and a style (chibi, anime, cartoon, etc.).
+ 2. Captions the image using BLIP.
+ 3. Constructs a DALL·E 3 prompt and calls OpenAI's API to generate a set of stickers.
+ 4. Returns the generated sticker image URL.
+
+ ## Flow (Numbered)
+ 1. **User uploads an image** (`image`) and specifies a sticker style (`style`).
+ 2. **App generates a caption** using BLIP (image captioning).
+ 3. **App constructs a DALL·E 3 prompt** combining the style and the generated caption.
+ 4. **App sends the prompt to OpenAI DALL·E 3** and retrieves a sticker image URL.
+ 5. **App returns a JSON response** containing the caption, the prompt, and the image URL.
+
+ ## Usage
+
+ ### POST `/generate`
+ - `image`: (file, required) — Input image (JPEG/PNG).
+ - `style`: (text, optional, default: "chibi") — Sticker style ("chibi", "anime", "cartoon", etc.).
+
+ #### Response
+ ```json
+ {
+   "caption": "...",
+   "prompt": "...",
+   "image_url": "..."
+ }
+ ```
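+
+ #### Example request
+ A minimal sketch with `curl`; the file name is illustrative, and the port assumes the Docker setup (a bare `uvicorn app:app --reload` defaults to port 8000 instead):
+ ```bash
+ curl -X POST "http://localhost:7860/generate" \
+   -F "image=@photo.jpg" \
+   -F "style=chibi"
+ ```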
+
+ ### Requirements
+ - Hugging Face installs dependencies from `requirements.txt`.
+ - Set your OpenAI API key as an environment variable:
+   `OPENAI_API_KEY=sk-...`
+
+ ### Running locally
+ ```bash
+ uvicorn app:app --reload
+ ```
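+
+ When running outside Docker, export the key before starting the server (sketch; the key value is a placeholder):
+ ```bash
+ export OPENAI_API_KEY=sk-...
+ uvicorn app:app --reload
+ ```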
app.py ADDED
@@ -0,0 +1,57 @@
+ import io  # 1. For byte stream handling (file uploads)
+ from fastapi import FastAPI, File, UploadFile, Form  # 2. FastAPI imports for API endpoints and file handling
+ from fastapi.responses import JSONResponse  # 3. Used to return errors as JSON
+ from transformers import BlipProcessor, BlipForConditionalGeneration  # 4. BLIP for image captioning
+ from PIL import Image  # 5. Pillow for image processing
+ import openai  # 6. OpenAI library for DALL·E API calls
+ import os  # 7. OS for environment variables
+
+ # 8. Create the FastAPI app
+ app = FastAPI()
+
+ # 9. Load the BLIP processor and model at startup to avoid reloading on every request
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+ # 10. Get the OpenAI API key from the environment
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+
+ # 11. Define the /generate POST endpoint
+ @app.post("/generate")
+ async def generate(
+     image: UploadFile = File(...),  # 12. The uploaded image file
+     style: str = Form("chibi"),  # 13. The desired style (chibi/anime/cartoon), defaults to "chibi"
+ ):
+     # 14. Load and convert the uploaded image to RGB
+     img_bytes = await image.read()
+     img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+
+     # 15. Caption the image using BLIP
+     inputs = processor(img, return_tensors="pt")
+     out = model.generate(**inputs)
+     caption = processor.decode(out[0], skip_special_tokens=True)
+
+     # 16. Construct the DALL·E prompt from the style and the caption
+     prompt = (
+         f"A set of twelve {style}-style digital stickers of {caption}, "
+         "each with a different expression: laughing, angry, crying, sulking, thinking, sleepy, blowing a kiss, winking, surprised, happy, sad, and confused. "
+         "Each sticker has a bold black outline and a transparent background, in a playful, close-up cartoon style."
+     )
+
+     # 17. Set the OpenAI API key
+     openai.api_key = OPENAI_API_KEY
+     try:
+         # 18. Call DALL·E 3 to generate the image
+         response = openai.images.generate(
+             model="dall-e-3",
+             prompt=prompt,
+             n=1,
+             size="1024x1024",
+         )
+         image_url = response.data[0].url  # 19. Get the image URL from the response
+     except Exception as e:
+         # 20. Return a JSON error message if the API call fails
+         return JSONResponse(content={"error": str(e)}, status_code=500)
+
+     # 21. Return the BLIP caption, the constructed prompt, and the generated image URL
+     return {"caption": caption, "prompt": prompt, "image_url": image_url}
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ # Requirements for FastAPI app and AI inference
+ fastapi           # Web API framework for Python
+ uvicorn           # ASGI server for running FastAPI
+ pillow            # Image processing (PIL)
+ transformers      # Hugging Face Transformers library for the BLIP model
+ torch             # Required by BLIP for inference
+ openai            # OpenAI API client for DALL·E 3 image generation
+ python-multipart  # For file uploads with FastAPI
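+
+ # Local install sketch (outside Docker): pip install -r requirements.txt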