# StyleSync / app.py
import os

import torch
import gradio as gr
import spaces  # required for the @spaces.GPU decorator on Hugging Face ZeroGPU Spaces
from huggingface_hub import login
from transformers import AutoProcessor, AutoModelForCausalLM
from diffusers import DiffusionPipeline

# Hugging Face token setup
hf_token = os.getenv("HF_AUTH_TOKEN")
if not hf_token:
    raise ValueError("Hugging Face token is not set in the environment variables.")
login(token=hf_token)

# Initialize the Stable Diffusion pipeline
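# (Note: SD 3.5 is commonly loaded in half precision, e.g. torch_dtype=torch.bfloat16,
# to reduce memory use; default precision is kept here as in the original.)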
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")

# Initialize captioning model and processor
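# (Assumption: any captioning model that loads via AutoProcessor/AutoModelForCausalLM
# fits here; "microsoft/git-base-coco" is one such example.)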
caption_model_name = "pretrained-caption-model" # Replace with the actual model name
processor = AutoProcessor.from_pretrained(caption_model_name)
model = AutoModelForCausalLM.from_pretrained(caption_model_name)

# Check for GPU availability (handled automatically by Hugging Face Spaces)
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe.to(device)
model.to(device)

# Function to process the image and generate a caption and design
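# (On ZeroGPU Spaces, the @spaces.GPU decorator allocates a GPU for the duration of each call.)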
@spaces.GPU
def generate_caption_and_design(image):
    # Generate a caption for the uploaded image
    inputs = processor(images=image, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}
    # max_length moved to generate(); in the processor call it has no effect on image-only inputs
    out = model.generate(**inputs, max_length=250)
    caption = processor.decode(out[0], skip_special_tokens=True)

    # Generate a design based on the caption
    generated_image = pipe(caption).images[0]
    return caption, generated_image

# Gradio interface
interface = gr.Interface(
    fn=generate_caption_and_design,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs=[gr.Textbox(label="Generated Caption"), gr.Image(label="Generated Design")],
    title="Image Caption and Design Generator",
    description="Upload an image to generate a caption, which is then used to create a similar design.",
)
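# (Optional: interface.queue() can be enabled to queue concurrent requests.)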

# Launch the Gradio app
interface.launch()