Spaces:

Fiqa
/

StyleSync

Runtime error

File size: 1,924 Bytes

import os
import requests
from PIL import Image
import torch
import gradio as gr
from huggingface_hub import login
from transformers import AutoProcessor, AutoModelForCausalLM
from diffusers import DiffusionPipeline

# Authenticate with the Hugging Face Hub using the token read from the
# HF_AUTH_TOKEN environment variable. An unset or empty value aborts startup.
hf_token = os.environ.get("HF_AUTH_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    raise ValueError("Hugging Face token is not set in the environment variables.")

# Initialize Stable Diffusion pipeline
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")

# Initialize captioning model and processor.
# The repo id must name a real checkpoint that loads through both
# AutoProcessor and AutoModelForCausalLM; a placeholder id makes
# from_pretrained fail at startup. GIT (microsoft/git-base-coco) is an
# image-captioning model of that kind and is used as the default. Set the
# CAPTION_MODEL_NAME environment variable to swap in another compatible
# checkpoint without editing the code.
caption_model_name = os.getenv("CAPTION_MODEL_NAME", "microsoft/git-base-coco")
processor = AutoProcessor.from_pretrained(caption_model_name)
model = AutoModelForCausalLM.from_pretrained(caption_model_name)

# Use CUDA when torch reports it as available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe.to(device)
model.to(device)

# Resolve the GPU decorator. `spaces` is the Hugging Face Spaces helper
# package; it must be imported before `spaces.GPU` can be referenced,
# otherwise the module fails with NameError while defining the function.
# When the package is not installed (e.g. running outside Spaces) the
# function is simply left undecorated.
# NOTE(review): the ZeroGPU docs suggest importing `spaces` early in the
# file; consider moving this import to the top-level import block.
try:
    import spaces
except ImportError:
    def _gpu_task(fn):
        """Identity decorator used when the `spaces` package is unavailable."""
        return fn
else:
    _gpu_task = spaces.GPU


# Function to process the image and generate caption and design
@_gpu_task
def generate_caption_and_design(image):
    """Caption an image, then generate a new image from that caption.

    Args:
        image: The image to caption (the Gradio input is configured with
            ``type="pil"``). ``None`` when the user submits without
            uploading anything.

    Returns:
        A ``(caption, generated_image)`` tuple: the decoded caption string
        and the first image produced by the diffusion pipeline for it.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an opaque processor failure.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    # Pass the image by keyword: some processors take `text` as their first
    # positional parameter, which would make a positional image be treated
    # as text. The text-side options (padding/truncation/max_length) are
    # kept as they were; presumably no-ops for image-only input -- TODO confirm.
    inputs = processor(
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=250,
    )
    # Move every tensor to the device the models were placed on.
    inputs = {key: val.to(device) for key, val in inputs.items()}
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    # Generate design based on caption
    generated_image = pipe(caption).images[0]

    return caption, generated_image

# Gradio Interface: one image upload in, the generated caption and the
# image generated from that caption out.
interface = gr.Interface(
    fn=generate_caption_and_design,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs=[gr.Textbox(label="Generated Caption"), gr.Image(label="Generated Design")],
    title="Image Caption and Design Generator",
    # The only input is an image upload, so the description must not
    # advertise a URL option that the interface does not offer.
    description="Upload an image to generate a caption and use it to create a similar design.",
)

# Launch Gradio app
interface.launch()