# app.py (copied from the Hugging Face Space file view)
# Last commit: "Update app.py" by preston-cell, 3ce024b (verified)
# File size: 1.23 kB
import gradio as gr
from transformers import pipeline
# Load the image-to-text model once at module level so that every call to
# process_image reuses the same pipeline object instead of rebuilding it.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
def process_image(input_image):
    """Generate a caption for an uploaded image.

    Args:
        input_image: The image value supplied by the Gradio image component,
            or ``None`` when no image is present (e.g. the upload was cleared).

    Returns:
        The ``generated_text`` of the first captioner result. An empty string
        is returned when there is no image or the captioner yields no result.
        If captioning raises, the exception message is returned instead so the
        failure is visible in the caption textbox rather than crashing the
        event handler.
    """
    # No image means there is nothing to caption; return an empty caption
    # instead of surfacing the pipeline's error text for a missing input.
    if input_image is None:
        return ""
    try:
        # Step 1: Generate caption
        results = captioner(input_image)
        if not results:
            # Avoid showing "list index out of range" as if it were a caption.
            return ""
        return results[0]['generated_text']
    except Exception as e:
        # Deliberate best-effort: show the error text in the output box.
        return str(e)
# Set up Gradio app
with gr.Blocks(fill_height=True) as demo:
    # Sidebar: app title, short description and the Hugging Face sign-in
    # button whose token is handed to the hosted speech model below.
    with gr.Sidebar():
        gr.Markdown("# SeeSay - Powered by Sesame CSM")
        gr.Markdown("This Space extracts captions from images and generates expressive speech using CSM.")
        gr.Markdown("Sign in with your Hugging Face account to access the model.")
        button = gr.LoginButton("Sign in")

    # Image Upload and Caption Generation
    image_input = gr.Image(type="pil", label="Upload Image")
    caption_output = gr.Textbox(label="Generated Caption")

    # Speech Generation using CSM
    # NOTE(review): the original nesting was lost; the heading and the loaded
    # model interface are assumed to sit together inside this row - confirm.
    with gr.Row():
        gr.Markdown("### Speech Generation")
        gr.load("models/sesame/csm-1b", accept_token=button, provider="hf-inference")

    # Link input and output: regenerate the caption whenever the image changes.
    # Event listeners must be registered inside the Blocks context.
    image_input.change(fn=process_image, inputs=image_input, outputs=caption_output)

# Launch only when executed as a script, so importing this module (for tests
# or tooling) builds `demo` without starting a server.
if __name__ == "__main__":
    demo.launch()