Spaces:
Sleeping
Sleeping
File size: 1,229 Bytes
f30b843 e363033 608498c 1129fe7 83cd235 608498c 3ce024b 608498c 3ce024b 608498c 3ce024b 83cd235 3ce024b 83cd235 3ce024b ba2d445 3ce024b 5c86456 3ce024b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import gradio as gr
from transformers import pipeline
# Load image-to-text model
# BLIP base captioning checkpoint; weights are downloaded and cached on first run.
# NOTE(review): no `device` argument is passed, so inference runs on CPU by
# default — confirm this is intended for the Space's hardware.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
def process_image(input_image):
    """Generate a caption for *input_image* with the BLIP captioning pipeline.

    Parameters
    ----------
    input_image : PIL.Image.Image | None
        Image from the Gradio ``gr.Image`` component (``type="pil"``);
        ``None`` when the user clears the widget.

    Returns
    -------
    str
        The generated caption, an empty string when no image is present, or
        the error message text if captioning fails (displayed in the output
        textbox rather than crashing the app).
    """
    if input_image is None:
        # Gradio fires .change with None when the image is cleared; skip inference.
        return ""
    try:
        # The pipeline returns a list of dicts: [{"generated_text": "..."}].
        return captioner(input_image)[0]['generated_text']
    except Exception as e:
        # Surface the failure as text in the caption box instead of raising,
        # so the Space UI stays responsive.
        return str(e)
# Set up Gradio app: sidebar with HF login, image upload -> caption, and an
# embedded CSM speech-generation demo.
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# SeeSay - Powered by Sesame CSM")
        gr.Markdown("This Space extracts captions from images and generates expressive speech using CSM.")
        gr.Markdown("Sign in with your Hugging Face account to access the model.")
        # Login button; its token is forwarded to gr.load below so the
        # (gated) CSM model can be accessed on behalf of the signed-in user.
        button = gr.LoginButton("Sign in")

    # Image Upload and Caption Generation
    image_input = gr.Image(type="pil", label="Upload Image")
    caption_output = gr.Textbox(label="Generated Caption")

    # Speech Generation using CSM, embedded from the Hub via hf-inference.
    with gr.Row():
        gr.Markdown("### Speech Generation")
        gr.load("models/sesame/csm-1b", accept_token=button, provider="hf-inference")

    # Re-run captioning whenever the uploaded image changes.
    image_input.change(fn=process_image, inputs=image_input, outputs=caption_output)

demo.launch()