Spaces:

preston-cell
/

image-text-to-text

Sleeping

image-text-to-text / app.py

Update app.py

3ce024b verified 5 months ago

1.23 kB

	import gradio as gr
	from transformers import pipeline

	# Build the BLIP image-captioning pipeline once, at module import, so the
	# caption callback can reuse the same loaded model on every call.
	captioner = pipeline(
	    task="image-to-text",
	    model="Salesforce/blip-image-captioning-base",
	)

	def process_image(input_image):
	    """Return a caption for ``input_image``, or an error message on failure.

	    Args:
	        input_image: The image to caption, passed straight to the
	            module-level ``captioner`` pipeline. ``None`` (no image
	            present) is accepted and produces an empty caption.

	    Returns:
	        str: The ``generated_text`` of the first captioner result; an empty
	        string when ``input_image`` is ``None``; or ``str(exc)`` when the
	        captioner raises, so the failure is shown in the output textbox
	        instead of propagating.
	    """
	    # The image component's change event also fires when the image is
	    # cleared; without this guard the captioner's error text would be
	    # displayed as if it were a caption.
	    if input_image is None:
	        return ""

	    try:
	        # Step 1: Generate caption
	        results = captioner(input_image)
	        return results[0]['generated_text']
	    except Exception as exc:
	        # UI boundary: deliberately best-effort. Surface the message to the
	        # user rather than crashing the event handler.
	        return str(exc)

	# Set up Gradio app
	# NOTE(review): the source listing had lost its indentation, so the nesting
	# below is reconstructed from the statement order (sidebar text + login
	# button inside the Sidebar; heading + loaded model inside the Row).
	# Confirm against the deployed layout.
	with gr.Blocks(fill_height=True) as demo:
	    # Sidebar: title, description, and the login button whose token is
	    # handed to gr.load() below via accept_token.
	    with gr.Sidebar():
	        gr.Markdown("# SeeSay - Powered by Sesame CSM")
	        gr.Markdown("This Space extracts captions from images and generates expressive speech using CSM.")
	        gr.Markdown("Sign in with your Hugging Face account to access the model.")
	        button = gr.LoginButton("Sign in")

	    # Image Upload and Caption Generation
	    image_input = gr.Image(type="pil", label="Upload Image")
	    caption_output = gr.Textbox(label="Generated Caption")

	    # Speech Generation using CSM
	    with gr.Row():
	        gr.Markdown("### Speech Generation")
	        gr.load("models/sesame/csm-1b", accept_token=button, provider="hf-inference")

	    # Link input and output: any change to the image re-runs process_image
	    # and writes its return value into the caption textbox.
	    image_input.change(fn=process_image, inputs=image_input, outputs=caption_output)

	# Start the app; this runs at module level, as in the original script.
	demo.launch()