# Spaces:
# Sleeping
# Sleeping
import io

import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image as PILImage
# Image captioning: the processor and the generation model are both loaded
# from the same BLIP checkpoint id.
caption_model_name = "Salesforce/blip-image-captioning-large"
caption_processor = BlipProcessor.from_pretrained(caption_model_name)
caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_name)

# Emotion analysis: a text-classification pipeline configured to report a
# score for every label (the caller iterates over all of them).
emotion_model_name = "SamLowe/roberta-base-go_emotions"
emotion_classifier = pipeline(
    "text-classification",
    model=emotion_model_name,
    return_all_scores=True,
)
def _caption_image(image):
    """Return the caption the module-level BLIP model generates for ``image``."""
    caption_inputs = caption_processor(images=image, return_tensors="pt")
    caption_ids = caption_model.generate(**caption_inputs)
    # Only the first generated sequence is decoded.
    return caption_processor.decode(caption_ids[0], skip_special_tokens=True)


def _render_emotion_chart(labels, scores):
    """Draw the label/score bar chart and return it as a loaded PIL image."""
    # An explicit figure/axes pair avoids relying on pyplot's implicit
    # "current figure", which is shared by every caller in the process.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.bar(labels, scores, color='skyblue')
        ax.set_xlabel('Emotions')
        ax.set_ylabel('Scores')
        ax.set_title('Emotion Analysis')
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        # Render in memory: a fixed file name on disk would be overwritten
        # by any other request running at the same time.
        buffer = io.BytesIO()
        fig.savefig(buffer, format="png")
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    buffer.seek(0)
    chart = PILImage.open(buffer)
    # Force decoding now so the image no longer depends on the buffer.
    chart.load()
    return chart


def generate_caption_and_analyze_emotions(image=None, text=None):
    """Caption an image (or take the given text) and chart its emotion scores.

    When ``image`` is provided it is captioned with the module-level BLIP
    model and ``text`` is ignored; otherwise ``text`` is analysed directly.
    The resulting string is passed to the module-level ``emotion_classifier``
    and every label/score pair it returns is drawn as a bar chart.

    Args:
        image: Image to caption, in whatever form ``caption_processor``
            accepts, or ``None`` to analyse ``text`` instead.
        text: Text to analyse when no image is supplied.

    Returns:
        A 3-tuple ``(caption_output, sentiment_output, vis_image)``:
        the quoted caption line, a sentence naming the highest-scoring
        emotion, and the bar chart as a PIL image. When neither an image nor
        non-blank text is given, or when any step raises, the tuple is
        ``(message, "", None)`` so the UI shows the message instead of
        failing.
    """
    try:
        if image is not None:
            decoded_caption = _caption_image(image)
        elif text is not None and text.strip():
            decoded_caption = text
        else:
            # Nothing to analyse: report it instead of classifying None/blank.
            return "Please upload an image or enter some text.", "", None

        results = emotion_classifier(decoded_caption)
        emotion_scores = results[0]

        labels = [entry['label'] for entry in emotion_scores]
        scores = [entry['score'] for entry in emotion_scores]
        vis_image = _render_emotion_chart(labels, scores)

        # Pick the highest-scoring emotion; the first list entry is not
        # guaranteed to be the strongest one.
        top_emotion = max(emotion_scores, key=lambda entry: entry['score'])
        sentiment_label = top_emotion['label']
        if sentiment_label == 'neutral':
            sentiment_text = "Sentiment of the text is"
        else:
            sentiment_text = "Sentiment of the text shows"

        caption_output = f"Caption: '{decoded_caption}'"
        sentiment_output = f"{sentiment_text} {sentiment_label}."
        return caption_output, sentiment_output, vis_image
    except Exception as e:
        # UI boundary: surface the failure in the caption box rather than
        # letting the request crash.
        return f"An error occurred: {e}", "", None
# Input widgets: an image upload and a two-line text box.
image_input = gr.Image(label="Upload an image")
text_input = gr.Textbox(label="Or enter text", lines=2)

# Output widgets, listed in the order the callback returns its values.
outputs = [
    gr.Textbox(label="Generated Caption"),
    gr.Textbox(label="Sentiment Analysis"),
    gr.Image(label="Emotion Visualization"),
]

# Bind the callback to its inputs and outputs.
app = gr.Interface(
    fn=generate_caption_and_analyze_emotions,
    inputs=[image_input, text_input],
    outputs=outputs,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()