import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
import matplotlib
matplotlib.use("Agg")  # Non-interactive backend so plotting works inside the Gradio worker thread
import matplotlib.pyplot as plt
from PIL import Image as PILImage

# Load the image captioning model and its processor
caption_model_name = "Salesforce/blip-image-captioning-large"
caption_processor = BlipProcessor.from_pretrained(caption_model_name)
caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_name)

# Load the emotion analysis model
emotion_model_name = "SamLowe/roberta-base-go_emotions"
emotion_classifier = pipeline("text-classification", model=emotion_model_name, return_all_scores=True)


def generate_caption_and_analyze_emotions(image=None, text=None):
    try:
        if image is None and not text:
            return "Please upload an image or enter some text.", "", None

        if image is not None:
            # Preprocess the image for caption generation
            caption_inputs = caption_processor(images=image, return_tensors="pt")
            # Generate caption using the caption model
            caption_ids = caption_model.generate(**caption_inputs)
            # Decode the output caption
            decoded_caption = caption_processor.decode(caption_ids[0], skip_special_tokens=True)
        else:
            decoded_caption = text

        # Perform emotion analysis on the generated caption or provided text
        results = emotion_classifier(decoded_caption)

        # Prepare data for visualization
        labels = [result['label'] for result in results[0]]
        scores = [result['score'] for result in results[0]]

        # Plot the emotion visualization
        plt.figure(figsize=(10, 5))
        plt.bar(labels, scores, color='skyblue')
        plt.xlabel('Emotions')
        plt.ylabel('Scores')
        plt.title('Emotion Analysis')
        plt.xticks(rotation=45)
        plt.tight_layout()

        # Save the plot as an image
        plt_path = "emotion_visualization.png"
        plt.savefig(plt_path)
        plt.close()

        # Load the saved image for Gradio
        vis_image = PILImage.open(plt_path)

        # Pick the highest-scoring emotion (the pipeline returns scores in label order, not sorted by score)
        top_result = max(results[0], key=lambda item: item['score'])
        sentiment_label = top_result['label']
        if sentiment_label == 'neutral':
            sentiment_text = "Sentiment of the text is"
        else:
            sentiment_text = "Sentiment of the text shows"

        caption_output = f"Caption: '{decoded_caption}'"
        sentiment_output = f"{sentiment_text} {sentiment_label}."
        return caption_output, sentiment_output, vis_image
    except Exception as e:
        return f"An error occurred: {e}", "", None


# Define the Gradio interface components
image_input = gr.Image(label="Upload an image", type="pil")
text_input = gr.Textbox(label="Or enter text", lines=2)
outputs = [
    gr.Textbox(label="Generated Caption"),
    gr.Textbox(label="Sentiment Analysis"),
    gr.Image(label="Emotion Visualization"),
]

# Create the Gradio app
app = gr.Interface(fn=generate_caption_and_analyze_emotions, inputs=[image_input, text_input], outputs=outputs)

# Launch the Gradio app
if __name__ == "__main__":
    app.launch()