# 🇧🇩 Bengali Sentiment Analysis

import gradio as gr
import cloudpickle
import codecs
import string
from bnltk.tokenize import Tokenizers

# Shared state filled in by load_models_and_components(); None until then.
model = tfidf_vectorizer = None  # loaded classifier and TF-IDF vectorizer
tokenizer = bangla_stopwords = punctuation_marks = None  # preprocessing helpers

def load_models_and_components():
    """Load the classifier, TF-IDF vectorizer, and preprocessing resources.

    Every resource is first loaded into a local variable and only published
    to the module-level globals once all steps have succeeded, so a failure
    part-way through never leaves the module half-initialised.

    Reads ``model.pkl``, ``tfidf_VECt.pkl`` and ``stopwords.txt`` from the
    current working directory. Stopwords are stored as a ``set`` so the
    membership tests done during preprocessing are constant-time.

    Returns:
        str: A success message, or ``"Error loading models: ..."`` with the
        text of the exception that interrupted loading.
    """
    global model, tfidf_vectorizer, tokenizer, bangla_stopwords, punctuation_marks

    try:
        # NOTE(security): unpickling can execute arbitrary code. Only load
        # .pkl files that come from a trusted source.
        with open('model.pkl', 'rb') as f:
            loaded_model = cloudpickle.load(f)

        with open('tfidf_VECt.pkl', 'rb') as f:
            loaded_vectorizer = cloudpickle.load(f)

        loaded_tokenizer = Tokenizers()

        # The context manager guarantees the file handle is closed.
        with open('stopwords.txt', 'r', encoding='utf-8') as f:
            loaded_stopwords = set(f.read().split())

        loaded_punctuation = set(string.punctuation)

    except Exception as e:
        # UI boundary: report the failure as a status string instead of raising.
        return f"Error loading models: {str(e)}"

    # Publish everything at once now that all resources loaded cleanly.
    model = loaded_model
    tfidf_vectorizer = loaded_vectorizer
    tokenizer = loaded_tokenizer
    bangla_stopwords = loaded_stopwords
    punctuation_marks = loaded_punctuation

    return "Models and components loaded successfully!"

def preprocess_text(text):
    """Tokenize ``text`` and drop punctuation tokens and Bangla stopwords.

    Relies on the module-level ``tokenizer``, ``punctuation_marks`` and
    ``bangla_stopwords``. Intended to mirror the preprocessing applied to
    the training data.

    Returns:
        str: The surviving tokens joined by single spaces.
    """
    kept_tokens = []
    for token in tokenizer.bn_word_tokenizer(text):
        # Skip anything that is a punctuation mark or a stopword.
        if token in punctuation_marks or token in bangla_stopwords:
            continue
        kept_tokens.append(token)

    return ' '.join(kept_tokens)

def predict_sentiment(input_text):
    """Classify the sentiment of ``input_text`` and format it for the UI.

    Args:
        input_text: Raw text entered by the user. ``None`` and
            whitespace-only values are treated as empty input.

    Returns:
        tuple[str, str]: ``(result, details)``. ``result`` is a Markdown
        summary (sentiment and confidence) or a user-facing message when the
        input is empty, the components are not loaded, or prediction fails;
        ``details`` holds the processed text, raw prediction and class
        probabilities, or ``""`` when there is nothing to report.
    """
    # Guard against None as well as blank strings before calling .strip().
    if input_text is None or not input_text.strip():
        return "Please enter some text to analyze.", ""

    # Preprocessing needs the tokenizer, stopwords and punctuation set too,
    # so require every component rather than just the model and vectorizer.
    required_components = (
        model,
        tfidf_vectorizer,
        tokenizer,
        bangla_stopwords,
        punctuation_marks,
    )
    if any(component is None for component in required_components):
        return "Models not loaded. Please load models first.", ""

    try:
        # Preprocess the input text
        processed_text = preprocess_text(input_text)

        if not processed_text.strip():
            return "After preprocessing, no valid words found. Please try different text.", ""

        # Transform using the loaded TF-IDF vectorizer
        transformed_input = tfidf_vectorizer.transform([processed_text])

        # Predict using the loaded model
        prediction = model.predict(transformed_input)[0]

        # Highest class probability is reported as the confidence score.
        # NOTE(review): assumes the model exposes predict_proba and that the
        # probability columns are ordered [negative, positive] -- confirm.
        prediction_proba = model.predict_proba(transformed_input)[0]
        confidence = max(prediction_proba) * 100

        # Label 1 is treated as positive; anything else as negative.
        sentiment = "Positive 😊" if prediction == 1 else "Negative 😞"

        # Create detailed result
        result = f"**Sentiment:** {sentiment}\n**Confidence:** {confidence:.2f}%"

        # Additional info
        details = f"**Processed Text:** {processed_text}\n**Raw Prediction:** {prediction}\n**Probabilities:** Negative: {prediction_proba[0]:.3f}, Positive: {prediction_proba[1]:.3f}"

        return result, details

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error during prediction: {str(e)}", ""

def create_gradio_interface():
    """Build the Gradio Blocks UI for the sentiment analyzer and return it.

    The page has a header, an input column (text box, analyze/clear buttons,
    model-loading controls), a results column, a list of clickable example
    sentences and a footer. Button clicks are wired to ``predict_sentiment``
    and ``load_models_and_components``.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    # Static page assets. The indentation inside the multi-line literals is
    # part of the string content and is kept as-is on purpose.
    custom_css = """
    .gradio-container {
        font-family: 'Arial', sans-serif;
    }
    .main-header {
        text-align: center;
        color: #2d3748;
        margin-bottom: 20px;
    }
    .prediction-box {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 15px;
        border-radius: 10px;
        margin: 10px 0;
    }
    """

    header_html = """
        <div class="main-header">
            <h1>🇧🇩 Bengali Sentiment Analysis</h1>
            <p>Analyze the sentiment of Bengali text using machine learning</p>
        </div>
        """

    footer_html = """
        <div style="text-align: center; margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
            <p><strong>Bengali Sentiment Analysis App</strong></p>
            <p>Powered by SVM with TF-IDF features | Built with Gradio</p>
            <p><em>Load the models first, then enter Bengali text to analyze sentiment</em></p>
        </div>
        """

    # Each example is a one-element row because there is a single input.
    sample_texts = [
        ["এই পণ্যটি অসাধারণ! আমি খুবই সন্তুষ্ট।"],
        ["এই পণ্যটি কাজ করছে না। খুবই খারাপ।"],
        ["দারুণ সার্ভিস! দ্রুত ডেলিভারি পেয়েছি।"],
        ["প্রোডাক্ট কোয়ালিটি ভালো না। টাকার অপচয়।"],
        ["চমৎকার অভিজ্ঞতা! আবার কিনব।"]
    ]

    with gr.Blocks(css=custom_css, title="Bengali Sentiment Analysis") as demo:
        gr.HTML(header_html)

        with gr.Row():
            # Left column: text entry, action buttons, model management.
            with gr.Column(scale=2):
                gr.Markdown("### 📝 Enter Bengali Text")
                text_box = gr.Textbox(
                    label="Bengali Text",
                    placeholder="এখানে বাংলা টেক্সট লিখুন... (Enter Bengali text here...)",
                    lines=4,
                    max_lines=8
                )

                with gr.Row():
                    analyze_button = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
                    clear_button = gr.Button("🗑️ Clear", variant="secondary")

                gr.Markdown("### ⚙️ Model Management")
                load_button = gr.Button("📥 Load Models", variant="secondary")
                status_box = gr.Textbox(label="Load Status", interactive=False)

            # Right column: prediction summary and the detailed breakdown.
            with gr.Column(scale=2):
                gr.Markdown("### 📊 Results")
                sentiment_view = gr.Markdown(label="Sentiment Analysis Result")
                details_box = gr.Textbox(label="Analysis Details", lines=6, interactive=False)

        gr.Markdown("### 💡 Example Texts to Try")
        gr.Examples(
            examples=sample_texts,
            inputs=[text_box],
            label="Click on any example to try it"
        )

        # Wire up the buttons (registration order kept: analyze, clear, load).
        analyze_button.click(
            fn=predict_sentiment,
            inputs=[text_box],
            outputs=[sentiment_view, details_box]
        )

        # Clearing resets the input and both output widgets to empty strings.
        clear_button.click(
            fn=lambda: ("", "", ""),
            outputs=[text_box, sentiment_view, details_box]
        )

        load_button.click(fn=load_models_and_components, outputs=[status_box])

        gr.HTML(footer_html)

    return demo

def main():
    """Print the startup banner, build the Gradio UI, and launch the server."""
    required_files = ("model.pkl", "tfidf_VECt.pkl", "stopwords.txt")

    print("Starting Bengali Sentiment Analysis App...")
    print("Make sure you have the following files in the specified paths:")
    for filename in required_files:
        print(f"- {filename}")

    launch_options = {
        "share": True,  # create a public link
        "inbrowser": True,  # open the app in a browser automatically
        "server_name": "0.0.0.0",  # accept connections from any IP
        "server_port": 7860,  # port number
        "show_error": True,  # show detailed error messages
    }

    app = create_gradio_interface()
    app.launch(**launch_options)

if __name__ == "__main__":
    # Best-effort bootstrap: install Gradio if it cannot be imported.
    # NOTE: the module-level `import gradio as gr` runs before this guard, so
    # a missing package normally fails there first; this check is a fallback.
    try:
        import gradio
    except ImportError:
        print("Installing Gradio...")
        import subprocess
        import sys

        # Run pip through the current interpreter so the package lands in the
        # environment executing this script, not whichever `pip` is on PATH.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])

    main()