🇧🇩 Bengali Sentiment Analysis
Analyze the sentiment of Bengali text using machine learning
import gradio as gr import cloudpickle import codecs import string from bnltk.tokenize import Tokenizers # Global variables to store loaded models and components model = None tfidf_vectorizer = None tokenizer = None bangla_stopwords = None punctuation_marks = None def load_models_and_components(): """Load the saved model, vectorizer, and preprocessing components""" global model, tfidf_vectorizer, tokenizer, bangla_stopwords, punctuation_marks try: # Load the SVM Optimized model with open('model.pkl', 'rb') as f: model = cloudpickle.load(f) # Load the TF-IDF Vectorizer with open('tfidf_VECt.pkl', 'rb') as f: tfidf_vectorizer = cloudpickle.load(f) # Initialize tokenizer tokenizer = Tokenizers() # Load stopwords stopwords_list = "stopwords.txt" bangla_stopwords = codecs.open(stopwords_list, 'r', encoding='utf-8').read().split() # Define punctuation marks punctuation_marks = set(string.punctuation) return "Models and components loaded successfully!" except Exception as e: return f"Error loading models: {str(e)}" def preprocess_text(text): """Preprocess the input text similar to training data preprocessing""" # Tokenize the sentence words = tokenizer.bn_word_tokenizer(text) # Exclude punctuation marks words_no_punct = [word for word in words if word not in punctuation_marks] # Exclude stopwords words_clean = [word for word in words_no_punct if word not in bangla_stopwords] # Join words back into a string return ' '.join(words_clean) def predict_sentiment(input_text): """Predict sentiment for the input text""" if not input_text.strip(): return "Please enter some text to analyze.", "" if model is None or tfidf_vectorizer is None: return "Models not loaded. Please load models first.", "" try: # Preprocess the input text processed_text = preprocess_text(input_text) if not processed_text.strip(): return "After preprocessing, no valid words found. 
Please try different text.", "" # Transform using the loaded TF-IDF vectorizer transformed_input = tfidf_vectorizer.transform([processed_text]) # Predict using the loaded model prediction = model.predict(transformed_input)[0] # Get prediction probability for confidence score prediction_proba = model.predict_proba(transformed_input)[0] confidence = max(prediction_proba) * 100 # Determine sentiment sentiment = "Positive đ" if prediction == 1 else "Negative đ" # Create detailed result result = f"**Sentiment:** {sentiment}\n**Confidence:** {confidence:.2f}%" # Additional info details = f"**Processed Text:** {processed_text}\n**Raw Prediction:** {prediction}\n**Probabilities:** Negative: {prediction_proba[0]:.3f}, Positive: {prediction_proba[1]:.3f}" return result, details except Exception as e: return f"Error during prediction: {str(e)}", "" def create_gradio_interface(): """Create and configure the Gradio interface""" # Custom CSS for better styling css = """ .gradio-container { font-family: 'Arial', sans-serif; } .main-header { text-align: center; color: #2d3748; margin-bottom: 20px; } .prediction-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 15px; border-radius: 10px; margin: 10px 0; } """ with gr.Blocks(css=css, title="Bengali Sentiment Analysis") as demo: gr.HTML("""
Analyze the sentiment of Bengali text using machine learning
Bengali Sentiment Analysis App
Powered by SVM with TF-IDF features | Built with Gradio
Load the models first, then enter Bengali text to analyze sentiment