File size: 8,128 Bytes
8bccf80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 |
import gradio as gr
import cloudpickle
import codecs
import string
from bnltk.tokenize import Tokenizers
# Module-level state shared by the loader and the prediction helpers.
# Everything starts out as None and is filled in by
# load_models_and_components().

# Trained artefacts: the classifier and the TF-IDF vectorizer.
model = tfidf_vectorizer = None

# Preprocessing resources: word tokenizer, stopword collection and
# punctuation set.
tokenizer = bangla_stopwords = punctuation_marks = None
def load_models_and_components():
    """Load the model, the TF-IDF vectorizer and the preprocessing resources.

    Reads ``model.pkl`` and ``tfidf_VECt.pkl`` with cloudpickle, creates a
    ``Tokenizers`` instance, reads the whitespace-separated stopwords from
    ``stopwords.txt`` (UTF-8) and builds the punctuation set from
    ``string.punctuation``.

    The module-level globals are only updated after every step has
    succeeded, so a failure halfway through never leaves the application
    with a partially loaded set of components.

    Returns:
        str: A success message, or ``"Error loading models: <reason>"`` when
        any step raised.
    """
    global model, tfidf_vectorizer, tokenizer, bangla_stopwords, punctuation_marks
    try:
        # NOTE(security): unpickling can execute arbitrary code; these files
        # must come from a trusted source.
        with open('model.pkl', 'rb') as f:
            loaded_model = cloudpickle.load(f)

        with open('tfidf_VECt.pkl', 'rb') as f:
            loaded_vectorizer = cloudpickle.load(f)

        loaded_tokenizer = Tokenizers()

        # Use a context manager so the stopwords file handle is always closed.
        stopwords_list = "stopwords.txt"
        with codecs.open(stopwords_list, 'r', encoding='utf-8') as f:
            loaded_stopwords = f.read().split()

        loaded_punctuation = set(string.punctuation)
    except Exception as e:
        # Top-level boundary: the message is shown in the UI status box.
        return f"Error loading models: {str(e)}"

    # Commit everything at once now that all components are available.
    model = loaded_model
    tfidf_vectorizer = loaded_vectorizer
    tokenizer = loaded_tokenizer
    bangla_stopwords = loaded_stopwords
    punctuation_marks = loaded_punctuation
    return "Models and components loaded successfully!"
def preprocess_text(text):
    """Tokenize *text* and strip punctuation tokens and stopwords.

    Relies on the module-level ``tokenizer``, ``punctuation_marks`` and
    ``bangla_stopwords`` having been populated beforehand.

    Returns:
        str: The remaining tokens joined with single spaces.
    """
    kept_tokens = []
    for token in tokenizer.bn_word_tokenizer(text):
        # Drop tokens that are a punctuation mark.
        if token in punctuation_marks:
            continue
        # Drop stopwords.
        if token in bangla_stopwords:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)
def predict_sentiment(input_text):
    """Predict the sentiment of *input_text* and format it for the UI.

    Args:
        input_text: The raw text typed by the user. ``None`` and
            whitespace-only values are treated as empty input.

    Returns:
        tuple[str, str]: ``(result, details)``. ``result`` is a Markdown
        string with the sentiment label and confidence; ``details`` holds the
        processed text, raw prediction and per-class probabilities. On any
        problem ``result`` carries a human-readable message and ``details``
        is an empty string.
    """
    # Treat a missing value like blank text instead of failing on .strip().
    if input_text is None or not input_text.strip():
        return "Please enter some text to analyze.", ""

    # Preprocessing needs the tokenizer, stopwords and punctuation set as
    # well, so require every component rather than only the model and the
    # vectorizer.
    required = (model, tfidf_vectorizer, tokenizer, bangla_stopwords, punctuation_marks)
    if any(component is None for component in required):
        return "Models not loaded. Please load models first.", ""

    try:
        processed_text = preprocess_text(input_text)
        if not processed_text.strip():
            return "After preprocessing, no valid words found. Please try different text.", ""

        # Vectorize with the loaded TF-IDF vectorizer, then classify.
        transformed_input = tfidf_vectorizer.transform([processed_text])
        prediction = model.predict(transformed_input)[0]

        # The highest class probability is reported as the confidence.
        prediction_proba = model.predict_proba(transformed_input)[0]
        confidence = max(prediction_proba) * 100

        # Label 1 is reported as positive, anything else as negative.
        sentiment = "Positive 😊" if prediction == 1 else "Negative 😞"

        result = f"**Sentiment:** {sentiment}\n**Confidence:** {confidence:.2f}%"
        # NOTE(review): assumes probability index 0 is the negative class and
        # index 1 the positive class - confirm against the model's class order.
        details = f"**Processed Text:** {processed_text}\n**Raw Prediction:** {prediction}\n**Probabilities:** Negative: {prediction_proba[0]:.3f}, Positive: {prediction_proba[1]:.3f}"
        return result, details
    except Exception as e:
        # UI boundary: report the failure in the result panel.
        return f"Error during prediction: {str(e)}", ""
def create_gradio_interface():
    """Build the Gradio Blocks UI and return it without launching.

    The page contains a header, an input area with analyze/clear buttons and
    a model-loading control, a results area, clickable example texts and a
    footer. Button clicks are wired to ``predict_sentiment``,
    ``load_models_and_components`` and a small reset callback.

    NOTE(review): the row/column nesting below is reconstructed from the
    order of the components - confirm it matches the intended layout.

    Returns:
        The configured ``gr.Blocks`` instance.
    """
    # Custom stylesheet handed to gr.Blocks.
    stylesheet = """
.gradio-container {
font-family: 'Arial', sans-serif;
}
.main-header {
text-align: center;
color: #2d3748;
margin-bottom: 20px;
}
.prediction-box {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 15px;
border-radius: 10px;
margin: 10px 0;
}
"""

    header_markup = """
<div class="main-header">
<h1>🇧🇩 Bengali Sentiment Analysis</h1>
<p>Analyze the sentiment of Bengali text using machine learning</p>
</div>
"""

    footer_markup = """
<div style="text-align: center; margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
<p><strong>Bengali Sentiment Analysis App</strong></p>
<p>Powered by SVM with TF-IDF features | Built with Gradio</p>
<p><em>Load the models first, then enter Bengali text to analyze sentiment</em></p>
</div>
"""

    # One single-item row per example, matching the single input component.
    sample_texts = [
        ["এই পণ্যটি অসাধারণ! আমি খুবই সন্তুষ্ট।"],
        ["এই পণ্যটি কাজ করছে না। খুবই খারাপ।"],
        ["দারুণ সার্ভিস! দ্রুত ডেলিভারি পেয়েছি।"],
        ["প্রোডাক্ট কোয়ালিটি ভালো না। টাকার অপচয়।"],
        ["চমৎকার অভিজ্ঞতা! আবার কিনব।"]
    ]

    def reset_fields():
        """Return empty values for the input box and both output widgets."""
        return "", "", ""

    with gr.Blocks(css=stylesheet, title="Bengali Sentiment Analysis") as demo:
        gr.HTML(header_markup)

        with gr.Row():
            with gr.Column(scale=2):
                # Text entry and action buttons.
                gr.Markdown("### 📝 Enter Bengali Text")
                text_box = gr.Textbox(
                    label="Bengali Text",
                    placeholder="এখানে বাংলা টেক্সট লিখুন... (Enter Bengali text here...)",
                    lines=4,
                    max_lines=8
                )
                with gr.Row():
                    analyze_button = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
                    clear_button = gr.Button("🗑️ Clear", variant="secondary")

                # Manual model loading and its status read-out.
                gr.Markdown("### ⚙️ Model Management")
                load_button = gr.Button("📥 Load Models", variant="secondary")
                status_box = gr.Textbox(label="Load Status", interactive=False)

            with gr.Column(scale=2):
                # Prediction output widgets.
                gr.Markdown("### 📊 Results")
                sentiment_view = gr.Markdown(label="Sentiment Analysis Result")
                details_view = gr.Textbox(
                    label="Analysis Details",
                    lines=6,
                    interactive=False
                )

        # Clickable sample inputs.
        gr.Markdown("### 💡 Example Texts to Try")
        gr.Examples(
            examples=sample_texts,
            inputs=[text_box],
            label="Click on any example to try it"
        )

        # Wire the buttons to their callbacks.
        analyze_button.click(
            fn=predict_sentiment,
            inputs=[text_box],
            outputs=[sentiment_view, details_view]
        )
        clear_button.click(
            fn=reset_fields,
            outputs=[text_box, sentiment_view, details_view]
        )
        load_button.click(
            fn=load_models_and_components,
            outputs=[status_box]
        )

        gr.HTML(footer_markup)

    return demo
def main():
    """Print the startup notes, build the interface and launch the app."""
    startup_lines = (
        "Starting Bengali Sentiment Analysis App...",
        "Make sure you have the following files in the specified paths:",
        "- model.pkl",
        "- tfidf_VECt.pkl",
        "- stopwords.txt",
    )
    for line in startup_lines:
        print(line)

    launch_options = {
        "share": True,             # Creates a public link
        "inbrowser": True,         # Opens in browser automatically
        "server_name": "0.0.0.0",  # Makes it accessible from any IP
        "server_port": 7860,       # Port number
        "show_error": True,        # Shows detailed error messages
    }

    # Build the UI, then start serving it.
    app = create_gradio_interface()
    app.launch(**launch_options)
if __name__ == "__main__":
    # Best-effort bootstrap: install Gradio if it cannot be imported.
    # NOTE(review): this module also imports gradio at the top of the file,
    # so a missing package would normally fail there before reaching this
    # check - confirm whether this fallback is still wanted.
    try:
        import gradio
    except ImportError:
        print("Installing Gradio...")
        import subprocess
        import sys

        # Run pip through the current interpreter so the package is installed
        # into this environment rather than whichever `pip` is first on PATH.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])
    main()