shukdevdattaEX committed on
Commit
8bccf80
·
verified ·
1 Parent(s): 2562afd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -0
app.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cloudpickle
3
+ import codecs
4
+ import string
5
+ from bnltk.tokenize import Tokenizers
6
+
7
# Shared module state. Everything starts out as None and is filled in by
# load_models_and_components().
model = tfidf_vectorizer = tokenizer = None  # classifier, TF-IDF vectorizer, word tokenizer
bangla_stopwords = punctuation_marks = None  # tokens dropped during preprocessing
13
+
14
def load_models_and_components():
    """Load the saved model, vectorizer, and preprocessing components.

    Reads ``model.pkl`` and ``tfidf_VECt.pkl`` with cloudpickle, creates a
    ``Tokenizers`` instance, reads ``stopwords.txt`` (UTF-8, split on
    whitespace) and collects ``string.punctuation`` into a set.

    Every component is loaded into a local first and published to the module
    globals only after all steps have succeeded, so a failure part-way
    through never leaves the module half-initialised.

    Returns:
        str: ``"Models and components loaded successfully!"`` on success,
        otherwise ``"Error loading models: <exception text>"``.
    """
    global model, tfidf_vectorizer, tokenizer, bangla_stopwords, punctuation_marks

    try:
        # SECURITY NOTE: unpickling can execute arbitrary code. These files
        # must come from a trusted source.
        with open('model.pkl', 'rb') as f:
            loaded_model = cloudpickle.load(f)

        with open('tfidf_VECt.pkl', 'rb') as f:
            loaded_vectorizer = cloudpickle.load(f)

        loaded_tokenizer = Tokenizers()

        # The context manager closes the file handle; a set makes the
        # per-word "is this a stop word?" test O(1) instead of O(n).
        with codecs.open('stopwords.txt', 'r', encoding='utf-8') as f:
            loaded_stopwords = set(f.read().split())

        loaded_punctuation = set(string.punctuation)

    except Exception as e:
        # Deliberately broad: the message is shown in the UI status box.
        return f"Error loading models: {str(e)}"

    # Publish everything at once now that nothing can fail any more.
    model = loaded_model
    tfidf_vectorizer = loaded_vectorizer
    tokenizer = loaded_tokenizer
    bangla_stopwords = loaded_stopwords
    punctuation_marks = loaded_punctuation

    return "Models and components loaded successfully!"
41
+
42
def preprocess_text(text):
    """Tokenize *text* and strip punctuation tokens and stop words.

    Intended to mirror the preprocessing applied to the training data.
    Relies on the module-level ``tokenizer``, ``punctuation_marks`` and
    ``bangla_stopwords`` having been loaded already.

    Returns:
        str: The surviving tokens joined by single spaces.
    """
    kept_tokens = []
    for token in tokenizer.bn_word_tokenizer(text):
        # Drop punctuation first, then stop words.
        if token in punctuation_marks:
            continue
        if token in bangla_stopwords:
            continue
        kept_tokens.append(token)

    return ' '.join(kept_tokens)
55
+
56
def predict_sentiment(input_text):
    """Predict sentiment for the input text.

    Args:
        input_text: Raw text from the UI. ``None``, an empty string, or
            whitespace-only text is rejected with a prompt message.

    Returns:
        tuple[str, str]: ``(result, details)``. ``result`` is a Markdown
        summary (sentiment plus confidence) or a user-facing message;
        ``details`` holds the processed text, raw prediction and class
        probabilities, or ``""`` when no prediction was made.
    """
    # ``not input_text`` also covers None, which would otherwise crash .strip().
    if not input_text or not input_text.strip():
        return "Please enter some text to analyze.", ""

    # preprocess_text() needs the tokenizer and both filter sets as well as
    # the model and vectorizer, so require all of them before predicting.
    required = (model, tfidf_vectorizer, tokenizer, bangla_stopwords, punctuation_marks)
    if any(component is None for component in required):
        return "Models not loaded. Please load models first.", ""

    try:
        processed_text = preprocess_text(input_text)

        if not processed_text.strip():
            return "After preprocessing, no valid words found. Please try different text.", ""

        # Vectorize with the loaded TF-IDF vectorizer, then classify.
        transformed_input = tfidf_vectorizer.transform([processed_text])
        prediction = model.predict(transformed_input)[0]

        # Label 1 is treated as positive; anything else as negative.
        sentiment = "Positive 😊" if prediction == 1 else "Negative 😞"

        # Not every classifier exposes predict_proba; degrade gracefully
        # instead of failing the whole prediction.
        predict_proba = getattr(model, "predict_proba", None)
        if predict_proba is None:
            result = f"**Sentiment:** {sentiment}\n**Confidence:** N/A"
            details = (
                f"**Processed Text:** {processed_text}\n"
                f"**Raw Prediction:** {prediction}\n"
                "**Probabilities:** not available for this model"
            )
            return result, details

        prediction_proba = predict_proba(transformed_input)[0]
        confidence = max(prediction_proba) * 100

        result = f"**Sentiment:** {sentiment}\n**Confidence:** {confidence:.2f}%"

        # NOTE(review): assumes column 0 is the negative class and column 1
        # the positive class - confirm against the model's class order.
        details = (
            f"**Processed Text:** {processed_text}\n"
            f"**Raw Prediction:** {prediction}\n"
            f"**Probabilities:** Negative: {prediction_proba[0]:.3f}, Positive: {prediction_proba[1]:.3f}"
        )

        return result, details

    except Exception as e:
        # Deliberately broad: the message is surfaced in the UI.
        return f"Error during prediction: {str(e)}", ""
94
+
95
def create_gradio_interface():
    """Build the Gradio Blocks UI and return it without launching.

    Layout: a header, an input column (text box, analyze/clear buttons and
    the model-loading controls) next to a results column, a list of
    clickable example sentences, and a footer. The buttons are wired to
    ``predict_sentiment`` and ``load_models_and_components``.

    Returns:
        gr.Blocks: The assembled application.
    """
    # Stylesheet handed to gr.Blocks.
    custom_css = """
    .gradio-container {
        font-family: 'Arial', sans-serif;
    }
    .main-header {
        text-align: center;
        color: #2d3748;
        margin-bottom: 20px;
    }
    .prediction-box {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 15px;
        border-radius: 10px;
        margin: 10px 0;
    }
    """

    # One single-element row per example, matching the single input component.
    example_rows = [
        ["এই পণ্যটি অসাধারণ! আমি খুবই সন্তুষ্ট।"],
        ["এই পণ্যটি কাজ করছে না। খুবই খারাপ।"],
        ["দারুণ সার্ভিস! দ্রুত ডেলিভারি পেয়েছি।"],
        ["প্রোডাক্ট কোয়ালিটি ভালো না। টাকার অপচয়।"],
        ["চমৎকার অভিজ্ঞতা! আবার কিনব।"]
    ]

    def _reset_fields():
        """Return blank values for the input box and both output widgets."""
        return "", "", ""

    with gr.Blocks(css=custom_css, title="Bengali Sentiment Analysis") as app:
        gr.HTML("""
        <div class="main-header">
            <h1>🇧🇩 Bengali Sentiment Analysis</h1>
            <p>Analyze the sentiment of Bengali text using machine learning</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Text entry
                gr.Markdown("### 📝 Enter Bengali Text")
                text_box = gr.Textbox(
                    label="Bengali Text",
                    placeholder="এখানে বাংলা টেক্সট লিখুন... (Enter Bengali text here...)",
                    lines=4,
                    max_lines=8
                )

                with gr.Row():
                    analyze_button = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
                    reset_button = gr.Button("🗑️ Clear", variant="secondary")

                # Model loading controls
                gr.Markdown("### ⚙️ Model Management")
                load_button = gr.Button("📥 Load Models", variant="secondary")
                status_box = gr.Textbox(label="Load Status", interactive=False)

            with gr.Column(scale=2):
                # Prediction output
                gr.Markdown("### 📊 Results")
                sentiment_view = gr.Markdown(label="Sentiment Analysis Result")
                details_box = gr.Textbox(
                    label="Analysis Details",
                    lines=6,
                    interactive=False
                )

        # Clickable sample inputs
        gr.Markdown("### 💡 Example Texts to Try")
        gr.Examples(
            examples=example_rows,
            inputs=[text_box],
            label="Click on any example to try it"
        )

        # Wire the buttons to their callbacks
        analyze_button.click(
            fn=predict_sentiment,
            inputs=[text_box],
            outputs=[sentiment_view, details_box]
        )

        reset_button.click(
            fn=_reset_fields,
            outputs=[text_box, sentiment_view, details_box]
        )

        load_button.click(
            fn=load_models_and_components,
            outputs=[status_box]
        )

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
            <p><strong>Bengali Sentiment Analysis App</strong></p>
            <p>Powered by SVM with TF-IDF features | Built with Gradio</p>
            <p><em>Load the models first, then enter Bengali text to analyze sentiment</em></p>
        </div>
        """)

    return app
196
+
197
def main():
    """Print the startup checklist, build the Gradio UI, and launch it."""
    startup_messages = (
        "Starting Bengali Sentiment Analysis App...",
        "Make sure you have the following files in the specified paths:",
        "- model.pkl",
        "- tfidf_VECt.pkl",
        "- stopwords.txt",
    )
    for message in startup_messages:
        print(message)

    launch_options = {
        "share": True,             # create a public link
        "inbrowser": True,         # open a browser tab automatically
        "server_name": "0.0.0.0",  # listen on all interfaces
        "server_port": 7860,       # port number
        "show_error": True,        # show detailed error messages
    }

    # Build the interface, then start serving it.
    create_gradio_interface().launch(**launch_options)
216
+
217
if __name__ == "__main__":
    # No install-on-ImportError fallback here: this module imports gradio at
    # the top of the file, so a missing package aborts the script long before
    # this point and such a branch could never run. Install the dependencies
    # (gradio, cloudpickle, bnltk) before starting the app.
    main()