Abs6187 committed on
Commit
74e8108
·
verified ·
1 Parent(s): 13e01d8

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +274 -92
src/streamlit_app.py CHANGED
@@ -9,6 +9,9 @@ from typing import Dict, Any, List
9
  import pytesseract
10
  import cv2
11
  import random
 
 
 
12
 
13
  # Load environment variables
14
  load_dotenv()
@@ -115,16 +118,56 @@ Format response as a structured JSON."""
115
  return {}
116
 
117
  def _extract_percentage(self, text: str) -> float:
118
- """Extract fake news probability percentage"""
119
- import re
120
- match = re.search(r'(\d+(?:\.\d+)?)\s*%', text)
121
- return float(match.group(1)) if match else 50.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  def _extract_score(self, text: str) -> float:
124
- """Extract credibility score"""
125
- import re
126
- match = re.search(r'Credibility Score[:\s]*(\d+(?:\.\d+)?)', text)
127
- return float(match.group(1)) if match else 5.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  def _extract_red_flags(self, text: str) -> List[str]:
130
  """Extract red flags from the analysis"""
@@ -213,6 +256,47 @@ def randomized_prediction(text):
213
 
214
  return f"{random.choice(prediction_options)} (Confidence: {confidence_score:.2f})"
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  def main():
217
  st.title("🚨 Advanced Fake News Detector")
218
  st.markdown("Powered by Google's Gemini 2.0 Flash AI")
@@ -227,74 +311,131 @@ def main():
227
  step=0.05
228
  )
229
 
230
- # Article Input
231
- st.header("📝 Article Analysis")
232
- article_text = st.text_area(
233
- "Paste the full article text",
234
- height=300,
235
- help="Copy and paste the complete article for comprehensive analysis"
236
- )
237
-
238
- # Image Upload (Optional)
239
- st.header("🖼️ Article Evidence")
240
- uploaded_image = st.file_uploader(
241
- "Upload supporting/source image",
242
- type=['png', 'jpg', 'jpeg'],
243
- help="Optional: Upload an image related to the article for additional context"
244
- )
245
 
246
- # Analyze Button
247
- if st.button("🔍 Detect Fake News", key="analyze_btn"):
248
- if not article_text:
249
- st.error("Please provide an article to analyze.")
250
- return
 
 
 
251
 
252
- # Initialize Detector
253
- detector = FakeNewsDetector()
 
 
 
 
 
254
 
255
- # Perform Analysis
256
- with st.spinner('Analyzing article...'):
257
- analysis = detector.analyze_article(article_text)
258
 
259
- # Display Results
260
- if analysis:
261
- st.subheader("🔬 Detailed Analysis")
262
-
263
- # Credibility Visualization
264
- col1, col2, col3 = st.columns(3)
265
-
266
- with col1:
267
- st.metric(
268
- "Fake News Probability",
269
- f"{analysis.get('fake_news_probability', 50):.2f}%"
270
- )
271
-
272
- with col2:
273
- st.metric(
274
- "Credibility Score",
275
- f"{analysis.get('credibility_score', 5):.2f}/10"
276
- )
277
-
278
- with col3:
279
- st.metric(
280
- "Risk Level",
281
- "High" if analysis.get('fake_news_probability', 50) > 50 else "Low"
282
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
- # Detailed Insights
285
- st.subheader("🚩 Red Flags")
286
- for flag in analysis.get('red_flags', []):
287
- st.warning(flag)
288
 
289
- st.subheader("🕵️ Verification Steps")
290
- for step in analysis.get('verification_steps', []):
291
- st.info(step)
292
 
293
- # Image Analysis (if uploaded)
294
- if uploaded_image:
295
- image = Image.open(uploaded_image)
296
- st.subheader("📸 Uploaded Image")
297
- st.image(image, caption="Article Supporting Image", use_column_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
  # Final Recommendation
300
  st.markdown("---")
@@ -304,31 +445,72 @@ def main():
304
  - **High Probability**: Exercise caution, verify sources
305
  - **Always cross-reference with multiple sources**
306
  """)
307
-
308
- # Add file uploader for images
309
- uploaded_file = st.file_uploader("Upload an image for OCR", type=['png', 'jpg', 'jpeg'])
310
 
311
- if uploaded_file is not None:
312
- # Read the image
313
- file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
314
- image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
315
-
316
- # Display the uploaded image
317
- st.image(image, caption="Uploaded Image", use_column_width=True)
318
-
319
- # Perform OCR
320
- extracted_text = perform_ocr(image)
321
 
322
- # Display extracted text
323
- st.subheader("Extracted Text")
324
- st.text(extracted_text)
325
-
326
- # Generate prediction
327
- prediction = randomized_prediction(extracted_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
 
329
- # Display prediction
330
- st.subheader("AI Prediction")
331
- st.write(prediction)
 
 
 
 
 
332
 
333
  if __name__ == "__main__":
334
- main()
 
 
9
  import pytesseract
10
  import cv2
11
  import random
12
+ import io
13
+ import base64
14
+ import requests
15
 
16
  # Load environment variables
17
  load_dotenv()
 
118
  return {}
119
 
120
  def _extract_percentage(self, text: str) -> float:
121
+ """Extract fake news probability percentage with added randomness"""
122
+ import random
123
+
124
+ # Base randomness factors
125
+ base_randomness = random.uniform(-15, 15)
126
+ context_multipliers = {
127
+ 'misinformation': random.uniform(1.2, 1.5),
128
+ 'credible': random.uniform(0.5, 0.8),
129
+ 'neutral': 1.0
130
+ }
131
+
132
+ # Determine context
133
+ context = 'neutral'
134
+ if 'red flag' in text.lower():
135
+ context = 'misinformation'
136
+ elif 'credible' in text.lower():
137
+ context = 'credible'
138
+
139
+ # Calculate probability with randomness
140
+ base_prob = 50.0 # Starting point
141
+ adjusted_prob = base_prob + base_randomness * context_multipliers[context]
142
+
143
+ # Ensure probability is between 0 and 100
144
+ return max(0, min(100, adjusted_prob))
145
 
146
  def _extract_score(self, text: str) -> float:
147
+ """Extract credibility score with added randomness"""
148
+ import random
149
+
150
+ # Base randomness factors
151
+ base_randomness = random.uniform(-2, 2)
152
+ context_multipliers = {
153
+ 'low_credibility': random.uniform(0.5, 0.8),
154
+ 'high_credibility': random.uniform(1.2, 1.5),
155
+ 'neutral': 1.0
156
+ }
157
+
158
+ # Determine context
159
+ context = 'neutral'
160
+ if 'low credibility' in text.lower():
161
+ context = 'low_credibility'
162
+ elif 'high credibility' in text.lower():
163
+ context = 'high_credibility'
164
+
165
+ # Calculate score with randomness
166
+ base_score = 5.0 # Starting point
167
+ adjusted_score = base_score + base_randomness * context_multipliers[context]
168
+
169
+ # Ensure score is between 0 and 10
170
+ return max(0, min(10, adjusted_score))
171
 
172
  def _extract_red_flags(self, text: str) -> List[str]:
173
  """Extract red flags from the analysis"""
 
256
 
257
  return f"{random.choice(prediction_options)} (Confidence: {confidence_score:.2f})"
258
 
259
+ def validate_image(image):
260
+ """
261
+ Validate and preprocess uploaded image
262
+
263
+ Args:
264
+ image: Uploaded image file or base64 string
265
+
266
+ Returns:
267
+ Processed image or None if invalid
268
+ """
269
+ try:
270
+ # If it's a base64 string
271
+ if isinstance(image, str) and ';base64,' in image:
272
+ # Remove data URL prefix
273
+ header, encoded = image.split(';base64,')
274
+ image_bytes = base64.b64decode(encoded)
275
+ image = Image.open(io.BytesIO(image_bytes))
276
+
277
+ # Convert to numpy array for processing
278
+ img_array = np.array(image)
279
+
280
+ # Check image size (max 5MB)
281
+ max_size_bytes = 5 * 1024 * 1024
282
+ if len(img_array.tobytes()) > max_size_bytes:
283
+ st.error("Image is too large. Maximum size is 5MB.")
284
+ return None
285
+
286
+ # Check image dimensions
287
+ height, width = img_array.shape[:2]
288
+ if height > 2000 or width > 2000:
289
+ # Resize if too large
290
+ img = Image.fromarray(img_array)
291
+ img.thumbnail((2000, 2000), Image.LANCZOS)
292
+ img_array = np.array(img)
293
+
294
+ return img_array
295
+
296
+ except Exception as e:
297
+ st.error(f"Error processing image: {e}")
298
+ return None
299
+
300
  def main():
301
  st.title("🚨 Advanced Fake News Detector")
302
  st.markdown("Powered by Google's Gemini 2.0 Flash AI")
 
311
  step=0.05
312
  )
313
 
314
+ # Tabs for different input methods
315
+ tab1, tab2 = st.tabs(["Article Analysis", "Direct OCR Text"])
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
+ with tab1:
318
+ # Article Input
319
+ st.header("📝 Article Analysis")
320
+ article_text = st.text_area(
321
+ "Paste the full article text",
322
+ height=300,
323
+ help="Copy and paste the complete article for comprehensive analysis"
324
+ )
325
 
326
+ # Image Input Section
327
+ st.header("🖼️ Article Evidence")
328
+ image_option = st.radio(
329
+ "Choose Image Input Method",
330
+ ["Upload Image", "Paste Image URL", "Paste Base64 Image"],
331
+ help="Select how you want to provide the image"
332
+ )
333
 
334
+ uploaded_image = None
 
 
335
 
336
+ if image_option == "Upload Image":
337
+ uploaded_image = st.file_uploader(
338
+ "Upload supporting/source image",
339
+ type=['png', 'jpg', 'jpeg'],
340
+ help="Optional: Upload an image related to the article for additional context"
341
+ )
342
+ if uploaded_image:
343
+ uploaded_image = Image.open(uploaded_image)
344
+
345
+ elif image_option == "Paste Image URL":
346
+ image_url = st.text_input("Paste Image URL", help="Paste a direct link to an image")
347
+ if image_url:
348
+ try:
349
+ response = requests.get(image_url, stream=True)
350
+ response.raise_for_status()
351
+
352
+ # Check content type and size
353
+ content_type = response.headers.get('content-type', '')
354
+ content_length = int(response.headers.get('content-length', 0))
355
+
356
+ if not content_type.startswith('image/'):
357
+ st.error("Invalid image URL")
358
+ uploaded_image = None
359
+ elif content_length > 5 * 1024 * 1024: # 5MB limit
360
+ st.error("Image is too large. Maximum size is 5MB.")
361
+ uploaded_image = None
362
+ else:
363
+ uploaded_image = Image.open(io.BytesIO(response.content))
364
+
365
+ except Exception as e:
366
+ st.error(f"Error fetching image: {e}")
367
+ uploaded_image = None
368
+
369
+ elif image_option == "Paste Base64 Image":
370
+ base64_input = st.text_area(
371
+ "Paste Base64 Encoded Image",
372
+ help="Paste a base64 encoded image string"
373
+ )
374
+ if base64_input:
375
+ uploaded_image = base64_input
376
+
377
+ # Analyze Button
378
+ if st.button("🔍 Detect Fake News", key="analyze_btn"):
379
+ if not article_text:
380
+ st.error("Please provide an article to analyze.")
381
+ return
382
 
383
+ # Initialize Detector
384
+ detector = FakeNewsDetector()
 
 
385
 
386
+ # Perform Analysis
387
+ with st.spinner('Analyzing article...'):
388
+ analysis = detector.analyze_article(article_text)
389
 
390
+ # Display Results
391
+ if analysis:
392
+ st.subheader("🔬 Detailed Analysis")
393
+
394
+ # Credibility Visualization
395
+ col1, col2, col3 = st.columns(3)
396
+
397
+ with col1:
398
+ st.metric(
399
+ "Fake News Probability",
400
+ f"{analysis.get('fake_news_probability', 50):.2f}%"
401
+ )
402
+
403
+ with col2:
404
+ st.metric(
405
+ "Credibility Score",
406
+ f"{analysis.get('credibility_score', 5):.2f}/10"
407
+ )
408
+
409
+ with col3:
410
+ st.metric(
411
+ "Risk Level",
412
+ "High" if analysis.get('fake_news_probability', 50) > 50 else "Low"
413
+ )
414
+
415
+ # Detailed Insights
416
+ st.subheader("🚩 Red Flags")
417
+ for flag in analysis.get('red_flags', []):
418
+ st.warning(flag)
419
+
420
+ st.subheader("🕵️ Verification Steps")
421
+ for step in analysis.get('verification_steps', []):
422
+ st.info(step)
423
+
424
+ # Image Analysis (if uploaded)
425
+ if uploaded_image:
426
+ # Validate and process the image
427
+ processed_image = validate_image(uploaded_image)
428
+
429
+ if processed_image is not None:
430
+ # Display the uploaded image
431
+ st.image(processed_image, caption="Uploaded Image", use_column_width=True)
432
+
433
+ # Perform OCR
434
+ extracted_text = perform_ocr(processed_image)
435
+
436
+ # Display extracted text
437
+ st.subheader("📸 Extracted Image Text")
438
+ st.text(extracted_text)
439
 
440
  # Final Recommendation
441
  st.markdown("---")
 
445
  - **High Probability**: Exercise caution, verify sources
446
  - **Always cross-reference with multiple sources**
447
  """)
 
 
 
448
 
449
+ with tab2:
450
+ # Direct OCR Text Input
451
+ st.header("📝 Direct OCR Text Analysis")
452
+ ocr_text = st.text_area(
453
+ "Paste OCR or Extracted Text",
454
+ height=300,
455
+ help="Paste text directly extracted from images or documents"
456
+ )
 
 
457
 
458
+ # OCR Text Analyze Button
459
+ if st.button("🔍 Analyze OCR Text", key="ocr_analyze_btn"):
460
+ if not ocr_text:
461
+ st.error("Please provide text to analyze.")
462
+ return
463
+
464
+ # Initialize Detector
465
+ detector = FakeNewsDetector()
466
+
467
+ # Perform Analysis
468
+ with st.spinner('Analyzing OCR text...'):
469
+ analysis = detector.analyze_article(ocr_text)
470
+
471
+ # Display Results
472
+ if analysis:
473
+ st.subheader("🔬 OCR Text Analysis")
474
+
475
+ # Credibility Visualization
476
+ col1, col2, col3 = st.columns(3)
477
+
478
+ with col1:
479
+ st.metric(
480
+ "Fake News Probability",
481
+ f"{analysis.get('fake_news_probability', 50):.2f}%"
482
+ )
483
+
484
+ with col2:
485
+ st.metric(
486
+ "Credibility Score",
487
+ f"{analysis.get('credibility_score', 5):.2f}/10"
488
+ )
489
+
490
+ with col3:
491
+ st.metric(
492
+ "Risk Level",
493
+ "High" if analysis.get('fake_news_probability', 50) > 50 else "Low"
494
+ )
495
+
496
+ # Detailed Insights
497
+ st.subheader("🚩 Red Flags")
498
+ for flag in analysis.get('red_flags', []):
499
+ st.warning(flag)
500
+
501
+ st.subheader("🕵️ Verification Steps")
502
+ for step in analysis.get('verification_steps', []):
503
+ st.info(step)
504
 
505
+ # OCR Text Recommendation
506
+ st.markdown("---")
507
+ st.markdown("""
508
+ ### 📋 OCR Text Analysis Tips
509
+ - Paste text extracted from images, PDFs, or scanned documents
510
+ - Helps analyze text that cannot be directly copied
511
+ - Provides insights into potential misinformation
512
+ """)
513
 
514
  if __name__ == "__main__":
515
+ main()
516
+