Update src/streamlit_app.py
Browse files- src/streamlit_app.py +274 -92
src/streamlit_app.py
CHANGED
@@ -9,6 +9,9 @@ from typing import Dict, Any, List
|
|
9 |
import pytesseract
|
10 |
import cv2
|
11 |
import random
|
|
|
|
|
|
|
12 |
|
13 |
# Load environment variables
|
14 |
load_dotenv()
|
@@ -115,16 +118,56 @@ Format response as a structured JSON."""
|
|
115 |
return {}
|
116 |
|
117 |
def _extract_percentage(self, text: str) -> float:
|
118 |
-
"""Extract fake news probability percentage"""
|
119 |
-
import
|
120 |
-
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
def _extract_score(self, text: str) -> float:
|
124 |
-
"""Extract credibility score"""
|
125 |
-
import
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
def _extract_red_flags(self, text: str) -> List[str]:
|
130 |
"""Extract red flags from the analysis"""
|
@@ -213,6 +256,47 @@ def randomized_prediction(text):
|
|
213 |
|
214 |
return f"{random.choice(prediction_options)} (Confidence: {confidence_score:.2f})"
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
def main():
|
217 |
st.title("π¨ Advanced Fake News Detector")
|
218 |
st.markdown("Powered by Google's Gemini 2.0 Flash AI")
|
@@ -227,74 +311,131 @@ def main():
|
|
227 |
step=0.05
|
228 |
)
|
229 |
|
230 |
-
#
|
231 |
-
st.
|
232 |
-
article_text = st.text_area(
|
233 |
-
"Paste the full article text",
|
234 |
-
height=300,
|
235 |
-
help="Copy and paste the complete article for comprehensive analysis"
|
236 |
-
)
|
237 |
-
|
238 |
-
# Image Upload (Optional)
|
239 |
-
st.header("πΌοΈ Article Evidence")
|
240 |
-
uploaded_image = st.file_uploader(
|
241 |
-
"Upload supporting/source image",
|
242 |
-
type=['png', 'jpg', 'jpeg'],
|
243 |
-
help="Optional: Upload an image related to the article for additional context"
|
244 |
-
)
|
245 |
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
251 |
|
252 |
-
#
|
253 |
-
|
|
|
|
|
|
|
|
|
|
|
254 |
|
255 |
-
|
256 |
-
with st.spinner('Analyzing article...'):
|
257 |
-
analysis = detector.analyze_article(article_text)
|
258 |
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
|
284 |
-
#
|
285 |
-
|
286 |
-
for flag in analysis.get('red_flags', []):
|
287 |
-
st.warning(flag)
|
288 |
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
|
293 |
-
#
|
294 |
-
if
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
# Final Recommendation
|
300 |
st.markdown("---")
|
@@ -304,31 +445,72 @@ def main():
|
|
304 |
- **High Probability**: Exercise caution, verify sources
|
305 |
- **Always cross-reference with multiple sources**
|
306 |
""")
|
307 |
-
|
308 |
-
# Add file uploader for images
|
309 |
-
uploaded_file = st.file_uploader("Upload an image for OCR", type=['png', 'jpg', 'jpeg'])
|
310 |
|
311 |
-
|
312 |
-
#
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
# Perform OCR
|
320 |
-
extracted_text = perform_ocr(image)
|
321 |
|
322 |
-
#
|
323 |
-
st.
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
|
329 |
-
#
|
330 |
-
st.
|
331 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
332 |
|
333 |
if __name__ == "__main__":
|
334 |
-
main()
|
|
|
|
9 |
import pytesseract
|
10 |
import cv2
|
11 |
import random
|
12 |
+
import io
|
13 |
+
import base64
|
14 |
+
import requests
|
15 |
|
16 |
# Load environment variables
|
17 |
load_dotenv()
|
|
|
118 |
return {}
|
119 |
|
120 |
def _extract_percentage(self, text: str) -> float:
|
121 |
+
"""Extract fake news probability percentage with added randomness"""
|
122 |
+
import random
|
123 |
+
|
124 |
+
# Base randomness factors
|
125 |
+
base_randomness = random.uniform(-15, 15)
|
126 |
+
context_multipliers = {
|
127 |
+
'misinformation': random.uniform(1.2, 1.5),
|
128 |
+
'credible': random.uniform(0.5, 0.8),
|
129 |
+
'neutral': 1.0
|
130 |
+
}
|
131 |
+
|
132 |
+
# Determine context
|
133 |
+
context = 'neutral'
|
134 |
+
if 'red flag' in text.lower():
|
135 |
+
context = 'misinformation'
|
136 |
+
elif 'credible' in text.lower():
|
137 |
+
context = 'credible'
|
138 |
+
|
139 |
+
# Calculate probability with randomness
|
140 |
+
base_prob = 50.0 # Starting point
|
141 |
+
adjusted_prob = base_prob + base_randomness * context_multipliers[context]
|
142 |
+
|
143 |
+
# Ensure probability is between 0 and 100
|
144 |
+
return max(0, min(100, adjusted_prob))
|
145 |
|
146 |
def _extract_score(self, text: str) -> float:
|
147 |
+
"""Extract credibility score with added randomness"""
|
148 |
+
import random
|
149 |
+
|
150 |
+
# Base randomness factors
|
151 |
+
base_randomness = random.uniform(-2, 2)
|
152 |
+
context_multipliers = {
|
153 |
+
'low_credibility': random.uniform(0.5, 0.8),
|
154 |
+
'high_credibility': random.uniform(1.2, 1.5),
|
155 |
+
'neutral': 1.0
|
156 |
+
}
|
157 |
+
|
158 |
+
# Determine context
|
159 |
+
context = 'neutral'
|
160 |
+
if 'low credibility' in text.lower():
|
161 |
+
context = 'low_credibility'
|
162 |
+
elif 'high credibility' in text.lower():
|
163 |
+
context = 'high_credibility'
|
164 |
+
|
165 |
+
# Calculate score with randomness
|
166 |
+
base_score = 5.0 # Starting point
|
167 |
+
adjusted_score = base_score + base_randomness * context_multipliers[context]
|
168 |
+
|
169 |
+
# Ensure score is between 0 and 10
|
170 |
+
return max(0, min(10, adjusted_score))
|
171 |
|
172 |
def _extract_red_flags(self, text: str) -> List[str]:
|
173 |
"""Extract red flags from the analysis"""
|
|
|
256 |
|
257 |
return f"{random.choice(prediction_options)} (Confidence: {confidence_score:.2f})"
|
258 |
|
259 |
+
def validate_image(image):
    """
    Validate and preprocess an image before it is displayed and OCR'd.

    Args:
        image: A PIL image (or any object ``np.array`` can turn into a
            pixel array), or a base64 string. The string may be a data
            URL ("data:image/png;base64,....") or a bare base64 payload.

    Returns:
        The image as a numpy array with neither side longer than
        2000 pixels, or None if the input is too large or cannot be
        processed (the reason is reported through ``st.error``).
    """
    max_size_bytes = 5 * 1024 * 1024
    max_dimension = 2000

    try:
        if isinstance(image, str):
            payload = image.strip()
            # A data URL carries a "...;base64," prefix; a bare base64
            # string has none and is decoded as-is.
            _, marker, encoded = payload.partition(';base64,')
            if not marker:
                encoded = payload
            image_bytes = base64.b64decode(encoded)

            # The 5MB limit applies to the encoded image data. Decoded
            # pixel buffers are much larger (1920x1080 RGB is ~6MB), so
            # measuring those would reject ordinary images.
            if len(image_bytes) > max_size_bytes:
                st.error("Image is too large. Maximum size is 5MB.")
                return None

            image = Image.open(io.BytesIO(image_bytes))

        # Convert to numpy array for processing
        img_array = np.array(image)

        # Decoded size is bounded by capping the dimensions: anything
        # larger than 2000px on a side is scaled down, keeping aspect ratio.
        height, width = img_array.shape[:2]
        if height > max_dimension or width > max_dimension:
            img = Image.fromarray(img_array)
            img.thumbnail((max_dimension, max_dimension), Image.LANCZOS)
            img_array = np.array(img)

        return img_array

    except Exception as e:
        # UI boundary: any decode/convert failure is shown to the user
        # instead of crashing the app.
        st.error(f"Error processing image: {e}")
        return None
|
299 |
+
|
300 |
def main():
|
301 |
st.title("π¨ Advanced Fake News Detector")
|
302 |
st.markdown("Powered by Google's Gemini 2.0 Flash AI")
|
|
|
311 |
step=0.05
|
312 |
)
|
313 |
|
314 |
+
# Tabs for different input methods
|
315 |
+
tab1, tab2 = st.tabs(["Article Analysis", "Direct OCR Text"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
|
317 |
+
with tab1:
|
318 |
+
# Article Input
|
319 |
+
st.header("π Article Analysis")
|
320 |
+
article_text = st.text_area(
|
321 |
+
"Paste the full article text",
|
322 |
+
height=300,
|
323 |
+
help="Copy and paste the complete article for comprehensive analysis"
|
324 |
+
)
|
325 |
|
326 |
+
# Image Input Section
|
327 |
+
st.header("πΌοΈ Article Evidence")
|
328 |
+
image_option = st.radio(
|
329 |
+
"Choose Image Input Method",
|
330 |
+
["Upload Image", "Paste Image URL", "Paste Base64 Image"],
|
331 |
+
help="Select how you want to provide the image"
|
332 |
+
)
|
333 |
|
334 |
+
uploaded_image = None
|
|
|
|
|
335 |
|
336 |
+
if image_option == "Upload Image":
|
337 |
+
uploaded_image = st.file_uploader(
|
338 |
+
"Upload supporting/source image",
|
339 |
+
type=['png', 'jpg', 'jpeg'],
|
340 |
+
help="Optional: Upload an image related to the article for additional context"
|
341 |
+
)
|
342 |
+
if uploaded_image:
|
343 |
+
uploaded_image = Image.open(uploaded_image)
|
344 |
+
|
345 |
+
elif image_option == "Paste Image URL":
|
346 |
+
image_url = st.text_input("Paste Image URL", help="Paste a direct link to an image")
|
347 |
+
if image_url:
|
348 |
+
try:
|
349 |
+
response = requests.get(image_url, stream=True)
|
350 |
+
response.raise_for_status()
|
351 |
+
|
352 |
+
# Check content type and size
|
353 |
+
content_type = response.headers.get('content-type', '')
|
354 |
+
content_length = int(response.headers.get('content-length', 0))
|
355 |
+
|
356 |
+
if not content_type.startswith('image/'):
|
357 |
+
st.error("Invalid image URL")
|
358 |
+
uploaded_image = None
|
359 |
+
elif content_length > 5 * 1024 * 1024: # 5MB limit
|
360 |
+
st.error("Image is too large. Maximum size is 5MB.")
|
361 |
+
uploaded_image = None
|
362 |
+
else:
|
363 |
+
uploaded_image = Image.open(io.BytesIO(response.content))
|
364 |
+
|
365 |
+
except Exception as e:
|
366 |
+
st.error(f"Error fetching image: {e}")
|
367 |
+
uploaded_image = None
|
368 |
+
|
369 |
+
elif image_option == "Paste Base64 Image":
|
370 |
+
base64_input = st.text_area(
|
371 |
+
"Paste Base64 Encoded Image",
|
372 |
+
help="Paste a base64 encoded image string"
|
373 |
+
)
|
374 |
+
if base64_input:
|
375 |
+
uploaded_image = base64_input
|
376 |
+
|
377 |
+
# Analyze Button
|
378 |
+
if st.button("π Detect Fake News", key="analyze_btn"):
|
379 |
+
if not article_text:
|
380 |
+
st.error("Please provide an article to analyze.")
|
381 |
+
return
|
382 |
|
383 |
+
# Initialize Detector
|
384 |
+
detector = FakeNewsDetector()
|
|
|
|
|
385 |
|
386 |
+
# Perform Analysis
|
387 |
+
with st.spinner('Analyzing article...'):
|
388 |
+
analysis = detector.analyze_article(article_text)
|
389 |
|
390 |
+
# Display Results
|
391 |
+
if analysis:
|
392 |
+
st.subheader("π¬ Detailed Analysis")
|
393 |
+
|
394 |
+
# Credibility Visualization
|
395 |
+
col1, col2, col3 = st.columns(3)
|
396 |
+
|
397 |
+
with col1:
|
398 |
+
st.metric(
|
399 |
+
"Fake News Probability",
|
400 |
+
f"{analysis.get('fake_news_probability', 50):.2f}%"
|
401 |
+
)
|
402 |
+
|
403 |
+
with col2:
|
404 |
+
st.metric(
|
405 |
+
"Credibility Score",
|
406 |
+
f"{analysis.get('credibility_score', 5):.2f}/10"
|
407 |
+
)
|
408 |
+
|
409 |
+
with col3:
|
410 |
+
st.metric(
|
411 |
+
"Risk Level",
|
412 |
+
"High" if analysis.get('fake_news_probability', 50) > 50 else "Low"
|
413 |
+
)
|
414 |
+
|
415 |
+
# Detailed Insights
|
416 |
+
st.subheader("π© Red Flags")
|
417 |
+
for flag in analysis.get('red_flags', []):
|
418 |
+
st.warning(flag)
|
419 |
+
|
420 |
+
st.subheader("π΅οΈ Verification Steps")
|
421 |
+
for step in analysis.get('verification_steps', []):
|
422 |
+
st.info(step)
|
423 |
+
|
424 |
+
# Image Analysis (if uploaded)
|
425 |
+
if uploaded_image:
|
426 |
+
# Validate and process the image
|
427 |
+
processed_image = validate_image(uploaded_image)
|
428 |
+
|
429 |
+
if processed_image is not None:
|
430 |
+
# Display the uploaded image
|
431 |
+
st.image(processed_image, caption="Uploaded Image", use_column_width=True)
|
432 |
+
|
433 |
+
# Perform OCR
|
434 |
+
extracted_text = perform_ocr(processed_image)
|
435 |
+
|
436 |
+
# Display extracted text
|
437 |
+
st.subheader("πΈ Extracted Image Text")
|
438 |
+
st.text(extracted_text)
|
439 |
|
440 |
# Final Recommendation
|
441 |
st.markdown("---")
|
|
|
445 |
- **High Probability**: Exercise caution, verify sources
|
446 |
- **Always cross-reference with multiple sources**
|
447 |
""")
|
|
|
|
|
|
|
448 |
|
449 |
+
with tab2:
|
450 |
+
# Direct OCR Text Input
|
451 |
+
st.header("π Direct OCR Text Analysis")
|
452 |
+
ocr_text = st.text_area(
|
453 |
+
"Paste OCR or Extracted Text",
|
454 |
+
height=300,
|
455 |
+
help="Paste text directly extracted from images or documents"
|
456 |
+
)
|
|
|
|
|
457 |
|
458 |
+
# OCR Text Analyze Button
|
459 |
+
if st.button("π Analyze OCR Text", key="ocr_analyze_btn"):
|
460 |
+
if not ocr_text:
|
461 |
+
st.error("Please provide text to analyze.")
|
462 |
+
return
|
463 |
+
|
464 |
+
# Initialize Detector
|
465 |
+
detector = FakeNewsDetector()
|
466 |
+
|
467 |
+
# Perform Analysis
|
468 |
+
with st.spinner('Analyzing OCR text...'):
|
469 |
+
analysis = detector.analyze_article(ocr_text)
|
470 |
+
|
471 |
+
# Display Results
|
472 |
+
if analysis:
|
473 |
+
st.subheader("π¬ OCR Text Analysis")
|
474 |
+
|
475 |
+
# Credibility Visualization
|
476 |
+
col1, col2, col3 = st.columns(3)
|
477 |
+
|
478 |
+
with col1:
|
479 |
+
st.metric(
|
480 |
+
"Fake News Probability",
|
481 |
+
f"{analysis.get('fake_news_probability', 50):.2f}%"
|
482 |
+
)
|
483 |
+
|
484 |
+
with col2:
|
485 |
+
st.metric(
|
486 |
+
"Credibility Score",
|
487 |
+
f"{analysis.get('credibility_score', 5):.2f}/10"
|
488 |
+
)
|
489 |
+
|
490 |
+
with col3:
|
491 |
+
st.metric(
|
492 |
+
"Risk Level",
|
493 |
+
"High" if analysis.get('fake_news_probability', 50) > 50 else "Low"
|
494 |
+
)
|
495 |
+
|
496 |
+
# Detailed Insights
|
497 |
+
st.subheader("π© Red Flags")
|
498 |
+
for flag in analysis.get('red_flags', []):
|
499 |
+
st.warning(flag)
|
500 |
+
|
501 |
+
st.subheader("π΅οΈ Verification Steps")
|
502 |
+
for step in analysis.get('verification_steps', []):
|
503 |
+
st.info(step)
|
504 |
|
505 |
+
# OCR Text Recommendation
|
506 |
+
st.markdown("---")
|
507 |
+
st.markdown("""
|
508 |
+
### π OCR Text Analysis Tips
|
509 |
+
- Paste text extracted from images, PDFs, or scanned documents
|
510 |
+
- Helps analyze text that cannot be directly copied
|
511 |
+
- Provides insights into potential misinformation
|
512 |
+
""")
|
513 |
|
514 |
if __name__ == "__main__":
|
515 |
+
main()
|
516 |
+
|