|
import base64
import io
import json
import os
import random
import re
from typing import Dict, Any, List

import cv2
import google.generativeai as genai
import numpy as np
import pandas as pd
import pytesseract
import requests
import streamlit as st
from dotenv import load_dotenv
from PIL import Image
|
|
|
|
|
# Pull GOOGLE_API_KEY (and any other settings) from a local .env file.
load_dotenv()

# Hand the API key to the Gemini client. os.getenv returns None when the
# variable is unset, in which case this call passes api_key=None.
genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))

# Page chrome. The title and icon emoji were stored as mis-decoded text
# and are restored here to the intended glyphs.
st.set_page_config(
    page_title="Advanced Fake News Detector 🕵️‍♂️",
    page_icon="🚨",
    layout="wide",
)

# Custom look for the container, result boxes and buttons.
_CUSTOM_CSS = """
<style>
.main-container {
    background-color: #f0f2f6;
    padding: 2rem;
    border-radius: 15px;
}
.analysis-box {
    background-color: white;
    border-radius: 10px;
    padding: 1.5rem;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.stButton>button {
    background-color: #4CAF50;
    color: white;
    font-weight: bold;
    border: none;
    padding: 10px 20px;
    border-radius: 5px;
    transition: all 0.3s ease;
}
.stButton>button:hover {
    background-color: #45a049;
    transform: scale(1.05);
}
</style>
"""

# unsafe_allow_html is required for the <style> tag to be emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
|
|
class FakeNewsDetector:
    """Ask Gemini to assess an article and turn its reply into a report.

    The prompt requests a JSON document, so every extractor first looks for
    a matching JSON field and only then falls back to scanning the reply
    line by line. Numeric scores are read from the reply itself; when the
    reply carries no usable number a fixed neutral midpoint is reported, so
    the same reply always yields the same report.
    """

    # Neutral midpoints reported when the reply contains no usable number.
    _DEFAULT_PROBABILITY = 50.0
    _DEFAULT_SCORE = 5.0
    # Upper bound on the number of findings returned per list category.
    _MAX_ITEMS = 3

    # "(0-100%)" / "(0-10)" scale hints echoed from the prompt headings are
    # stripped so their digits are not mistaken for the reported value.
    _SCALE_HINT = re.compile(r'\(\s*0\s*[-\u2013]\s*\d+\s*%?\s*\)')
    _NUMBER = re.compile(r'\d+(?:\.\d+)?')

    def __init__(self):
        """Create the Gemini model handle used for every analysis."""
        self.model = genai.GenerativeModel('gemini-2.0-flash')

    def analyze_article(self, article_text: str) -> Dict[str, Any]:
        """
        Analyze the article using Gemini AI

        Args:
            article_text (str): Full text of the article

        Returns:
            Dict with the keys produced by ``_parse_analysis``; an empty
            dict when the model call fails (the error is shown via
            ``st.error``).
        """
        prompt = (
            "Comprehensive Fake News Analysis:\n"
            "\n"
            "Article Text:\n"
            f"{article_text}\n"
            "\n"
            "Provide a detailed analysis with:\n"
            "1. Fake News Probability (0-100%)\n"
            "2. Credibility Score (0-10)\n"
            "3. Key Red Flags\n"
            "4. Verification Recommendations\n"
            "5. Potential Bias Indicators\n"
            "6. Source Reliability Assessment\n"
            "\n"
            "Format response as a structured JSON."
        )

        # UI boundary: any failure of the remote call is reported to the
        # user instead of propagating.
        try:
            response = self.model.generate_content(prompt)
            return self._parse_analysis(response.text)
        except Exception as e:
            st.error(f"Analysis Error: {e}")
            return {}

    def _parse_analysis(self, analysis_text: str) -> Dict[str, Any]:
        """
        Parse the AI-generated analysis into a structured format

        Args:
            analysis_text (str): Raw analysis text

        Returns:
            Dict with ``fake_news_probability``, ``credibility_score``,
            ``red_flags``, ``verification_steps``, ``bias_indicators`` and
            ``source_reliability``; an empty dict if parsing raises.
        """
        try:
            return {
                'fake_news_probability': self._extract_percentage(analysis_text),
                'credibility_score': self._extract_score(analysis_text),
                'red_flags': self._extract_red_flags(analysis_text),
                'verification_steps': self._extract_verification_steps(analysis_text),
                'bias_indicators': self._extract_bias_indicators(analysis_text),
                'source_reliability': self._extract_source_reliability(analysis_text)
            }
        except Exception as e:
            st.warning(f"Parsing Error: {e}")
            return {}

    @staticmethod
    def _json_fields(text: str) -> Dict[str, Any]:
        """Return a flat ``{normalised key: value}`` map of a JSON reply.

        The outermost ``{...}`` span is decoded, which also copes with
        Markdown code fences around the document. Keys are lower-cased and
        punctuation/underscores become single spaces; nested objects are
        flattened into the same map (first occurrence of a key wins).
        Returns an empty dict when the text holds no decodable JSON object.
        """
        start = text.find('{')
        end = text.rfind('}')
        if start == -1 or end < start:
            return {}
        try:
            payload = json.loads(text[start:end + 1])
        except ValueError:
            return {}

        fields: Dict[str, Any] = {}
        pending = [payload]
        while pending:
            node = pending.pop()
            if not isinstance(node, dict):
                continue
            for key, value in node.items():
                label = re.sub(r'[^a-z0-9]+', ' ', str(key).lower()).strip()
                fields.setdefault(label, value)
                if isinstance(value, dict):
                    pending.append(value)
        return fields

    @classmethod
    def _find_field(cls, text: str, needle: str) -> Any:
        """Return the first non-null JSON value whose key contains *needle*, else None."""
        for key, value in cls._json_fields(text).items():
            if needle in key and value is not None:
                return value
        return None

    @classmethod
    def _to_text(cls, value: Any) -> str:
        """Render a decoded JSON value as one human-readable line."""
        if isinstance(value, dict):
            return "; ".join(f"{k}: {cls._to_text(v)}" for k, v in value.items())
        if isinstance(value, (list, tuple)):
            return ", ".join(cls._to_text(v) for v in value)
        return str(value).strip()

    def _extract_number(self, text: str, needle: str, pattern: str,
                        default: float, upper: float) -> float:
        """Read one score from the reply and clamp it to ``[0, upper]``.

        Looks for a JSON field containing *needle* first, then applies
        *pattern* (one capture group holding the number) to the raw text.
        Returns *default* when neither source yields a number.
        """
        value = self._find_field(text, needle)
        if value is None:
            match = re.search(pattern, self._SCALE_HINT.sub('', text), re.IGNORECASE)
            value = match.group(1) if match else None

        if value is None or isinstance(value, bool):
            number = default
        elif isinstance(value, (int, float)):
            number = float(value)
        else:
            # Strings such as "85%" or "7/10": take the first number.
            found = self._NUMBER.search(self._SCALE_HINT.sub('', self._to_text(value)))
            number = float(found.group()) if found else default
        return max(0.0, min(upper, number))

    def _extract_items(self, text: str, needle: str, pattern: str,
                       fallback: str) -> List[str]:
        """Collect up to ``_MAX_ITEMS`` findings for one list category.

        A JSON field containing *needle* takes precedence; otherwise every
        match of *pattern* in the raw text is used. Candidates without any
        word character (e.g. stray JSON punctuation) are discarded. Returns
        ``[fallback]`` when nothing usable remains.
        """
        value = self._find_field(text, needle)
        if value is None:
            candidates = re.findall(pattern, text, re.IGNORECASE)
        elif isinstance(value, (list, tuple)):
            candidates = [self._to_text(item) for item in value]
        else:
            candidates = [self._to_text(value)]

        items = [c.strip() for c in candidates if re.search(r'\w', c)]
        return items[:self._MAX_ITEMS] if items else [fallback]

    def _extract_percentage(self, text: str) -> float:
        """Extract the fake news probability (0-100) stated in the reply.

        Returns 50.0 when the reply states no probability.
        """
        return self._extract_number(
            text,
            'probability',
            r'probability[^\d\n]{0,40}?(\d+(?:\.\d+)?)',
            self._DEFAULT_PROBABILITY,
            100.0,
        )

    def _extract_score(self, text: str) -> float:
        """Extract the credibility score (0-10) stated in the reply.

        Returns 5.0 when the reply states no score.
        """
        return self._extract_number(
            text,
            'credibility',
            r'credibility[\s_]*score[^\d\n]{0,40}?(\d+(?:\.\d+)?)',
            self._DEFAULT_SCORE,
            10.0,
        )

    def _extract_red_flags(self, text: str) -> List[str]:
        """Extract red flags from the analysis"""
        return self._extract_items(
            text,
            'red flag',
            r'Red Flags?[:\s]*([^\n]+)',
            "No specific red flags identified",
        )

    def _extract_verification_steps(self, text: str) -> List[str]:
        """Extract verification recommendations"""
        return self._extract_items(
            text,
            'verification',
            r'Verification[:\s]*([^\n]+)',
            "Conduct independent research",
        )

    def _extract_bias_indicators(self, text: str) -> List[str]:
        """Extract potential bias indicators"""
        return self._extract_items(
            text,
            'bias',
            r'Bias[:\s]*([^\n]+)',
            "No clear bias detected",
        )

    def _extract_source_reliability(self, text: str) -> str:
        """Extract source reliability assessment"""
        value = self._find_field(text, 'reliability')
        if value is not None:
            summary = self._to_text(value)
            if summary:
                return summary
        match = re.search(r'Source Reliability[:\s]*([^\n]+)', text, re.IGNORECASE)
        return match.group(1).strip() if match else "Reliability not conclusively determined"
|
|
|
|
|
def _deskew_rotation(rect_angle):
    """Convert a ``cv2.minAreaRect`` angle into the corrective rotation.

    OpenCV has reported the rectangle angle in two ranges over time,
    [-90, 0) in older releases and (0, 90] in newer ones, and the two
    differ by exactly 90 degrees for the same rectangle. Folding the angle
    into [-45, 45) first makes the result identical under both, and equal
    to what the classic ``angle < -45`` rule gives for the older range.

    Returns:
        Rotation in degrees, within (-45, 45].
    """
    return -(((rect_angle + 45.0) % 90.0) - 45.0)


def preprocess_image(image):
    """Preprocess image for better OCR accuracy.

    Converts the input to grayscale, binarises it with Otsu's threshold and
    rotates it by the skew estimated from the minimum-area rectangle around
    the non-zero pixels.

    Args:
        image: numpy array, either 2-D grayscale or H x W x C with 1, 3
            (RGB) or 4 (RGBA) channels. Channel order is taken to be RGB
            because the arrays in this file are built from PIL images.

    Returns:
        The binarised, deskewed single-channel image.
    """
    # Collapse to one channel; already-gray input needs no colour conversion.
    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 1:
        gray = image[:, :, 0]
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # With THRESH_OTSU the threshold argument (0) is ignored and chosen
    # automatically.
    binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # NOTE(review): the skew is estimated from the *bright* pixels, which
    # for dark-on-light documents is mostly background; confirm whether the
    # mask should be inverted.
    # np.where yields int64 indices; they are cast to float32 because
    # minAreaRect is expected to reject 64-bit integer points (TODO confirm).
    coords = np.column_stack(np.where(binary > 0)).astype(np.float32)
    if len(coords) == 0:
        # Completely black image: nothing to measure, nothing to rotate.
        return binary

    angle = _deskew_rotation(cv2.minAreaRect(coords)[-1])
    if angle == 0:
        return binary

    (h, w) = binary.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # BORDER_REPLICATE avoids introducing black wedges at the corners.
    rotated = cv2.warpAffine(
        binary, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
    )
    return rotated
|
|
|
def perform_ocr(image):
    """Clean up *image* with ``preprocess_image`` and return the text Tesseract reads from it, stripped."""
    cleaned = preprocess_image(image)
    return pytesseract.image_to_string(cleaned).strip()
|
|
|
def randomized_prediction(text):
    """Return a randomly chosen verdict for *text* with a random confidence.

    Falsy input yields "No text detected". Otherwise a confidence is drawn
    uniformly from [0.3, 0.7] and a verdict is picked at random; texts
    longer than 100 characters get two extra verdicts to choose from.
    """
    if not text:
        return "No text detected"

    base_verdicts = (
        "Potentially misleading content",
        "Seems like credible information",
        "High risk of misinformation",
        "Moderate reliability",
        "Requires further verification",
        "Low confidence in accuracy",
    )
    long_text_verdicts = (
        "Complex content, needs careful analysis",
        "Detailed information with potential nuances",
    )

    # The confidence is drawn before the verdict so the sequence of random
    # draws stays the same.
    confidence = random.uniform(0.3, 0.7)
    verdicts = base_verdicts + long_text_verdicts if len(text) > 100 else base_verdicts
    verdict = random.choice(verdicts)
    return f"{verdict} (Confidence: {confidence:.2f})"
|
|
|
def validate_image(image):
    """
    Validate and preprocess uploaded image

    Base64 input may be a full data URL ("data:image/png;base64,....") or a
    bare base64 string. The 5MB limit is applied to the encoded payload,
    not to the decoded pixel buffer, so ordinary photos are accepted; any
    image with a side over 2000 pixels is scaled down to fit.

    Args:
        image: PIL image, numpy array, or base64 string

    Returns:
        Image as a numpy array, or None if invalid (the reason is shown via
        ``st.error``)
    """
    max_encoded_bytes = 5 * 1024 * 1024
    max_side = 2000

    try:
        if isinstance(image, str):
            # Everything after ";base64," is the payload; without that
            # marker the whole string is treated as the payload.
            _, marker, encoded = image.partition(';base64,')
            if not marker:
                encoded = image
            image_bytes = base64.b64decode(encoded.strip())
            if len(image_bytes) > max_encoded_bytes:
                st.error("Image is too large. Maximum size is 5MB.")
                return None
            image = Image.open(io.BytesIO(image_bytes))

        # Palette, CMYK, 1-bit, etc. do not map to usable pixel arrays, so
        # they are converted to RGB. Plain arrays have no ``mode``.
        mode = getattr(image, "mode", None)
        if mode is not None and mode not in ("RGB", "RGBA", "L"):
            image = image.convert("RGB")

        img_array = np.array(image)

        height, width = img_array.shape[:2]
        if height > max_side or width > max_side:
            # thumbnail() keeps the aspect ratio and only ever shrinks.
            img = Image.fromarray(img_array)
            img.thumbnail((max_side, max_side), Image.LANCZOS)
            img_array = np.array(img)

        return img_array

    except Exception as e:
        # UI boundary: undecodable base64, unreadable image data, etc.
        st.error(f"Error processing image: {e}")
        return None
|
|
|
# Limits for images fetched from a user-supplied URL.
_MAX_REMOTE_IMAGE_BYTES = 5 * 1024 * 1024
_REMOTE_IMAGE_TIMEOUT = 10  # seconds


def _fetch_image_from_url(image_url):
    """Download *image_url* and return it as a PIL image, or None on failure.

    Every failure is reported through ``st.error``. The body is read in
    chunks and abandoned once it exceeds the size limit, so a missing or
    wrong Content-Length header cannot bypass the cap.
    """
    # NOTE(security): the URL comes straight from the user and is fetched
    # by the server; restrict hosts/schemes if this runs on a network with
    # internal services.
    try:
        with requests.get(image_url, stream=True, timeout=_REMOTE_IMAGE_TIMEOUT) as response:
            response.raise_for_status()

            content_type = response.headers.get('content-type', '')
            content_length = int(response.headers.get('content-length', 0))

            if not content_type.startswith('image/'):
                st.error("Invalid image URL")
                return None
            if content_length > _MAX_REMOTE_IMAGE_BYTES:
                st.error("Image is too large. Maximum size is 5MB.")
                return None

            payload = bytearray()
            for chunk in response.iter_content(chunk_size=64 * 1024):
                payload.extend(chunk)
                if len(payload) > _MAX_REMOTE_IMAGE_BYTES:
                    st.error("Image is too large. Maximum size is 5MB.")
                    return None

        return Image.open(io.BytesIO(bytes(payload)))
    except Exception as e:
        st.error(f"Error fetching image: {e}")
        return None


def _render_analysis(analysis, heading):
    """Show the metrics, red flags and verification steps of one analysis."""
    st.subheader(heading)

    probability = analysis.get('fake_news_probability', 50)
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Fake News Probability", f"{probability:.2f}%")
    with col2:
        st.metric("Credibility Score", f"{analysis.get('credibility_score', 5):.2f}/10")
    with col3:
        st.metric("Risk Level", "High" if probability > 50 else "Low")

    st.subheader("🚩 Red Flags")
    for flag in analysis.get('red_flags', []):
        st.warning(flag)

    st.subheader("🕵️ Verification Steps")
    for step in analysis.get('verification_steps', []):
        st.info(step)


def main():
    """Render the two-tab Streamlit page and run analyses on demand.

    Tab 1 analyses a pasted article and, optionally, OCRs a supporting
    image supplied by upload, URL or base64. Tab 2 analyses pasted text
    only. Missing input is reported inline without aborting the render, so
    the other tab and the footers are still drawn on that run.
    """
    st.title("🚨 Advanced Fake News Detector")
    st.markdown("Powered by Google's Gemini 2.0 Flash AI")

    st.sidebar.header("🛠️ Detection Settings")
    # NOTE(review): the slider is shown but its value is not consumed by
    # anything below; wire it in or remove the widget.
    st.sidebar.slider(
        "Confidence Threshold",
        min_value=0.0,
        max_value=1.0,
        value=0.7,
        step=0.05
    )

    tab1, tab2 = st.tabs(["Article Analysis", "Direct OCR Text"])

    with tab1:
        st.header("📝 Article Analysis")
        article_text = st.text_area(
            "Paste the full article text",
            height=300,
            help="Copy and paste the complete article for comprehensive analysis"
        )

        st.header("🖼️ Article Evidence")
        image_option = st.radio(
            "Choose Image Input Method",
            ["Upload Image", "Paste Image URL", "Paste Base64 Image"],
            help="Select how you want to provide the image"
        )

        # PIL image (upload / URL) or raw base64 string; None if absent.
        uploaded_image = None

        if image_option == "Upload Image":
            image_file = st.file_uploader(
                "Upload supporting/source image",
                type=['png', 'jpg', 'jpeg'],
                help="Optional: Upload an image related to the article for additional context"
            )
            if image_file:
                try:
                    uploaded_image = Image.open(image_file)
                except OSError as e:
                    # Covers files PIL cannot identify as an image.
                    st.error(f"Error processing image: {e}")

        elif image_option == "Paste Image URL":
            image_url = st.text_input("Paste Image URL", help="Paste a direct link to an image")
            if image_url:
                uploaded_image = _fetch_image_from_url(image_url)

        elif image_option == "Paste Base64 Image":
            base64_input = st.text_area(
                "Paste Base64 Encoded Image",
                help="Paste a base64 encoded image string"
            )
            if base64_input:
                uploaded_image = base64_input

        if st.button("🔍 Detect Fake News", key="analyze_btn"):
            if not article_text:
                st.error("Please provide an article to analyze.")
            else:
                detector = FakeNewsDetector()

                with st.spinner('Analyzing article...'):
                    analysis = detector.analyze_article(article_text)

                if analysis:
                    _render_analysis(analysis, "🔬 Detailed Analysis")

                if uploaded_image is not None:
                    processed_image = validate_image(uploaded_image)

                    if processed_image is not None:
                        st.image(processed_image, caption="Uploaded Image", use_column_width=True)

                        extracted_text = perform_ocr(processed_image)

                        st.subheader("📸 Extracted Image Text")
                        st.text(extracted_text)

        st.markdown("---")
        st.markdown("""
        ### 🤔 How to Interpret Results
        - **Low Probability**: Article seems credible
        - **High Probability**: Exercise caution, verify sources
        - **Always cross-reference with multiple sources**
        """)

    with tab2:
        st.header("📄 Direct OCR Text Analysis")
        ocr_text = st.text_area(
            "Paste OCR or Extracted Text",
            height=300,
            help="Paste text directly extracted from images or documents"
        )

        if st.button("🔍 Analyze OCR Text", key="ocr_analyze_btn"):
            if not ocr_text:
                st.error("Please provide text to analyze.")
            else:
                detector = FakeNewsDetector()

                with st.spinner('Analyzing OCR text...'):
                    analysis = detector.analyze_article(ocr_text)

                if analysis:
                    _render_analysis(analysis, "🔬 OCR Text Analysis")

        st.markdown("---")
        st.markdown("""
        ### 📋 OCR Text Analysis Tips
        - Paste text extracted from images, PDFs, or scanned documents
        - Helps analyze text that cannot be directly copied
        - Provides insights into potential misinformation
        """)


if __name__ == "__main__":
    main()
|
|