Upload 21 files

- .gitattributes +1 -1
- __init__.py +1 -0
- agent.py +515 -0
- app.py +613 -0
- config.py +79 -0
- docs/architecture.md +176 -0
- docs/assets/app_screenshot.png +3 -0
- docs/changelog.md +114 -0
- docs/configuration.md +292 -0
- docs/data-handling.md +285 -0
- evaluate_performance.py +520 -0
- modules/__init__.py +19 -0
- modules/category_detection.py +716 -0
- modules/claim_extraction.py +241 -0
- modules/classification.py +472 -0
- modules/evidence_retrieval.py +816 -0
- modules/explanation.py +282 -0
- modules/rss_feed.py +408 -0
- utils/__init__.py +20 -0
- utils/api_utils.py +229 -0
- utils/models.py +157 -0
- utils/performance.py +135 -0
.gitattributes
CHANGED
@@ -53,4 +53,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+docs/assets/app_screenshot.png filter=lfs diff=lfs merge=lfs -text
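With this rule in place, running `git check-attr filter -- docs/assets/app_screenshot.png` from the repository root should report `filter: lfs`, confirming that the new screenshot is stored via Git LFS rather than in the regular object database.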
__init__.py
ADDED
@@ -0,0 +1 @@
# Root package initialization
agent.py
ADDED
@@ -0,0 +1,515 @@
"""
Agent module for the Fake News Detector application.

This module implements a LangGraph-based agent that orchestrates
the fact-checking process. It defines the agent setup, tools,
and processing pipeline for claim verification.
"""

import os
import time
import logging
import traceback
import json
from langchain_core.tools import tool
from langchain.prompts import PromptTemplate
from langgraph.prebuilt import create_react_agent

from utils.models import get_llm_model
from utils.performance import PerformanceTracker
from modules.claim_extraction import extract_claims
from modules.evidence_retrieval import retrieve_combined_evidence
from modules.classification import classify_with_llm, aggregate_evidence
from modules.explanation import generate_explanation

# Configure logger
logger = logging.getLogger("misinformation_detector")

# Reference to global performance tracker
performance_tracker = PerformanceTracker()

# Define LangGraph Tools
@tool
def claim_extractor(query):
    """
    Tool that extracts factual claims from a given text.

    Args:
        query (str): Text containing potential factual claims

    Returns:
        str: Extracted factual claim
    """
    performance_tracker.log_claim_processed()
    return extract_claims(query)

@tool
def evidence_retriever(query):
    """
    Tool that retrieves evidence from multiple sources for a claim.

    Args:
        query (str): The factual claim to gather evidence for

    Returns:
        list: List of evidence items from various sources
    """
    return retrieve_combined_evidence(query)

@tool
def truth_classifier(query, evidence):
    """
    Tool that classifies the truthfulness of a claim based on evidence.

    This function analyzes the provided evidence to determine if a claim is true,
    false, or uncertain. It implements a weighted scoring approach considering
    both the number of supporting/contradicting evidence items and their quality.

    Args:
        query (str): The factual claim to classify
        evidence (list): Evidence items to evaluate against the claim

    Returns:
        str: JSON string containing verdict, confidence, and classification results
            with a guaranteed structure for consistent downstream processing
    """
    # Perform classification on the evidence
    classification_results = classify_with_llm(query, evidence)

    # Aggregate results to determine overall verdict and confidence
    truth_label, confidence = aggregate_evidence(classification_results)

    # Debug logging
    logger.info(f"Classification results: {len(classification_results)} items")
    logger.info(f"Aggregate result: {truth_label}, confidence: {confidence}")

    # Ensure truth_label is never None
    if not truth_label:
        truth_label = "Uncertain"
        confidence = 0.0

    # Return a structured dictionary with all needed information
    result = {
        "verdict": truth_label,
        "confidence": confidence,
        "results": classification_results
    }

    # Convert to JSON string for consistent handling
    return json.dumps(result)

@tool
def explanation_generator(claim, evidence_results, truth_label):
    """
    Tool that generates a human-readable explanation for the verdict.

    This function creates a clear, natural language explanation of why a claim
    was classified as true, false, or uncertain based on the evidence. It handles
    various truth label formats and extracts appropriate confidence values.

    Args:
        claim (str): The factual claim being verified
        evidence_results (list): Evidence items and classification results
        truth_label (str): The verdict (True/False/Uncertain), which may come
            in different formats

    Returns:
        str: Natural language explanation of the verdict with confidence
            framing and evidence citations

    Note:
        The function extracts confidence values from evidence when available
        or uses appropriate defaults based on the verdict type. It includes
        robust error handling to ensure explanations are always generated,
        even in edge cases.
    """
    try:
        # Extract confidence if available in evidence_results
        confidence = None
        if isinstance(evidence_results, list) and evidence_results and isinstance(evidence_results[0], dict):
            # Try to get confidence from results
            confidence_values = [result.get('confidence', 0) for result in evidence_results if 'confidence' in result]
            if confidence_values:
                confidence = max(confidence_values)

        # If confidence couldn't be extracted, use a default value based on the verdict
        if confidence is None:
            if truth_label and ("True" in truth_label or "False" in truth_label):
                confidence = 0.7  # Default for definitive verdicts
            else:
                confidence = 0.5  # Default for uncertain verdicts

        # Generate the explanation
        explanation = generate_explanation(claim, evidence_results, truth_label, confidence)
        logger.info(f"Generated explanation: {explanation[:100]}...")
        return explanation
    except Exception as e:
        logger.error(f"Error generating explanation: {str(e)}")
        # Provide a fallback explanation with basic information
        return f"The claim '{claim}' has been evaluated as {truth_label}. The available evidence provides {confidence or 'moderate'} confidence in this assessment. For more detailed information, please review the evidence provided."

def setup_agent():
    """
    Create and configure a ReAct agent with the fact-checking tools.

    This function configures a LangGraph ReAct agent with all the
    necessary tools for fact checking, including claim extraction,
    evidence retrieval, classification, and explanation generation.

    Returns:
        object: Configured LangGraph agent ready for claim processing

    Raises:
        ValueError: If OpenAI API key is not set
    """
    # Make sure OpenAI API key is set
    if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"].strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty.")
        raise ValueError("OpenAI API key is required")

    # Define tools with any customizations
    tools = [
        claim_extractor,
        evidence_retriever,
        truth_classifier,
        explanation_generator
    ]

    # Define the prompt template with clearer, more efficient instructions
    FORMAT_INSTRUCTIONS_TEMPLATE = """
    Use the following format:
    Question: the input question you must answer
    Action: the action to take, should be one of: {tool_names}
    Action Input: the input to the action
    Observation: the result of the action
    ... (this Action/Action Input/Observation can repeat N times)
    Final Answer: the final answer to the original input question
    """

    prompt = PromptTemplate(
        input_variables=["input", "tool_names"],
        template=f"""
    You are a fact-checking assistant that verifies claims by gathering evidence and
    determining their truthfulness. Follow these exact steps in sequence:

    1. Call claim_extractor to extract the main factual claim
    2. Call evidence_retriever to gather evidence about the claim
    3. Call truth_classifier to evaluate the claim using the evidence
    4. Call explanation_generator to explain the result
    5. Provide your Final Answer that summarizes everything

    Execute these steps in order without unnecessary thinking steps between tool calls.
    Be direct and efficient in your verification process.

    {FORMAT_INSTRUCTIONS_TEMPLATE}
    """
    )

    try:
        # Get the LLM model
        model = get_llm_model()

        # Create the agent with a shorter timeout
        graph = create_react_agent(model, tools=tools)
        logger.info("Agent created successfully")
        return graph
    except Exception as e:
        logger.error(f"Error creating agent: {str(e)}")
        raise e

def process_claim(claim, agent=None, recursion_limit=20):
    """
    Process a claim to determine its truthfulness using the agent.

    This function invokes the LangGraph agent to process a factual claim,
    extract supporting evidence, evaluate the claim's truthfulness, and
    generate a human-readable explanation.

    Args:
        claim (str): The factual claim to be verified
        agent (object, optional): Initialized LangGraph agent. If None, an error is logged.
        recursion_limit (int, optional): Maximum recursion depth for agent. Default: 20.
            Higher values allow more complex reasoning but increase processing time.

    Returns:
        dict: Result dictionary containing:
            - claim: Extracted factual claim
            - evidence: List of evidence pieces
            - evidence_count: Number of evidence pieces
            - classification: Verdict (True/False/Uncertain)
            - confidence: Confidence score (0-1)
            - explanation: Human-readable explanation of the verdict
            - final_answer: Final answer from the agent
            - Or error information if processing failed
    """
    if agent is None:
        logger.error("Agent not initialized. Call setup_agent() first.")
        return None

    start_time = time.time()
    logger.info(f"Processing claim with agent: {claim}")

    try:
        # IMPORTANT: Create fresh inputs for each claim
        # This ensures we don't carry over state from previous claims
        inputs = {"messages": [("user", claim)]}

        # Set configuration - reduced recursion limit for faster processing
        config = {"recursion_limit": recursion_limit}

        # Invoke the agent
        response = agent.invoke(inputs, config)

        # Format the response
        result = format_response(response)

        # Log performance
        elapsed = time.time() - start_time
        logger.info(f"Claim processed in {elapsed:.2f} seconds")

        return result

    except Exception as e:
        logger.error(f"Error processing claim with agent: {str(e)}")
        logger.error(traceback.format_exc())
        return {"error": str(e)}

def format_response(response):
    """
    Format the agent's response into a structured result.

    This function extracts key information from the agent's response,
    including the claim, evidence, classification, and explanation.
    It also performs error handling and provides fallback values.

    Args:
        response (dict): Raw response from the LangGraph agent

    Returns:
        dict: Structured result containing claim verification data
    """
    try:
        if not response or "messages" not in response:
            return {"error": "Invalid response format"}

        messages = response.get("messages", [])

        # Initialize result container with default values
        result = {
            "claim": None,
            "evidence": [],
            "evidence_count": 0,
            "classification": "Uncertain",
            "confidence": 0.0,  # Default zero confidence
            "explanation": "Insufficient evidence to evaluate this claim.",
            "final_answer": None,
            "thoughts": []
        }

        # Track if we found results from each tool
        found_tools = {
            "claim_extractor": False,
            "evidence_retriever": False,
            "truth_classifier": False,
            "explanation_generator": False
        }

        # Extract information from messages
        tool_outputs = {}

        for idx, message in enumerate(messages):
            # Extract agent thoughts
            if hasattr(message, "content") and getattr(message, "type", "") == "assistant":
                content = message.content
                if "Thought:" in content:
                    thought_parts = content.split("Thought:", 1)
                    if len(thought_parts) > 1:
                        thought = thought_parts[1].split("\n")[0].strip()
                        result["thoughts"].append(thought)

            # Extract tool outputs
            if hasattr(message, "type") and message.type == "tool":
                tool_name = getattr(message, "name", "unknown")

                # Store tool outputs
                tool_outputs[tool_name] = message.content

                # Extract specific information
                if tool_name == "claim_extractor":
                    found_tools["claim_extractor"] = True
                    if message.content:
                        result["claim"] = message.content

                elif tool_name == "evidence_retriever":
                    found_tools["evidence_retriever"] = True
                    # Handle string representation of a list
                    if message.content:
                        if isinstance(message.content, list):
                            result["evidence"] = message.content
                            result["evidence_count"] = len(message.content)
                        elif isinstance(message.content, str) and message.content.startswith("[") and message.content.endswith("]"):
                            try:
                                import ast
                                parsed_content = ast.literal_eval(message.content)
                                if isinstance(parsed_content, list):
                                    result["evidence"] = parsed_content
                                    result["evidence_count"] = len(parsed_content)
                                else:
                                    result["evidence"] = [message.content]
                                    result["evidence_count"] = 1
                            except Exception:
                                result["evidence"] = [message.content]
                                result["evidence_count"] = 1
                        else:
                            result["evidence"] = [message.content]
                            result["evidence_count"] = 1
                            logger.warning(f"Evidence retrieved is not a list: {type(message.content)}")

                elif tool_name == "truth_classifier":
                    found_tools["truth_classifier"] = True

                    # Log the incoming content for debugging
                    logger.info(f"Truth classifier content type: {type(message.content)}")
                    logger.info(f"Truth classifier content: {message.content}")

                    # Handle JSON formatted result from truth_classifier
                    if isinstance(message.content, str):
                        try:
                            # Parse the JSON string
                            parsed_content = json.loads(message.content)

                            # Extract the values from the parsed content
                            result["classification"] = parsed_content.get("verdict", "Uncertain")
                            result["confidence"] = float(parsed_content.get("confidence", 0.0))
                            result["classification_results"] = parsed_content.get("results", [])

                            # Add low confidence warning for results < 10%
                            if 0 < result["confidence"] < 0.1:
                                result["low_confidence_warning"] = True

                            logger.info(f"Extracted from JSON: verdict={result['classification']}, confidence={result['confidence']}")
                        except json.JSONDecodeError:
                            logger.warning(f"Could not parse truth classifier JSON: {message.content}")
                        except Exception as e:
                            logger.warning(f"Error extracting from truth classifier output: {e}")
                    else:
                        logger.warning(f"Unexpected truth_classifier content format: {message.content}")

                elif tool_name == "explanation_generator":
                    found_tools["explanation_generator"] = True
                    if message.content:
                        result["explanation"] = message.content
                        logger.info(f"Found explanation from tool: {message.content[:100]}...")

            # Get final answer from last message
            elif idx == len(messages) - 1 and hasattr(message, "content"):
                result["final_answer"] = message.content

        # Log which tools weren't found
        missing_tools = [tool for tool, found in found_tools.items() if not found]
        if missing_tools:
            logger.warning(f"Missing tool outputs in response: {', '.join(missing_tools)}")

        # IMPORTANT: ENHANCED FALLBACK MECHANISM
        # Always run truth classification if evidence was collected but classifier wasn't called
        if found_tools["evidence_retriever"] and not found_tools["truth_classifier"]:
            logger.info("Truth classifier was not called by the agent, executing fallback classification")

            try:
                from modules.classification import classify_with_llm, aggregate_evidence

                # Get the evidence from the results
                evidence = result["evidence"]
                claim = result["claim"] or "Unknown claim"

                # Force classification even with minimal evidence
                if evidence:
                    # Classify with available evidence
                    classification_results = classify_with_llm(claim, evidence)
                    truth_label, confidence = aggregate_evidence(classification_results)

                    # Update result with classification results
                    result["classification"] = truth_label
                    result["confidence"] = confidence
                    result["classification_results"] = classification_results

                    # Add low confidence warning if needed
                    if 0 < confidence < 0.1:
                        result["low_confidence_warning"] = True

                    logger.info(f"Fallback classification: {truth_label}, confidence: {confidence}")
                else:
                    # If no evidence at all, maintain uncertain with zero confidence
                    result["classification"] = "Uncertain"
                    result["confidence"] = 0.0
                    logger.info("No evidence available for fallback classification")
            except Exception as e:
                logger.error(f"Error in fallback truth classification: {e}")

        # ENHANCED: Always generate explanation if classification exists but explanation wasn't called
        if (found_tools["truth_classifier"] or result["classification"] != "Uncertain") and not found_tools["explanation_generator"]:
            logger.info("Explanation generator was not called by the agent, using fallback explanation generation")

            try:
                from modules.explanation import generate_explanation

                # Get the necessary inputs for explanation generation
                claim = result["claim"] or "Unknown claim"
                evidence = result["evidence"]
                truth_label = result["classification"]
                confidence_value = result["confidence"]
                classification_results = result.get("classification_results", [])

                # Choose the best available evidence for explanation
                explanation_evidence = classification_results if classification_results else evidence

                # Force explanation generation even with minimal evidence
                explanation = generate_explanation(claim, explanation_evidence, truth_label, confidence_value)

                # Use the generated explanation
                if explanation:
                    logger.info(f"Generated fallback explanation: {explanation[:100]}...")
                    result["explanation"] = explanation
            except Exception as e:
                logger.error(f"Error generating fallback explanation: {e}")

        # Make sure evidence exists
        if result["evidence_count"] > 0 and (not result["evidence"] or len(result["evidence"]) == 0):
            logger.warning("Evidence count is non-zero but evidence list is empty. This is a data inconsistency.")
            result["evidence_count"] = 0

        # Add debug info about the final result
        logger.info(f"Final classification: {result['classification']}, confidence: {result['confidence']}")
        logger.info(f"Final explanation: {result['explanation'][:100]}...")

        # Add performance metrics
        result["performance"] = performance_tracker.get_summary()

        # Memory management - limit the size of evidence and thoughts
        # To keep memory usage reasonable for web deployment
        if "evidence" in result and isinstance(result["evidence"], list):
            limited_evidence = []
            for ev in result["evidence"]:
                if isinstance(ev, str) and len(ev) > 500:
                    limited_evidence.append(ev[:497] + "...")
                else:
                    limited_evidence.append(ev)
            result["evidence"] = limited_evidence

        # Limit thoughts to conserve memory
        if "thoughts" in result and len(result["thoughts"]) > 10:
            result["thoughts"] = result["thoughts"][:10]

        return result

    except Exception as e:
        logger.error(f"Error formatting agent response: {str(e)}")
        logger.error(traceback.format_exc())
        return {
            "error": str(e),
            "traceback": traceback.format_exc(),
            "classification": "Error",
            "confidence": 0.0,
            "explanation": "An error occurred while processing this claim."
        }
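A minimal usage sketch for the module above, assuming `OPENAI_API_KEY` is exported and the `modules/` and `utils/` packages from this commit are on the import path; the claim text and printed fields are illustrative:

# Illustrative driver for agent.py; not part of the committed files.
from agent import setup_agent, process_claim

fact_checker = setup_agent()  # raises ValueError if OPENAI_API_KEY is missing
result = process_claim("The Eiffel Tower is located in Paris.", fact_checker)

if result and "error" not in result:
    print(result["classification"])       # "True" / "False" / "Uncertain"
    print(f"{result['confidence']:.2%}")  # confidence in [0, 1]
    print(result["explanation"])          # natural-language justification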
app.py
ADDED
@@ -0,0 +1,613 @@
"""
Main Streamlit application for the Fake News Detector.

This module implements the user interface for claim verification,
rendering the results and handling user interactions. It also
manages the application lifecycle including initialization and cleanup.
"""

import streamlit as st
import time
import json
import os
import logging
import atexit
import sys
from pathlib import Path

# Configure logging first, before other imports
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger("misinformation_detector")

# Check for critical environment variables
if not os.environ.get("OPENAI_API_KEY"):
    logger.warning("OPENAI_API_KEY not set. Please configure this in your Hugging Face Spaces secrets.")

# Import our modules
from utils.models import initialize_models
from utils.performance import PerformanceTracker

# Import agent functionality
import agent

# Initialize performance tracker
performance_tracker = PerformanceTracker()

# Ensure data directory exists
data_dir = Path("data")
if not data_dir.exists():
    logger.info("Creating data directory")
    data_dir.mkdir(exist_ok=True)

# Set page configuration
st.set_page_config(
    page_title="AskVeracity",
    page_icon="🔍",
    layout="wide",
)

# Hide the "Press ⌘+Enter to apply" text with CSS
st.markdown("""
<style>
/* Hide the shortcut text that appears at the bottom of text areas */
.stTextArea div:has(textarea) + div {
    visibility: hidden !important;
    height: 0px !important;
    position: absolute !important;
}
</style>
""", unsafe_allow_html=True)

def reset_claim_specific_state():
    """
    Reset claim-specific state while preserving model caching.

    This function resets only the state variables related to the processing
    of a specific claim, without clearing cached models to maintain efficiency.
    """
    logger.info("Resetting claim-specific state")

    # Reset performance tracker metrics but not the instance itself
    global performance_tracker
    performance_tracker.reset()

    # Clear session state variables related to the current claim
    if 'result' in st.session_state:
        st.session_state.result = None

    if 'has_result' in st.session_state:
        st.session_state.has_result = False

    if 'claim_to_process' in st.session_state:
        st.session_state.claim_to_process = ""

    # If we already have an agent, keep the instance but ensure it starts fresh
    if hasattr(st.session_state, 'agent') and st.session_state.agent:
        # Recreate the agent to ensure fresh state
        try:
            logger.info("Refreshing agent state for new claim processing")
            # We're keeping the cached models but reinitializing the agent
            st.session_state.agent = agent.setup_agent()
        except Exception as e:
            logger.error(f"Error refreshing agent: {e}")

@st.cache_resource
def get_agent():
    """
    Initialize and cache the agent for reuse across requests.

    This function creates and caches the fact-checking agent to avoid
    recreating it for every request. It's decorated with st.cache_resource
    to ensure the agent is only initialized once per session.

    Returns:
        object: Initialized LangGraph agent for fact checking
    """
    logger.info("Initializing models and agent (cached)")
    initialize_models()
    return agent.setup_agent()

def cleanup_resources():
    """
    Clean up resources when app is closed.

    This function is registered with atexit to ensure resources
    are properly released when the application terminates.
    """
    try:
        # Clear any cached data
        st.cache_data.clear()

        # Reset performance tracker
        performance_tracker.reset()

        # Log cleanup
        logger.info("Resources cleaned up successfully")
    except Exception as e:
        logger.error(f"Error during cleanup: {e}")

# Register cleanup handler
atexit.register(cleanup_resources)

# App title and description
st.title("🔍 AskVeracity")
st.markdown("""
This is a simple AI-powered agentic tool - a fact-checking system that analyzes claims to determine
their truthfulness by gathering and analyzing evidence from various sources, such as Wikipedia,
news outlets, and academic repositories. The application aims to support broader efforts in misinformation detection.
""")

# Sidebar with app information
with st.sidebar:
    st.header("About")
    st.info(
        "This system uses a combination of NLP techniques and LLMs to "
        "extract claims, gather evidence, and classify the truthfulness of statements.\n\n"
        "**Technical:** Built with Python, Streamlit, LangGraph, and OpenAI, leveraging spaCy for NLP and various APIs for retrieving evidence from diverse sources."
    )

    # Application information
    st.markdown("### How It Works")
    st.info(
        "1. Enter any recent news or a factual claim\n"
        "2. Our AI gathers evidence from Wikipedia, news sources, and academic repositories\n"
        "3. The system analyzes the evidence to determine truthfulness\n"
        "4. Results show the verdict with supporting evidence"
    )

    # Our Mission
    st.markdown("### Our Mission")
    st.info(
        "AskVeracity aims to combat misinformation in real-time through an open-source application built with accessible tools. "
        "We believe in empowering people with factual information to make informed decisions."
    )

    # Limitations and Usage
    st.markdown("### Limitations")
    st.warning(
        "Due to resource constraints, AskVeracity may not always provide real-time results with perfect accuracy. "
        "Performance is typically best with widely reported news and information published within the last 48 hours. "
        "Additionally, the system evaluates claims based on current evidence - a claim that was true in the past "
        "may be judged false if circumstances have changed, and vice versa. "
        "Currently, AskVeracity is only available in English."
    )

    # Best Practices
    st.markdown("### Best Practices")
    st.success(
        "For optimal results:\n\n"
        "• Keep claims short and precise\n\n"
        "• Each part of the claim is important\n\n"
        "• Include key details in your claim\n\n"
        "• Phrase claims as direct statements rather than questions\n\n"
        "• Be specific about who said what\n\n"
        "• For very recent announcements or technical features, try checking company blogs, official documentation, or specialized tech news sites directly\n\n"
        "• If receiving an \"Uncertain\" verdict, try alternative phrasings or more general versions of the claim\n\n"
        "• Consider that some technical features might be in limited preview programs with minimal public documentation"
    )

    # Example comparison
    with st.expander("📝 Examples of Effective Claims"):
        st.markdown("""
        **Less precise:** "Country A-Country B Relations Are Moving in Positive Direction as per Country B Minister John Doe."

        **More precise:** "Country B's External Affairs Minister John Doe has claimed that Country A-Country B Relations Are Moving in Positive Direction."
        """)

    # Important Notes
    st.markdown("### Important Notes")
    st.info(
        "• AskVeracity covers general topics and is not specialized in any single domain or location\n\n"
        "• Results can vary based on available evidence and LLM behavior\n\n"
        "• The system is designed to indicate uncertainty when evidence is insufficient\n\n"
        "• AskVeracity is not a chatbot and does not maintain conversation history\n\n"
        "• We recommend cross-verifying critical information with additional sources"
    )

    # Privacy Information
    st.markdown("### Data Privacy")
    st.info(
        "We do not collect or store any data about the claims you submit. "
        "Your interactions are processed by OpenAI's API. Please refer to "
        "[OpenAI's privacy policy](https://openai.com/policies/privacy-policy) for details on their data handling practices."
    )

    # Feedback Section
    st.markdown("### Feedback")
    st.success(
        "AskVeracity is evolving and we welcome your feedback to help us improve. "
        "Please reach out to us with questions, suggestions, or concerns."
    )

# Initialize session state variables
if 'processing' not in st.session_state:
    st.session_state.processing = False
if 'claim_to_process' not in st.session_state:
    st.session_state.claim_to_process = ""
if 'has_result' not in st.session_state:
    st.session_state.has_result = False
if 'result' not in st.session_state:
    st.session_state.result = None
if 'total_time' not in st.session_state:
    st.session_state.total_time = 0
if 'fresh_state' not in st.session_state:
    st.session_state.fresh_state = True
# Initialize verify button disabled state
if 'verify_btn_disabled' not in st.session_state:
    st.session_state.verify_btn_disabled = False
# Add a new state to track input content
if 'input_content' not in st.session_state:
    st.session_state.input_content = ""

# Main interface
st.markdown("### Enter a claim to verify")

# Define a callback for input change
def on_input_change():
    st.session_state.input_content = st.session_state.claim_input_area

# Input area with callback - key fix here!
claim_input = st.text_area(
    "",
    value=st.session_state.input_content,
    height=100,
    placeholder=(
        "Examples: The Eiffel Tower is located in Rome, Italy. "
        "Meta recently released its Llama 4 large language model. "
        "Justin Trudeau is not the Canadian Prime Minister anymore. "
        "A recent piece of news."
    ),
    key="claim_input_area",
    on_change=on_input_change,
    label_visibility="collapsed",
    max_chars=None,
    disabled=st.session_state.processing,
)

# Add information about claim formatting
st.info("""
**Tip for more accurate results:**
- As older news tends to get deprioritized by sources, trying recent news may yield better results
- Try news claims as they appear in the sources
- For claims older than 36 hours, consider rephrasing the claim by removing time-sensitive words like "recently," "today," "now," etc.
- Rephrase verbs from present tense to past tense for older events. Examples below:
    - Instead of "launches/unveils/releases" → use "has launched/unveiled/released"
    - Instead of "announces/invites/retaliates/ends" → use "has announced/invited/retaliated/ended"
""")

# Information about result variability
st.caption("""
💡 **Note:** Results may vary slightly each time, even for the same claim. This is by design, allowing our system to:
- Incorporate the most recent evidence available
- Benefit from the AI's ability to consider multiple perspectives
- Adapt to evolving information landscapes
""")

st.warning("⏱️ **Note:** Processing times may vary from 10 seconds to 3 minutes depending on query complexity, available evidence, and current API response times.")

# Create a clean interface based on state
if st.session_state.fresh_state:
    # Only show the verify button in fresh state
    verify_button = st.button(
        "Verify Claim",
        type="primary",
        key="verify_btn",
        disabled=st.session_state.verify_btn_disabled
    )

    # When button is clicked and not already processing
    if verify_button and not st.session_state.processing:
        # Only show error if claim input is completely empty
        if not claim_input or claim_input.strip() == "":
            st.error("Please enter a claim to verify.")
        else:
            # Reset claim-specific state before processing a new claim
            reset_claim_specific_state()

            # Store the claim and set processing state
            st.session_state.claim_to_process = claim_input
            st.session_state.processing = True
            st.session_state.fresh_state = False
            st.session_state.verify_btn_disabled = True
            # Force a rerun to refresh UI
            st.rerun()

else:
    # This is either during processing or showing results

    # Create a container for processing and results
    analysis_container = st.container()

    with analysis_container:
        # If we're processing, show the processing UI
        if st.session_state.processing:
            st.subheader("🔄 Processing...")
            status = st.empty()
            status.text("Verifying claim... (this may take a while)")
            progress_bar = st.progress(0)

            # Initialize models and agent if needed
            if not hasattr(st.session_state, 'agent_initialized'):
                with st.spinner("Initializing system..."):
                    st.session_state.agent = get_agent()
                    st.session_state.agent_initialized = True

            try:
                # Use the stored claim for processing
                claim_to_process = st.session_state.claim_to_process

                # Process the claim with the agent
                start_time = time.time()
                result = agent.process_claim(claim_to_process, st.session_state.agent)
                total_time = time.time() - start_time

                # Update progress as claim processing completes
                progress_bar.progress(100)

                # Check for None result
                if result is None:
                    st.error("Failed to process the claim. Please try again.")
                    st.session_state.processing = False
                    st.session_state.fresh_state = True
                    st.session_state.verify_btn_disabled = False
                else:
                    # If result exists but key values are missing, provide default values
                    if "classification" not in result or result["classification"] is None:
                        result["classification"] = "Uncertain"

                    if "confidence" not in result or result["confidence"] is None:
                        result["confidence"] = 0.0  # Default to 0.0

                    if "explanation" not in result or result["explanation"] is None:
                        result["explanation"] = "Insufficient evidence was found to determine the truthfulness of this claim."

                    # Update result with timing information
                    if "processing_times" not in result:
                        result["processing_times"] = {"total": total_time}

                    # Store the result and timing information
                    st.session_state.result = result
                    st.session_state.total_time = total_time
                    st.session_state.has_result = True
                    st.session_state.processing = False

                    # Clear processing indicators before showing results
                    status.empty()
                    progress_bar.empty()

                    # Force rerun to display results
                    st.rerun()

            except Exception as e:
                # Handle any exceptions and reset processing state
                logger.error(f"Error during claim processing: {str(e)}")
                st.error(f"An error occurred: {str(e)}")
                st.session_state.processing = False
                st.session_state.fresh_state = True
                st.session_state.verify_btn_disabled = False
                # Force rerun to re-enable button
                st.rerun()

        # Display results if available
        elif st.session_state.has_result and st.session_state.result:
            result = st.session_state.result
            total_time = st.session_state.total_time
            claim_to_process = st.session_state.claim_to_process

            st.subheader("📊 Verification Results")

            result_col1, result_col2 = st.columns([2, 1])

            with result_col1:
                # Display only the original claim
                st.markdown(f"**Claim:** {claim_to_process}")

                # Make verdict colorful based on classification
                truth_label = result.get('classification', 'Uncertain')
                if truth_label and "True" in truth_label:
                    verdict_color = "green"
                elif truth_label and "False" in truth_label:
                    verdict_color = "red"
                else:
                    verdict_color = "gray"

                st.markdown(f"**Verdict:** <span style='color:{verdict_color};font-size:1.2em'>{truth_label}</span>", unsafe_allow_html=True)

                # Ensure confidence value is used
                if "confidence" in result and result["confidence"] is not None:
                    confidence_value = result["confidence"]
                    # Make sure confidence is a numeric value between 0 and 1
                    try:
                        confidence_value = float(confidence_value)
                        if confidence_value < 0:
                            confidence_value = 0.0
                        elif confidence_value > 1:
                            confidence_value = 1.0
                    except (ValueError, TypeError):
                        confidence_value = 0.0  # Fallback to zero confidence
                else:
                    confidence_value = 0.0  # Default confidence

                # Display the confidence
                st.markdown(f"**Confidence:** {confidence_value:.2%}")

                # Display low confidence warning if applicable
                if 0 < confidence_value < 0.1:
                    st.warning("⚠️ **Very Low Confidence:** This result has very low confidence. Please verify with other authoritative sources.")

                # Display explanation
                st.markdown(f"**Explanation:** {result.get('explanation', 'No explanation available.')}")

                # Add disclaimer about cross-verification
                st.info("⚠️ **Note:** Please cross-verify important information with additional reliable sources.")

                if truth_label == "Uncertain":
                    st.info("💡 **Tip for Uncertain Results:** This claim might be too recent for our sources, too specialized, or might not be widely reported or supported with evidence. It is also possible that the claim does not fall into any of these categories and our system may have failed to fetch the correct evidence. Try checking official sites and blogs, news sites, or other related documentation for more information.")

            with result_col2:
                st.markdown("**Processing Time**")
                times = result.get("processing_times", {"total": total_time})
                st.markdown(f"- **Total:** {times.get('total', total_time):.2f}s")

                # Show agent thoughts
                if "thoughts" in result and result["thoughts"]:
                    st.markdown("**AI Reasoning Process**")
                    thoughts = result.get("thoughts", [])
                    for i, thought in enumerate(thoughts[:5]):  # Show top 5 thoughts
                        st.markdown(f"{i+1}. {thought}")
                    if len(thoughts) > 5:
                        with st.expander("Show all reasoning steps"):
                            for i, thought in enumerate(thoughts):
                                st.markdown(f"{i+1}. {thought}")

            # Display evidence
            st.subheader("📝 Evidence")
            evidence_count = result.get("evidence_count", 0)
            evidence = result.get("evidence", [])

            # Ensure evidence is a list
            if not isinstance(evidence, list):
                if isinstance(evidence, str):
                    # Try to parse string as a list
                    try:
                        import ast
                        parsed_evidence = ast.literal_eval(evidence)
                        if isinstance(parsed_evidence, list):
                            evidence = parsed_evidence
                        else:
                            evidence = [evidence]
                    except Exception:
                        evidence = [evidence]
                else:
                    evidence = [str(evidence)] if evidence else []

            # Update evidence count based on actual evidence list
            evidence_count = len(evidence)

            # Get classification results
            classification_results = result.get("classification_results", [])

            # Check for empty evidence
            if evidence_count == 0 or not any(ev for ev in evidence if ev):
                st.warning("No relevant evidence was found for this claim. The verdict may not be reliable.")
            else:
                # Add message about processing large number of evidence items
                st.info("The system processes a large number of evidence items across multiple sources and provides a response based on the top relevant evidence items.")

                # Filter to only show support and contradict evidence
                if classification_results:
                    support_evidence = []
                    contradict_evidence = []

                    # Extract supporting and contradicting evidence
                    for res in classification_results:
                        if isinstance(res, dict) and "label" in res and "evidence" in res:
                            if res.get("label") == "support":
                                support_evidence.append(res)
                            elif res.get("label") == "contradict":
                                contradict_evidence.append(res)

                    # Sort by confidence
                    support_evidence.sort(key=lambda x: x.get("confidence", 0), reverse=True)
                    contradict_evidence.sort(key=lambda x: x.get("confidence", 0), reverse=True)

                    # Show counts of relevant evidence
                    st.markdown(f"Found {len(support_evidence)} supporting and {len(contradict_evidence)} contradicting evidence items")

                    # Only show evidence tabs if we have evidence
                    if evidence and any(ev for ev in evidence if ev):
                        # Create tabs for supporting and contradicting evidence only (removed All Evidence tab)
                        evidence_tabs = st.tabs(["Supporting Evidence", "Contradicting Evidence", "Source Details"])

                        # Supporting Evidence tab
                        with evidence_tabs[0]:
                            if support_evidence:
                                for i, res in enumerate(support_evidence):
                                    evidence_text = res.get("evidence", "")
                                    confidence = res.get("confidence", 0)
                                    reason = res.get("reason", "No reason provided")

                                    if evidence_text and isinstance(evidence_text, str) and evidence_text.strip():
                                        with st.expander(f"Supporting Evidence {i+1} (Confidence: {confidence:.2%})", expanded=(i == 0)):
                                            st.text(evidence_text)
                                            st.markdown(f"**Reason:** {reason}")
                            else:
                                st.info("No supporting evidence was found for this claim.")

                        # Contradicting Evidence tab
                        with evidence_tabs[1]:
                            if contradict_evidence:
                                for i, res in enumerate(contradict_evidence):
                                    evidence_text = res.get("evidence", "")
                                    confidence = res.get("confidence", 0)
                                    reason = res.get("reason", "No reason provided")

                                    if evidence_text and isinstance(evidence_text, str) and evidence_text.strip():
                                        with st.expander(f"Contradicting Evidence {i+1} (Confidence: {confidence:.2%})", expanded=(i == 0)):
                                            st.text(evidence_text)
                                            st.markdown(f"**Reason:** {reason}")
                            else:
                                st.info("No contradicting evidence was found for this claim.")

                        # Source Details tab (keeping original functionality)
                        with evidence_tabs[2]:
                            st.markdown("The system evaluates evidence from various sources to determine the verdict.")

                            evidence_sources = {}
                            for ev in evidence:
                                if not ev or not isinstance(ev, str):
                                    continue

                                source = "Unknown"
                                # Extract source info from evidence text
                                if "URL:" in ev:
                                    import re
                                    url_match = re.search(r'URL: https?://(?:www\.)?([^/]+)', ev)
                                    if url_match:
                                        source = url_match.group(1)
                                elif "Source:" in ev:
                                    import re
                                    source_match = re.search(r'Source: ([^,]+)', ev)
                                    if source_match:
                                        source = source_match.group(1)

                                if source in evidence_sources:
                                    evidence_sources[source] += 1
                                else:
                                    evidence_sources[source] = 1

                            # Display evidence source distribution
                            if evidence_sources:
                                st.markdown("**Evidence Source Distribution**")
                                for source, count in evidence_sources.items():
                                    st.markdown(f"- {source}: {count} item(s)")
                            else:
                                st.info("No source information available in the evidence.")
                    else:
                        st.warning("No evidence was retrieved for this claim.")
                else:
                    # Fallback if no classification results
                    st.markdown(f"Retrieved {evidence_count} pieces of evidence, but none were classified as supporting or contradicting.")
                    st.warning("No supporting or contradicting evidence was found for this claim.")

            # Button to start a new verification
            if st.button("Verify Another Claim", type="primary", key="new_verify_btn"):
                # Reset all necessary state variables
                st.session_state.fresh_state = True
                st.session_state.has_result = False
                st.session_state.result = None
                st.session_state.processing = False
                st.session_state.claim_to_process = ""
                st.session_state.verify_btn_disabled = False
                # Clear the input field by resetting the input_content
                st.session_state.input_content = ""
                st.rerun()

# Footer with additional information
st.markdown("---")
st.caption("""
**AskVeracity** is an open-source tool designed to help combat misinformation through transparent evidence gathering and analysis.
While we strive for accuracy, the system has inherent limitations based on available data sources, API constraints, and the evolving nature of information.
""")
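For reference when reading the rendering code above, the result dictionary consumed by this UI has the shape produced by `format_response()` in agent.py; all values below are illustrative:

# Illustrative result shape consumed by app.py; not part of the committed files.
example_result = {
    "claim": "The Eiffel Tower is located in Paris.",
    "evidence": ["Eiffel Tower ... URL: https://en.wikipedia.org/wiki/Eiffel_Tower"],
    "evidence_count": 1,
    "classification": "True",   # "True" / "False" / "Uncertain"
    "confidence": 0.85,         # clamped to [0, 1] by the UI
    "explanation": "The claim is supported by the retrieved evidence.",
    "final_answer": "The claim is true.",
    "thoughts": ["I should extract the main factual claim first."],
    "classification_results": [
        {"label": "support", "evidence": "...", "confidence": 0.85, "reason": "..."},
    ],
}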
config.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Configuration module for the Fake News Detector application.

This module handles loading configuration parameters, API keys,
and source credibility data needed for the fact checking system.
It manages environment variables and file-based configurations.
"""

import os
import json
import logging
from pathlib import Path

# Configure logger
logger = logging.getLogger("misinformation_detector")

# Base paths
ROOT_DIR = Path(__file__).parent.absolute()
DATA_DIR = ROOT_DIR / "data"

# Ensure data directory exists
DATA_DIR.mkdir(exist_ok=True)

# First try to get API keys from Streamlit secrets, then fall back to environment variables
try:
    import streamlit as st
    OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
    NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
    FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
except (AttributeError, ImportError):
    # Fall back to environment variables if Streamlit secrets aren't available
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
    FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "")

# Log secrets status (but not the values)
if OPENAI_API_KEY:
    logger.info("OPENAI_API_KEY is set")
else:
    logger.warning("OPENAI_API_KEY not set. The application will not function properly.")

if NEWS_API_KEY:
    logger.info("NEWS_API_KEY is set")
else:
    logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.")

if FACTCHECK_API_KEY:
    logger.info("FACTCHECK_API_KEY is set")
else:
    logger.warning("FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.")

# Set API key in environment to ensure it's available to all components
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Rate limiting configuration
RATE_LIMITS = {
    # api_name: {"requests": max_requests, "period": period_in_seconds}
    "newsapi": {"requests": 100, "period": 3600},         # 100 requests per hour
    "factcheck": {"requests": 1000, "period": 86400},     # 1000 requests per day
    "semantic_scholar": {"requests": 10, "period": 300},  # 10 requests per 5 minutes
    "wikidata": {"requests": 60, "period": 60},           # 60 requests per minute
    "wikipedia": {"requests": 200, "period": 60},         # 200 requests per minute
    "rss": {"requests": 300, "period": 3600}              # 300 RSS requests per hour
}

# Error backoff settings
ERROR_BACKOFF = {
    "max_retries": 5,
    "initial_backoff": 1,  # seconds
    "backoff_factor": 2,   # exponential backoff
}

# RSS feed settings
RSS_SETTINGS = {
    "max_feeds_per_request": 10,  # Maximum number of feeds to try per request
    "max_age_days": 3,            # Maximum age of RSS items to consider
    "timeout_seconds": 5,         # Timeout for RSS feed requests
    "max_workers": 5              # Number of parallel workers for fetching feeds
}
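The rate limiter that consumes `RATE_LIMITS` lives in `utils/api_utils.py` and is not part of this excerpt. As a rough sketch of how a token-bucket limiter could be driven by one of these entries; the class and method below are illustrative assumptions, not the project's actual API:

```python
import time

from config import RATE_LIMITS  # the dict defined above

class TokenBucket:
    """Minimal token-bucket limiter fed by a RATE_LIMITS entry (sketch only)."""

    def __init__(self, requests, period):
        self.capacity = requests              # bucket size
        self.tokens = float(requests)         # start full
        self.refill_rate = requests / period  # tokens per second
        self.last_refill = time.monotonic()

    def acquire(self):
        """Block until a token is available, then consume it."""
        while True:
            now = time.monotonic()
            self.tokens = min(self.capacity,
                              self.tokens + (now - self.last_refill) * self.refill_rate)
            self.last_refill = now
            if self.tokens >= 1:
                self.tokens -= 1
                return
            time.sleep((1 - self.tokens) / self.refill_rate)

# e.g. one limiter per API, driven by the configuration above
newsapi_limiter = TokenBucket(**RATE_LIMITS["newsapi"])
```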
docs/architecture.md
ADDED
@@ -0,0 +1,176 @@
# AskVeracity Architecture

## System Overview

AskVeracity is a fact-checking and misinformation detection application that verifies factual claims by gathering and analyzing evidence from multiple sources. The system follows an agentic approach using LangGraph's ReAct agent framework for orchestrating the verification process.

## Core Components

### 1. Agent System

The system implements a LangGraph-based agent that orchestrates the entire fact-checking process:

- **Core Agent:** Defined in `agent.py`, the ReAct agent coordinates the execution of individual tools in a logical sequence to verify claims.
- **Agent Tools:** Implemented as callable functions that the agent can invoke:
  - `claim_extractor`: Extracts the main factual claim from user input
  - `evidence_retriever`: Gathers evidence from multiple sources
  - `truth_classifier`: Evaluates the claim against evidence
  - `explanation_generator`: Creates human-readable explanations

### 2. Web Interface

The user interface is implemented using Streamlit:

- **Main App:** Defined in `app.py`, provides the interface for users to submit claims and view results
- **Caching:** Uses Streamlit's caching mechanisms to optimize performance
- **Results Display:** Shows verdict, confidence, explanation, and evidence details

## Module Architecture

```
askveracity/
│
├── agent.py                     # LangGraph agent implementation
├── app.py                       # Main Streamlit application
├── config.py                    # Configuration and API keys
├── evaluate_performance.py      # Performance evaluation script
│
├── modules/                     # Core functionality modules
│   ├── claim_extraction.py      # Claim extraction functionality
│   ├── evidence_retrieval.py    # Evidence gathering from various sources
│   ├── classification.py        # Truth classification logic
│   ├── explanation.py           # Explanation generation
│   ├── rss_feed.py              # RSS feed evidence retrieval
│   └── category_detection.py    # Claim category detection
│
├── utils/                       # Utility functions
│   ├── api_utils.py             # API rate limiting and error handling
│   ├── performance.py           # Performance tracking utilities
│   └── models.py                # Model initialization functions
│
├── results/                     # Performance evaluation results
│   ├── performance_results.json # Evaluation metrics
│   └── *.png                    # Performance visualization charts
│
└── docs/                        # Documentation
    ├── assets/                  # Images and other media
    │   └── app_screenshot.png   # Application screenshot
    ├── architecture.md          # System design and component interactions
    ├── configuration.md         # Setup and environment configuration
    ├── data-handling.md         # Data processing and flow
    └── changelog.md             # Version history
```

## Component Interactions

### Claim Verification Flow

1. **User Input:** User submits a claim via the Streamlit interface
2. **Agent Initialization:** The ReAct agent is initialized with fact-checking tools
3. **Claim Extraction:** The agent extracts the main factual claim
4. **Category Detection:** The system detects the category of the claim (ai, science, technology, politics, business, world, sports, entertainment)
5. **Evidence Retrieval:** Multi-source evidence gathering with priority based on claim category
6. **Evidence Analysis:** Entity and verb matching assesses evidence relevance
7. **Truthfulness Classification:** The agent evaluates the claim against the evidence
8. **Explanation Generation:** Human-readable explanation is generated
9. **Results Display:** Results are presented to the user with evidence details

### Evidence Retrieval Architecture

Evidence retrieval is a core component of the misinformation detection system:

1. **Multi-source Retrieval:** The system collects evidence from:
   - Wikipedia
   - Wikidata
   - News API
   - RSS feeds
   - Fact-checking sites (via Google Fact Check Tools API)
   - Academic sources (via OpenAlex)

2. **Category-aware Prioritization:** Sources are prioritized based on the detected category of the claim:
   - Each category (ai, science, technology, politics, business, world, sports, entertainment) has dedicated RSS feeds
   - AI category falls back to technology sources when needed
   - Other categories fall back to default RSS feeds

3. **Parallel Processing:** Evidence retrieval uses ThreadPoolExecutor for parallel API requests with optimized timeouts (a sketch follows this list)

4. **Rate Limiting:** API calls are managed by a token bucket rate limiter to respect API usage limits

5. **Error Handling:** Robust error handling with exponential backoff for retries

6. **Source Verification:** The system provides direct URLs to original sources for all evidence items, enabling users to verify information at its original source

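A minimal sketch of the fan-out pattern behind step 3 above, using `ThreadPoolExecutor`. The per-source retrievers here are placeholders; the real functions live in `modules/evidence_retrieval.py` and take more parameters:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError as FuturesTimeout

# Placeholder per-source retrievers (assumptions, for illustration only)
def retrieve_wikipedia(claim):
    return [f"Wikipedia evidence for: {claim}"]

def retrieve_news(claim):
    return [f"News API evidence for: {claim}"]

def retrieve_rss(claim):
    return [f"RSS evidence for: {claim}"]

def gather_evidence(claim, deadline_seconds=10):
    """Fan out to all sources in parallel; tolerate individual failures."""
    sources = [retrieve_wikipedia, retrieve_news, retrieve_rss]
    evidence = []
    with ThreadPoolExecutor(max_workers=len(sources)) as pool:
        futures = [pool.submit(fn, claim) for fn in sources]
        try:
            for future in as_completed(futures, timeout=deadline_seconds):
                try:
                    evidence.extend(future.result() or [])
                except Exception:
                    continue  # one failing source must not sink the whole retrieval
        except FuturesTimeout:
            pass  # overall deadline hit; keep whatever arrived in time
    return evidence

print(gather_evidence("The Taj Mahal is located in Agra."))
```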
### Classification System

The truth classification process involves:

1. **Evidence Analysis:** Each evidence item is classified as supporting, contradicting, or insufficient
2. **Confidence Scoring:** Confidence scores are assigned to each classification
3. **Aggregation:** Individual evidence classifications are aggregated to determine the final verdict

## Technical Details

### Language Models

- Uses OpenAI's GPT-3.5 Turbo large language model via LangChain
- Configurable model selection in `utils/models.py`

### NLP Processing

- spaCy for natural language processing tasks
- Named entity recognition for claim and evidence analysis
- Entity and verb matching for evidence relevance scoring

### Performance Optimization

- Caching of models and results
- Prioritized and parallel evidence retrieval
- Early relevance analysis during the retrieval process

### Error Resilience

- Multiple fallback mechanisms
- Graceful degradation when sources are unavailable
- Comprehensive error logging

## Performance Evaluation Results

The system has been evaluated using a test set of 40 claims across three categories (True, False, and Uncertain). A typical performance profile shows:

1. **Overall Accuracy:** ~52.5% across all claim types
   * Accuracy: Percentage of claims correctly classified according to their ground truth label

2. **Safety Rate:** ~70.0% across all claim types
   * Safety Rate: Percentage of claims that were either correctly classified or safely categorized as "Uncertain" rather than making an incorrect assertion

3. **Class-specific Metrics:**
   * True claims: ~40-60% accuracy, ~55-85% safety rate
   * False claims: ~15-35% accuracy, ~50-70% safety rate
   * Uncertain claims: ~50.0% accuracy, ~50.0% safety rate (for Uncertain claims, accuracy equals safety rate)

4. **Confidence Scores:**
   * True claims: ~0.62-0.74 average confidence
   * False claims: ~0.42-0.50 average confidence
   * Uncertain claims: ~0.38-0.50 average confidence

5. **Processing Times:**
   * True claims: ~21-32 seconds average
   * False claims: ~24-37 seconds average
   * Uncertain claims: ~23-31 seconds average

**Note:** The class-specific metrics, confidence scores, and processing times vary by test run.

These metrics vary between evaluation runs due to the dynamic nature of evidence sources and the real-time information landscape. The system is designed to adapt to this variability, making it well-suited for real-world fact-checking scenarios where information evolves over time.

## Misinformation Detection Capabilities

The system's approach to detecting misinformation includes:

1. **Temporal Relevance:** Checks if evidence is temporally appropriate for the claim
2. **Contradiction Detection:** Identifies evidence that directly contradicts claims
3. **Evidence Diversity:** Ensures diverse evidence sources for more robust verification
4. **Domain Prioritization:** Applies a small relevance boost to content from established news and fact-checking domains in the RSS feed handling
5. **Safety-First Classification:** Prioritizes preventing the spread of misinformation by avoiding incorrect assertions when evidence is insufficient

This architecture enables AskVeracity to efficiently gather, analyze, and present evidence relevant to user claims, supporting the broader effort to detect and counteract misinformation.
docs/assets/app_screenshot.png
ADDED
Application screenshot (binary image, tracked with Git LFS)
docs/changelog.md
ADDED
@@ -0,0 +1,114 @@
# Changelog

All notable changes to the AskVeracity fact-checking and misinformation detection system will be documented in this file.

## [0.4.1] - 2025-04-25

### Updated
- Updated architecture.md to improve accuracy of system description
- Updated README.md to better reflect current system functionality
- Removed references to deprecated source credibility assessment
- Clarified documentation of domain quality boost in RSS feed processing

## [0.4.0] - 2025-04-24

### Added
- Added safety rate metric to performance evaluation
  - Measures how often the system avoids making incorrect assertions
  - Tracks when the system correctly abstains from judgment by using "Uncertain"
  - Included in overall metrics and per-class metrics
- New safety rate visualization chart in performance evaluation
- Added safety flag to detailed claim results

### Updated
- Enhanced `evaluate_performance.py` script to track and calculate safety rates
- Updated documentation to explain the safety rate metric and its importance
- Improved tabular display of performance metrics with safety rate column

## [0.3.0] - 2025-04-23

### Added
- Performance evaluation script (`evaluate_performance.py`) in root directory
- Performance results visualization and storage in `results/` directory
- Enhanced error handling and fallback mechanisms
- Refined relevance scoring with entity and verb matching, falling back to keyword matching, for accurate evidence assessment
- Enhanced evidence relevance with weighted scoring prioritization and increased gathering from 5 to 10 items
- Added detailed confidence calculation for more reliable verdicts with better handling of low-confidence cases
- Category-specific RSS feeds for more targeted evidence retrieval
- OpenAlex integration for scholarly evidence (replacing Semantic Scholar)

### Changed
- Improved classification output structure for consistent downstream processing
- Added fallback mechanisms for explanation generation and classification
- Improved evidence retrieval and classification mechanism
- Streamlined architecture by removing source credibility and semantic analysis complexity
- Improved classification mechanism with weighted evidence count (55%) and quality (45%)
- Updated documentation to reflect the updated performance metrics, enhanced evidence processing pipeline, improved classification mechanism, and streamlined architecture

### Fixed
- Enhanced handling of non-standard response formats

## [0.2.0] - 2025-04-22

### Added
- Created comprehensive documentation in `/docs` directory
  - `architecture.md` for system design and component interactions
  - `configuration.md` for setup and environment configuration
  - `data-handling.md` for data processing and flow
  - `changelog.md` for version history tracking
- Updated app description to emphasize misinformation detection capabilities

### Changed
- Improved directory structure with documentation folder
- Enhanced README with updated project structure
- Clarified misinformation detection focus in documentation

## [0.1.0] - 2025-04-21

### Added
- Initial release of AskVeracity fact-checking system
- Streamlit web interface in `app.py`
- LangGraph ReAct agent implementation in `agent.py`
- Multi-source evidence retrieval system
  - Wikipedia integration
  - Wikidata integration
  - News API integration
  - RSS feed processing
  - Google's Fact Check Tools API integration
  - OpenAlex scholarly evidence
- Truth classification with LLM
- Explanation generation
- Performance tracking utilities
- Rate limiting and API error handling
- Category detection for source prioritization

### Features
- User-friendly claim input interface
- Detailed results display with evidence exploration
- Category-aware source prioritization
- Robust error handling and fallbacks
- Parallel evidence retrieval for improved performance
- Support for various claim categories:
  - AI
  - Science
  - Technology
  - Politics
  - Business
  - World news
  - Sports
  - Entertainment

## Unreleased

### Planned Features
- Enhanced visualization of evidence relevance
- Support for user feedback on verification results
- Streamlined fact-checking using only relevant sources
- Source weighting for improved result relevance
- Improved verdict confidence for challenging or ambiguous claims
- Expanded fact-checking sources
- Improved handling of multilingual claims
- Integration with additional academic databases
- Custom source credibility configuration interface
- Historical claim verification database
- API endpoint for programmatic access
docs/configuration.md
ADDED
@@ -0,0 +1,292 @@
# AskVeracity Configuration Guide

This document describes how to set up and configure the AskVeracity fact-checking and misinformation detection system.

## Prerequisites

Before setting up AskVeracity, ensure you have:

- Python 3.8 or higher
- pip (Python package installer)
- Git (for cloning the repository)
- API keys for external services

## Installation

### Local Development

1. Clone the repository:
   ```bash
   git clone https://github.com/yourusername/askveracity.git
   cd askveracity
   ```

2. Install the required dependencies:
   ```bash
   pip install -r requirements.txt
   ```

3. Download the required spaCy model:
   ```bash
   python -m spacy download en_core_web_sm
   ```

## API Key Configuration

AskVeracity requires several API keys to access external services. You have two options for configuring these keys:

### Option 1: Using Streamlit Secrets (Recommended for Local Development)

1. Create a `.streamlit` directory if it doesn't exist:
   ```bash
   mkdir -p .streamlit
   ```

2. Create a `secrets.toml` file:
   ```bash
   cp .streamlit/secrets.toml.example .streamlit/secrets.toml
   ```

3. Edit the `.streamlit/secrets.toml` file with your API keys:
   ```toml
   OPENAI_API_KEY = "your_openai_api_key"
   NEWS_API_KEY = "your_news_api_key"
   FACTCHECK_API_KEY = "your_factcheck_api_key"
   ```

### Option 2: Using Environment Variables

1. Create a `.env` file in the root directory:
   ```bash
   touch .env
   ```

2. Add your API keys to the `.env` file:
   ```
   OPENAI_API_KEY=your_openai_api_key
   NEWS_API_KEY=your_news_api_key
   FACTCHECK_API_KEY=your_factcheck_api_key
   ```

3. Load the environment variables:
   ```python
   # In Python
   from dotenv import load_dotenv
   load_dotenv()
   ```

   Or in your terminal:
   ```bash
   # Unix/Linux/MacOS
   source .env

   # Windows
   # Install python-dotenv[cli] and run
   dotenv run streamlit run app.py
   ```

## Required API Keys

AskVeracity uses the following external APIs:

1. **OpenAI API** (Required)
   - Used for claim extraction, classification, and explanation generation
   - Get an API key from [OpenAI's website](https://platform.openai.com/)

2. **News API** (Optional but recommended)
   - Used for retrieving news article evidence
   - Get an API key from [NewsAPI.org](https://newsapi.org/)

3. **Google Fact Check Tools API** (Optional but recommended)
   - Used for retrieving fact-checking evidence
   - Get an API key from [Google Fact Check Tools API](https://developers.google.com/fact-check/tools/api)

## Configuration Files

### config.py

The main configuration file is `config.py`, which contains:

- API key handling
- Rate limiting configuration
- Error backoff settings
- RSS feed settings

Important configuration sections in `config.py`:

```python
# Rate limiting configuration
RATE_LIMITS = {
    # api_name: {"requests": max_requests, "period": period_in_seconds}
    "newsapi": {"requests": 100, "period": 3600},         # 100 requests per hour
    "factcheck": {"requests": 1000, "period": 86400},     # 1000 requests per day
    "semantic_scholar": {"requests": 10, "period": 300},  # 10 requests per 5 minutes
    "wikidata": {"requests": 60, "period": 60},           # 60 requests per minute
    "wikipedia": {"requests": 200, "period": 60},         # 200 requests per minute
    "rss": {"requests": 300, "period": 3600}              # 300 RSS requests per hour
}

# Error backoff settings
ERROR_BACKOFF = {
    "max_retries": 5,
    "initial_backoff": 1,  # seconds
    "backoff_factor": 2,   # exponential backoff
}

# RSS feed settings
RSS_SETTINGS = {
    "max_feeds_per_request": 10,  # Maximum number of feeds to try per request
    "max_age_days": 3,            # Maximum age of RSS items to consider
    "timeout_seconds": 5,         # Timeout for RSS feed requests
    "max_workers": 5              # Number of parallel workers for fetching feeds
}
```

### Category-Specific RSS Feeds

Category-specific RSS feeds are defined in `modules/category_detection.py`. These feeds are used to prioritize sources based on the detected claim category:

```python
CATEGORY_SPECIFIC_FEEDS = {
    "ai": [
        "https://www.artificialintelligence-news.com/feed/",
        "https://openai.com/news/rss.xml",
        # Additional AI-specific feeds
    ],
    "science": [
        "https://www.science.org/rss/news_current.xml",
        "https://www.nature.com/nature.rss",
        # Additional science feeds
    ],
    # Additional categories
}
```

## Hugging Face Spaces Deployment

### Setting Up a Space

1. Create a new Space on Hugging Face:
   - Go to https://huggingface.co/spaces
   - Click "Create new Space"
   - Select "Streamlit" as the SDK
   - Choose the hardware tier (use the default 16GB RAM)

2. Upload the project files:
   - You can upload files directly through the Hugging Face web interface
   - Alternatively, use Git to push to the Hugging Face repository
   - Make sure to include all necessary files, including requirements.txt

### Setting Up Secrets

1. Add API keys as secrets:
   - Go to the "Settings" tab of your Space
   - Navigate to the "Repository secrets" section
   - Add your API keys:
     - `OPENAI_API_KEY`
     - `NEWS_API_KEY`
     - `FACTCHECK_API_KEY`

### Configuring the Space

Edit the metadata in the `README.md` file:

```yaml
---
title: Askveracity
emoji: 📉
colorFrom: blue
colorTo: pink
sdk: streamlit
sdk_version: 1.44.1
app_file: app.py
pinned: false
license: mit
short_description: Fact-checking and misinformation detection tool.
---
```

## Custom Configuration

### Adjusting Rate Limits

You can adjust the rate limits in `config.py` based on your API subscription levels:

```python
# Update for a higher-tier News API subscription
RATE_LIMITS["newsapi"] = {"requests": 500, "period": 3600}  # 500 requests per hour
```

### Modifying RSS Feeds

The list of RSS feeds can be found in `modules/rss_feed.py`, and category-specific feeds in `modules/category_detection.py`. You can add or remove feeds as needed.

### Performance Evaluation

The system includes a performance evaluation script, `evaluate_performance.py`, that:

1. Runs the fact-checking system on a predefined set of test claims
2. Calculates accuracy, safety rate, processing time, and confidence metrics
3. Generates visualization charts in the `results/` directory
4. Saves detailed results to `results/performance_results.json`

To run the performance evaluation:

```bash
python evaluate_performance.py [--limit N] [--output FILE]
```

- `--limit N`: Limit evaluation to first N claims (default: all)
- `--output FILE`: Save results to FILE (default: performance_results.json)

## Running the Application

Start the Streamlit app:

```bash
streamlit run app.py
```

The application will be available at http://localhost:8501 by default.

## Troubleshooting

### API Key Issues

If you encounter API key errors:

1. Verify that your API keys are set correctly
2. Check the logs for specific error messages
3. Make sure API keys are not expired or rate-limited

### Model Loading Errors

If the spaCy model fails to load:

```bash
# Reinstall the model
python -m spacy download en_core_web_sm --force
```

### Rate Limiting

If you encounter rate limiting issues:

1. Reduce the number of requests by adjusting `RATE_LIMITS` in `config.py`
2. Increase the backoff parameters in `ERROR_BACKOFF`
3. Subscribe to higher API tiers if available

### Memory Issues

If the application crashes due to memory issues:

1. Reduce the number of parallel workers in `RSS_SETTINGS`
2. Limit the maximum number of evidence items processed

## Performance Optimization

For better performance:

1. Upgrade to a higher-tier OpenAI model for improved accuracy
2. Increase the number of parallel workers for evidence retrieval
3. Add more relevant RSS feeds to improve evidence gathering
docs/data-handling.md
ADDED
@@ -0,0 +1,285 @@
# Data Handling in AskVeracity

This document explains how data flows through the AskVeracity fact-checking and misinformation detection system, from user input to final verification results.

## Data Flow Overview

```
User Input → Claim Extraction → Category Detection → Evidence Retrieval → Evidence Analysis → Classification → Explanation → Result Display
```

## User Input Processing

### Input Sanitization and Extraction

1. **Input Acceptance:** The system accepts user input as free-form text through the Streamlit interface.

2. **Claim Extraction** (`modules/claim_extraction.py`):
   - For concise inputs (<30 words), the system preserves the input as-is
   - For longer texts, an LLM extracts the main factual claim
   - Validation ensures the extraction doesn't add information not present in the original
   - Entity preservation is verified using spaCy's NER

3. **Claim Shortening:**
   - For evidence retrieval, claims are shortened to preserve key entities and context
   - Preserves entity mentions, key nouns, titles, country references, and negation contexts

## Evidence Retrieval and Processing

### Multi-source Evidence Gathering

Evidence is collected from multiple sources in parallel (`modules/evidence_retrieval.py`):

1. **Category Detection** (`modules/category_detection.py`):
   - Detects the claim category (ai, science, technology, politics, business, world, sports, entertainment)
   - Prioritizes sources based on category
   - No category receives preferential weighting; assignment is based purely on keyword matching

2. **Wikipedia** evidence:
   - Searches the Wikipedia API for relevant articles
   - Extracts introductory paragraphs
   - Processes up to 3 top search results in parallel

3. **Wikidata** evidence:
   - SPARQL queries for structured data
   - Entity extraction with descriptions

4. **News API** evidence:
   - Retrieval from NewsAPI.org with date filtering
   - Prioritizes recent articles
   - Extracts titles, descriptions, and content snippets

5. **RSS Feed** evidence (`modules/rss_feed.py`):
   - Parallel retrieval from multiple RSS feeds
   - Category-specific feed selection
   - Relevance and recency scoring

6. **ClaimReview** evidence:
   - Google's Fact Check Tools API integration
   - Retrieves fact-checks from fact-checking organizations
   - Includes ratings and publisher information

7. **Scholarly** evidence:
   - OpenAlex API for academic sources
   - Extracts titles, abstracts, and publication dates

8. **Category Fallback** mechanism:
   - For AI claims, falls back to technology sources if there is insufficient evidence (for RSS feeds)
   - For other categories, falls back to default RSS feeds
   - Ensures robust evidence retrieval across related domains

### Evidence Preprocessing

Each evidence item is standardized to a consistent format:
```
Title: [title], Source: [source], Date: [date], URL: [url], Content: [content snippet]
```

Length limits are applied to reduce token usage:
- Content snippets are limited to ~1000 characters
- Evidence items are truncated while maintaining context

## Evidence Analysis and Relevance Ranking

### Relevance Assessment

Evidence is analyzed and scored for relevance:

1. **Component Extraction:**
   - Extract entities, verbs, and keywords from the claim
   - Use NLP processing to identify key claim components

2. **Entity and Verb Matching:**
   - Match entities from claim to evidence (case-sensitive and case-insensitive)
   - Match verbs from claim to evidence
   - Score based on matches (entity matches weighted higher than verb matches)

3. **Temporal Relevance:**
   - Detection of temporal indicators in claims
   - Date-based filtering for time-sensitive claims
   - Adjusts the evidence retrieval window based on the claim's temporal context

4. **Scoring Formula:**
   ```
   final_score = (entity_matches * 3.0) + (verb_matches * 2.0)
   ```
   If there are no entity or verb matches, fall back to keyword matching:
   ```
   final_score = keyword_matches * 1.0
   ```

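A minimal sketch of the scoring formula above, assuming the claim components have already been extracted; the real implementation in `modules/evidence_retrieval.py` handles case-sensitive and case-insensitive matching separately:

```python
def relevance_score(claim_entities, claim_verbs, claim_keywords, evidence_text):
    """Weighted entity/verb matching with a keyword fallback (sketch only)."""
    text = evidence_text.lower()
    entity_matches = sum(1 for e in claim_entities if e.lower() in text)
    verb_matches = sum(1 for v in claim_verbs if v.lower() in text)
    if entity_matches or verb_matches:
        return (entity_matches * 3.0) + (verb_matches * 2.0)
    # Fallback: keyword matching only
    return sum(1 for k in claim_keywords if k.lower() in text) * 1.0

# e.g. relevance_score(["ISRO", "SpaDeX"], ["achieves", "docking"], ["satellite"], ev_text)
```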
### Evidence Selection

The system selects the most relevant evidence:

1. **Relevance Sorting:**
   - Evidence items sorted by relevance score (descending)
   - Top 10 most relevant items selected

2. **Handling No Evidence:**
   - If no evidence is found, a placeholder is returned
   - Ensures graceful handling of edge cases

## Truth Classification

### Evidence Classification (`modules/classification.py`)

Each evidence item is classified individually:

1. **LLM Classification:**
   - Each evidence item is analyzed by an LLM
   - Classification categories: support, contradict, insufficient
   - Confidence score (0-100) assigned to each classification
   - Structured output parsing with fallback mechanisms

2. **Tense Normalization:**
   - Normalizes verb tenses in claims to ensure consistent classification
   - Converts present simple and perfect forms to past tense equivalents
   - Preserves semantic equivalence across tense variations

### Verdict Aggregation

Evidence classifications are aggregated to determine the final verdict:

1. **Weighted Aggregation:**
   - 55% weight for the count of supporting/contradicting items
   - 45% weight for the quality (confidence) of supporting/contradicting items

2. **Confidence Calculation:**
   - Formula: `1.0 - (min_score / max_score)`
   - Higher confidence for consistent evidence
   - Lower confidence for mixed or insufficient evidence

3. **Final Verdict Categories:**
   - "True (Based on Evidence)"
   - "False (Based on Evidence)"
   - "Uncertain"

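A minimal sketch of this aggregation, with `classifications` as (label, confidence) pairs. How counts and confidences are normalized before weighting is an assumption here; the real logic lives in `modules/classification.py`:

```python
def aggregate_verdict(classifications):
    """Combine per-evidence classifications into a verdict (sketch only)."""
    support = [conf for label, conf in classifications if label == "support"]
    contradict = [conf for label, conf in classifications if label == "contradict"]

    def weighted_score(confidences):
        count = len(confidences)
        quality = sum(confidences) / count if count else 0.0
        return 0.55 * count + 0.45 * quality  # 55% count, 45% quality

    s, c = weighted_score(support), weighted_score(contradict)
    if s == 0 and c == 0:
        return "Uncertain", 0.0

    confidence = 1.0 - (min(s, c) / max(s, c))
    if s > c:
        return "True (Based on Evidence)", confidence
    if c > s:
        return "False (Based on Evidence)", confidence
    return "Uncertain", confidence  # evenly split evidence yields confidence 0.0
```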
## Explanation Generation

### Explanation Creation (`modules/explanation.py`)

Human-readable explanations are generated based on the verdict:

1. **Template Selection:**
   - Different prompts for true, false, and uncertain verdicts
   - Special handling for claims containing negation

2. **Confidence Communication:**
   - Translation of confidence scores to descriptive language
   - Clear communication of certainty/uncertainty

3. **Very Low Confidence Handling:**
   - Special explanations for verdicts with very low confidence (<10%)
   - Strong recommendations to verify with authoritative sources

## Result Presentation

Results are presented in the Streamlit UI with multiple components:

1. **Verdict Display:**
   - Color-coded verdict (green for true, red for false, gray for uncertain)
   - Confidence percentage
   - Explanation text

2. **Evidence Presentation:**
   - Tabbed interface for different evidence views, with URLs if available
   - Supporting and contradicting evidence tabs
   - Source distribution summary

3. **Input Guidance:**
   - Tips for claim formatting
   - Guidance for time-sensitive claims
   - Suggestions for verb tense based on claim age

4. **Processing Insights:**
   - Processing time
   - AI reasoning steps
   - Source distribution statistics

## Data Persistence and Privacy

AskVeracity prioritizes user privacy:

1. **No Data Storage:**
   - User claims are not stored persistently
   - Results are maintained only in session state
   - No user data is collected or retained

2. **Session Management:**
   - Session state in Streamlit manages the current user interaction
   - The session is cleared when starting a new verification

3. **API Interaction:**
   - External API calls are governed by their providers' privacy policies
   - OpenAI API usage follows their data handling practices

4. **Caching:**
   - Model caching for performance
   - Resource cleanup on application termination

## Performance Tracking

The system includes a performance tracking utility (`utils/performance.py`):

1. **Metrics Tracked:**
   - Claims processed count
   - Evidence retrieval success rates
   - Processing times
   - Confidence scores
   - Source types used
   - Temporal relevance

2. **Usage:**
   - Performance metrics are logged during processing
   - A summary of select metrics is available in the final result
   - Used for system optimization

## Performance Evaluation

The system includes a performance evaluation script (`evaluate_performance.py`):

1. **Test Claims:**
   - Predefined set of test claims with known ground truth labels
   - Claims categorized as "True", "False", or "Uncertain"

2. **Metrics:**
   - Overall accuracy: Percentage of claims correctly classified according to ground truth
   - Safety rate: Percentage of claims either correctly classified or safely categorized as "Uncertain" rather than making an incorrect assertion
   - Per-class accuracy and safety rates
   - Average processing time
   - Average confidence score
   - Classification distributions

3. **Visualization:**
   - Charts for accuracy by classification type
   - Charts for safety rate by classification type
   - Processing time by classification type
   - Confidence scores by classification type

4. **Results Storage:**
   - Detailed results saved to a JSON file
   - Visualization charts saved as PNG files
   - All results stored in the `results/` directory

## Error Handling and Resilience

The system implements robust error handling:

1. **API Error Handling** (`utils/api_utils.py`):
   - Decorator-based error handling (a minimal sketch follows this section)
   - Exponential backoff for retries
   - Rate limiting respecting API constraints

2. **Safe JSON Parsing:**
   - Defensive parsing of API responses
   - Fallback mechanisms for invalid responses

3. **Graceful Degradation:**
   - Multiple fallback strategies
   - Core functionality preserved even when some sources fail

4. **Fallback Mechanisms:**
   - Fallback for truth classification when the classifier is not called
   - Fallback for explanation generation when the explanation generator is not called
   - Ensures complete results even with partial component failures
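A minimal sketch of the decorator-based backoff referenced in item 1 above, reusing the `ERROR_BACKOFF` settings from `config.py`. The decorator name and exact behavior are assumptions; the project's real implementation is in `utils/api_utils.py`:

```python
import functools
import time

from config import ERROR_BACKOFF  # {"max_retries": 5, "initial_backoff": 1, "backoff_factor": 2}

def with_backoff(max_retries=5, initial_backoff=1, backoff_factor=2):
    """Retry a flaky call with exponential backoff (sketch only)."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            delay = initial_backoff
            for attempt in range(max_retries):
                try:
                    return fn(*args, **kwargs)
                except Exception:
                    if attempt == max_retries - 1:
                        raise  # out of retries; surface the error
                    time.sleep(delay)
                    delay *= backoff_factor
        return wrapper
    return decorator

@with_backoff(**ERROR_BACKOFF)
def fetch_news_evidence():
    ...  # a rate-limited API call would go here
```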
evaluate_performance.py
ADDED
@@ -0,0 +1,520 @@
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Performance Evaluation Script for AskVeracity.
|
4 |
+
|
5 |
+
This script evaluates the performance of the AskVeracity fact-checking system
|
6 |
+
using a predefined set of test claims with known ground truth labels.
|
7 |
+
It collects metrics on accuracy, safety rate, processing time, and confidence scores
|
8 |
+
without modifying the core codebase.
|
9 |
+
|
10 |
+
Usage:
|
11 |
+
python evaluate_performance.py [--limit N] [--output FILE]
|
12 |
+
|
13 |
+
Options:
|
14 |
+
--limit N Limit evaluation to first N claims (default: all)
|
15 |
+
--output FILE Save results to FILE (default: performance_results.json)
|
16 |
+
"""
|
17 |
+
|
18 |
+
import os
|
19 |
+
import sys
|
20 |
+
import json
|
21 |
+
import time
|
22 |
+
import argparse
|
23 |
+
from datetime import datetime
|
24 |
+
import matplotlib.pyplot as plt
|
25 |
+
from tabulate import tabulate
|
26 |
+
import numpy as np
|
27 |
+
|
28 |
+
# Add the parent directory to sys.path if this script is run directly
|
29 |
+
if __name__ == "__main__":
|
30 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
31 |
+
|
32 |
+
# Import the agent and performance tracker
|
33 |
+
import agent
|
34 |
+
from utils.performance import PerformanceTracker
|
35 |
+
from utils.models import initialize_models
|
36 |
+
|
37 |
+
# IMPORTANT NOTE FOR DEVELOPERS:
|
38 |
+
# The test claims below include many recent events that will become outdated.
|
39 |
+
# When using this script for testing or evaluation, please update these claims
|
40 |
+
# with relevant and up-to-date examples to ensure meaningful results.
|
41 |
+
# Performance metrics are heavily influenced by the recency and verifiability
|
42 |
+
# of these claims, so using outdated claims will likely lead to poor results.
|
43 |
+
|
44 |
+
# Define the test claims with ground truth labels
|
45 |
+
TEST_CLAIMS = [
|
46 |
+
# True claims
|
47 |
+
{"claim": "Dozens killed as gunmen massacre tourists in Kashmir beauty spot.", "expected": "True"},
|
48 |
+
{"claim": "Pope Francis dies at 88.", "expected": "True"},
|
49 |
+
{"claim": "OpenAI released new reasoning models called o3 and o4-mini.", "expected": "True"},
|
50 |
+
{"claim": "Trump And Zelensky Clash Again As US Says Crimea Now Russian Territory.", "expected": "True"},
|
51 |
+
{"claim": "Twelve states sue Donald Trump administration in trade court over chaotic and illegal tariff policy.", "expected": "True"},
|
52 |
+
{"claim": "Zomato has been renamed to Eternal Limited.", "expected": "True"},
|
53 |
+
{"claim": "The Taj Mahal is located in Agra.", "expected": "True"},
|
54 |
+
{"claim": "ISRO achieves second docking with SpaDeX satellites.", "expected": "True"},
|
55 |
+
{"claim": "The TV series Adolescence is streaming on Netflix.", "expected": "True"},
|
56 |
+
{"claim": "Vladimir Putin offers to halt Ukraine invasion.", "expected": "True"},
|
57 |
+
{"claim": "Meta released its Llama 4 language model.", "expected": "True"},
|
58 |
+
{"claim": "Google launched Gemini 2.5 Pro Experimental, the first model in the Gemini 2.5 family.", "expected": "True"},
|
59 |
+
{"claim": "Microsoft is rolling out improved Recall feature for Windows Insiders.", "expected": "True"},
|
60 |
+
{"claim": "Microsoft announced a 1-bit language model that can run on CPU.", "expected": "True"},
|
61 |
+
{"claim": "Royal Challengers Bengaluru beat Rajasthan Royals by 11 runs in yesterday's IPL match.", "expected": "True"},
|
62 |
+
{"claim": "Anthropic introduced Claude Research.", "expected": "True"},
|
63 |
+
{"claim": "The IMF has lowered India's growth projection for the fiscal year 2025-26 to 6.2 per cent.", "expected": "True"},
|
64 |
+
{"claim": "In Bundesliga, Bayern Munich beat Heidenheim 4-0 last week.", "expected": "True"},
|
65 |
+
{"claim": "Manchester United in Europa League semi-finals.", "expected": "True"},
|
66 |
+
|
67 |
+
# False claims
|
68 |
+
{"claim": "The Eiffel Tower is in Rome.", "expected": "False"},
|
69 |
+
{"claim": "The earth is flat.", "expected": "False"},
|
70 |
+
{"claim": "Rishi Sunak is the current Prime Minister of the UK.", "expected": "False"},
|
71 |
+
{"claim": "New Zealand won the ICC Champions Trophy in 2025.", "expected": "False"},
|
72 |
+
{"claim": "US President Donald trump to visit India next week.", "expected": "False"},
|
73 |
+
{"claim": "Quantum computers have definitively solved the protein folding problem.", "expected": "False"},
|
74 |
+
{"claim": "CRISPR gene editing has successfully cured type 1 diabetes in human clinical trials.", "expected": "False"},
|
75 |
+
{"claim": "Google's new quantum computer, Willow, has demonstrated remarkable capabilities by solving mathematical problems far beyond the reach of the fastest supercomputers.", "expected": "False"},
|
76 |
+
{"claim": "NASA confirmed that the James Webb Space Telescope has found definitive evidence of alien life on an exoplanet.", "expected": "False"},
|
77 |
+
{"claim": "Google launched Gemini 3.", "expected": "False"},
|
78 |
+
{"claim": "A solar eclipse was be seen in India on October 17, 2024.", "expected": "False"},
|
79 |
+
{"claim": "Tom Cruise and Shah Rukh Khan have starred in a Bollywood movie in the past.", "expected": "False"},
|
80 |
+
{"claim": "Germany has the highest GDP in the world.", "expected": "False"},
|
81 |
+
|
82 |
+
# Uncertain claims
|
83 |
+
{"claim": "Aliens have visited the Earth.", "expected": "Uncertain"},
|
84 |
+
{"claim": "Information that falls into a black hole is permanently lost or destroyed.", "expected": "Uncertain"},
|
85 |
+
{"claim": "Time travel into the past is possible.", "expected": "Uncertain"},
|
86 |
+
{"claim": "Bigfoot (or Yeti) exists in remote wilderness areas.", "expected": "Uncertain"},
|
87 |
+
{"claim": "Intelligent life exists elsewhere in the universe.", "expected": "Uncertain"},
|
88 |
+
{"claim": "Yogi Adityanath will be the next Prime Minister of India.", "expected": "Uncertain"},
|
89 |
+
{"claim": "Consciousness continues to exist after biological death.", "expected": "Uncertain"},
|
90 |
+
{"claim": "There are multiple parallel universes.", "expected": "Uncertain"}
|
91 |
+
]
|
92 |
+
|
93 |
+
def setup_argument_parser():
|
94 |
+
"""
|
95 |
+
Set up command line argument parsing.
|
96 |
+
|
97 |
+
Returns:
|
98 |
+
argparse.Namespace: Parsed command line arguments
|
99 |
+
"""
|
100 |
+
parser = argparse.ArgumentParser(description="Evaluate AskVeracity performance")
|
101 |
+
parser.add_argument("--limit", type=int, help="Limit evaluation to first N claims")
|
102 |
+
parser.add_argument("--output", type=str, default="performance_results.json",
|
103 |
+
help="Output file for results (default: performance_results.json)")
|
104 |
+
return parser.parse_args()
|
105 |
+
|
106 |
+
def initialize_system():
|
107 |
+
"""
|
108 |
+
Initialize the system for evaluation.
|
109 |
+
|
110 |
+
Returns:
|
111 |
+
object: Initialized LangGraph agent
|
112 |
+
"""
|
113 |
+
print("Initializing models and agent...")
|
114 |
+
initialize_models()
|
115 |
+
eval_agent = agent.setup_agent()
|
116 |
+
return eval_agent
|
117 |
+
|
118 |
+
def normalize_classification(classification):
|
119 |
+
"""
|
120 |
+
Normalize classification labels for consistent comparison.
|
121 |
+
|
122 |
+
Args:
|
123 |
+
classification (str): Classification label from the system
|
124 |
+
|
125 |
+
Returns:
|
126 |
+
str: Normalized classification label ("True", "False", or "Uncertain")
|
127 |
+
"""
|
128 |
+
if not classification:
|
129 |
+
return "Uncertain"
|
130 |
+
|
131 |
+
if "true" in classification.lower():
|
132 |
+
return "True"
|
133 |
+
elif "false" in classification.lower():
|
134 |
+
return "False"
|
135 |
+
else:
|
136 |
+
return "Uncertain"
|
137 |
+
|
138 |
+
def is_correct(actual, expected):
|
139 |
+
"""
|
140 |
+
Determine if the actual classification matches the expected classification.
|
141 |
+
|
142 |
+
Args:
|
143 |
+
actual (str): Actual classification from the system
|
144 |
+
expected (str): Expected (ground truth) classification
|
145 |
+
|
146 |
+
Returns:
|
147 |
+
bool: True if classifications match, False otherwise
|
148 |
+
"""
|
149 |
+
# Normalize both for comparison
|
150 |
+
normalized_actual = normalize_classification(actual)
|
151 |
+
normalized_expected = expected
|
152 |
+
|
153 |
+
return normalized_actual == normalized_expected
|
154 |
+
|
155 |
+
def is_safe(actual, expected):
|
156 |
+
"""
|
157 |
+
Determine if the classification is "safe" - either correct or abstained (Uncertain)
|
158 |
+
instead of making an incorrect assertion.
|
159 |
+
|
160 |
+
Args:
|
161 |
+
actual (str): Actual classification from the system
|
162 |
+
expected (str): Expected (ground truth) classification
|
163 |
+
|
164 |
+
Returns:
|
165 |
+
bool: True if the classification is safe, False otherwise
|
166 |
+
"""
|
167 |
+
# Normalize both for comparison
|
168 |
+
normalized_actual = normalize_classification(actual)
|
169 |
+
normalized_expected = expected
|
170 |
+
|
171 |
+
# If the classification is correct, it's definitely safe
|
172 |
+
if normalized_actual == normalized_expected:
|
173 |
+
return True
|
174 |
+
|
175 |
+
# If the system classified as "Uncertain", that's safe (abstaining rather than wrong assertion)
|
176 |
+
if normalized_actual == "Uncertain":
|
177 |
+
return True
|
178 |
+
|
179 |
+
# Otherwise, the system made an incorrect assertion (False as True or True as False)
|
180 |
+
return False
|
181 |
+
|
182 |
+
def evaluate_claims(test_claims, eval_agent, limit=None):
    """
    Evaluate a list of claims using the fact-checking system.

    Args:
        test_claims (list): List of test claims with expected classifications
        eval_agent (object): Initialized LangGraph agent
        limit (int, optional): Maximum number of claims to evaluate

    Returns:
        tuple: (results, metrics)
            - results (list): Detailed results for each claim
            - metrics (dict): Aggregated performance metrics
    """
    # Initialize performance tracker
    performance_tracker = PerformanceTracker()

    # Limit the number of claims if requested
    if limit and limit > 0:
        claims_to_evaluate = test_claims[:limit]
    else:
        claims_to_evaluate = test_claims

    results = []
    total_count = len(claims_to_evaluate)
    correct_count = 0
    safe_count = 0

    # Classification counts
    classification_counts = {"True": 0, "False": 0, "Uncertain": 0}

    # Track processing times by expected classification
    processing_times = {"True": [], "False": [], "Uncertain": []}

    # Confidence scores by expected classification
    confidence_scores = {"True": [], "False": [], "Uncertain": []}

    # Track correct classifications by expected classification
    correct_by_class = {"True": 0, "False": 0, "Uncertain": 0}
    safe_by_class = {"True": 0, "False": 0, "Uncertain": 0}
    total_by_class = {"True": 0, "False": 0, "Uncertain": 0}

    print(f"Evaluating {len(claims_to_evaluate)} claims...")

    # Process each claim
    for idx, test_case in enumerate(claims_to_evaluate):
        claim = test_case["claim"]
        expected = test_case["expected"]

        print(f"\nProcessing claim {idx+1}/{len(claims_to_evaluate)}: {claim}")

        try:
            # Process the claim and measure time
            start_time = time.time()
            result = agent.process_claim(claim, eval_agent)
            total_time = time.time() - start_time

            # Extract classification and confidence
            classification = result.get("classification", "Uncertain")
            confidence = result.get("confidence", 0.0)

            # Normalize classification for comparison
            normalized_classification = normalize_classification(classification)

            # Check if classification is correct
            correct = is_correct(normalized_classification, expected)
            if correct:
                correct_count += 1
                correct_by_class[expected] += 1

            # Check if classification is safe
            safe = is_safe(normalized_classification, expected)
            if safe:
                safe_count += 1
                safe_by_class[expected] += 1

            # Update classification count
            classification_counts[normalized_classification] = classification_counts.get(normalized_classification, 0) + 1

            # Update counts by expected class
            total_by_class[expected] += 1

            # Update processing times
            processing_times[expected].append(total_time)

            # Update confidence scores
            confidence_scores[expected].append(confidence)

            # Save detailed result
            detail_result = {
                "claim": claim,
                "expected": expected,
                "actual": normalized_classification,
                "correct": correct,
                "safe": safe,
                "confidence": confidence,
                "processing_time": total_time
            }

            results.append(detail_result)

            # Print progress indicator
            outcome = "✓" if correct else "✗"
            safety = "(safe)" if safe and not correct else ""
            print(f" Result: {normalized_classification} (Expected: {expected}) {outcome} {safety}")
            print(f" Time: {total_time:.2f}s, Confidence: {confidence:.2f}")

        except Exception as e:
            print(f"Error processing claim: {str(e)}")
            results.append({
                "claim": claim,
                "expected": expected,
                "error": str(e)
            })

    # Calculate performance metrics
    accuracy = correct_count / total_count if total_count > 0 else 0
    safety_rate = safe_count / total_count if total_count > 0 else 0

    # Calculate per-class metrics
    class_metrics = {}
    for cls in ["True", "False", "Uncertain"]:
        class_accuracy = correct_by_class[cls] / total_by_class[cls] if total_by_class[cls] > 0 else 0
        class_safety_rate = safe_by_class[cls] / total_by_class[cls] if total_by_class[cls] > 0 else 0
        avg_time = sum(processing_times[cls]) / len(processing_times[cls]) if processing_times[cls] else 0
        avg_confidence = sum(confidence_scores[cls]) / len(confidence_scores[cls]) if confidence_scores[cls] else 0

        class_metrics[cls] = {
            "accuracy": class_accuracy,
            "safety_rate": class_safety_rate,
            "count": total_by_class[cls],
            "correct": correct_by_class[cls],
            "safe": safe_by_class[cls],
            "avg_processing_time": avg_time,
            "avg_confidence": avg_confidence
        }

    # Calculate overall metrics
    all_times = [r.get("processing_time", 0) for r in results if "processing_time" in r]
    all_confidence = [r.get("confidence", 0) for r in results if "confidence" in r]

    metrics = {
        "total_claims": total_count,
        "correct_claims": correct_count,
        "safe_claims": safe_count,
        "accuracy": accuracy,
        "safety_rate": safety_rate,
        "avg_processing_time": sum(all_times) / len(all_times) if all_times else 0,
        "avg_confidence": sum(all_confidence) / len(all_confidence) if all_confidence else 0,
        "classification_counts": classification_counts,
        "per_class_metrics": class_metrics
    }

    return results, metrics

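# Minimal usage sketch (the claims below are hypothetical; `eval_agent` is
# whatever initialize_system() returns):
#
#   test_claims = [
#       {"claim": "Water boils at 100 degrees Celsius at sea level.", "expected": "True"},
#       {"claim": "The Earth is flat.", "expected": "False"},
#   ]
#   results, metrics = evaluate_claims(test_claims, eval_agent, limit=2)
#   print(metrics["accuracy"], metrics["safety_rate"])
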
def save_results(results, metrics, output_file):
    """
    Save evaluation results to a JSON file.

    Args:
        results (list): Detailed results for each claim
        metrics (dict): Aggregated performance metrics
        output_file (str): Path to output file
    """
    output_data = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "metrics": metrics,
        "detailed_results": results
    }

    with open(output_file, 'w') as f:
        json.dump(output_data, f, indent=2)

    print(f"\nResults saved to {output_file}")

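# The saved file can later be reloaded for analysis, e.g. (the path below is
# illustrative; the real one comes from the --output argument):
#
#   with open("results/evaluation_results.json") as f:
#       data = json.load(f)
#   print(data["timestamp"], data["metrics"]["accuracy"])
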
def print_summary(metrics):
    """
    Print a summary of performance metrics.

    Args:
        metrics (dict): Aggregated performance metrics
    """
    print("\n" + "=" * 70)
    print("PERFORMANCE SUMMARY")
    print("=" * 70)

    # Overall metrics
    print("\nOverall Metrics:")
    print(f"Total Claims: {metrics['total_claims']}")
    print(f"Correctly Classified: {metrics['correct_claims']}")
    print(f"Safely Classified: {metrics['safe_claims']}")
    print(f"Accuracy: {metrics['accuracy']:.2%}")
    print(f"Safety Rate: {metrics['safety_rate']:.2%}")
    print(f"Average Processing Time: {metrics['avg_processing_time']:.2f} seconds")
    print(f"Average Confidence Score: {metrics['avg_confidence']:.2f}")

    # Per-class metrics as a table
    print("\nPer-Class Performance:")
    table_data = []
    headers = ["Class", "Count", "Correct", "Safe", "Accuracy", "Safety Rate", "Avg Time", "Avg Confidence"]

    for cls, cls_metrics in metrics['per_class_metrics'].items():
        table_data.append([
            cls,
            cls_metrics['count'],
            cls_metrics['correct'],
            cls_metrics['safe'],
            f"{cls_metrics['accuracy']:.2%}",
            f"{cls_metrics['safety_rate']:.2%}",
            f"{cls_metrics['avg_processing_time']:.2f}s",
            f"{cls_metrics['avg_confidence']:.2f}"
        ])

    print(tabulate(table_data, headers=headers, tablefmt="grid"))

def create_charts(metrics, output_dir="."):
    """
    Create visualizations of performance metrics.

    Args:
        metrics (dict): Aggregated performance metrics
        output_dir (str): Directory to save charts
    """
    try:
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Plot 1: Accuracy by class
        plt.figure(figsize=(10, 6))
        classes = list(metrics['per_class_metrics'].keys())
        accuracies = [metrics['per_class_metrics'][cls]['accuracy'] for cls in classes]

        plt.bar(classes, accuracies, color=['green', 'red', 'gray'])
        plt.title('Accuracy by Classification Type')
        plt.xlabel('Classification')
        plt.ylabel('Accuracy')
        plt.ylim(0, 1)

        for i, v in enumerate(accuracies):
            plt.text(i, v + 0.02, f"{v:.2%}", ha='center')

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'accuracy_by_class.png'))
        plt.close()  # Close the figure to free memory

        # Plot 2: Safety rate by class
        plt.figure(figsize=(10, 6))
        safety_rates = [metrics['per_class_metrics'][cls]['safety_rate'] for cls in classes]

        plt.bar(classes, safety_rates, color=['green', 'red', 'gray'])
        plt.title('Safety Rate by Classification Type')
        plt.xlabel('Classification')
        plt.ylabel('Safety Rate')
        plt.ylim(0, 1)

        for i, v in enumerate(safety_rates):
            plt.text(i, v + 0.02, f"{v:.2%}", ha='center')

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'safety_rate_by_class.png'))
        plt.close()  # Close the figure to free memory

        # Plot 3: Processing time by class
        plt.figure(figsize=(10, 6))
        times = [metrics['per_class_metrics'][cls]['avg_processing_time'] for cls in classes]

        plt.bar(classes, times, color=['green', 'red', 'gray'])
        plt.title('Average Processing Time by Classification Type')
        plt.xlabel('Classification')
        plt.ylabel('Time (seconds)')

        for i, v in enumerate(times):
            plt.text(i, v + 0.5, f"{v:.2f}s", ha='center')

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'processing_time_by_class.png'))
        plt.close()  # Close the figure to free memory

        # Plot 4: Confidence scores by class
        plt.figure(figsize=(10, 6))
        confidence = [metrics['per_class_metrics'][cls]['avg_confidence'] for cls in classes]

        plt.bar(classes, confidence, color=['green', 'red', 'gray'])
        plt.title('Average Confidence Score by Classification Type')
        plt.xlabel('Classification')
        plt.ylabel('Confidence Score')
        plt.ylim(0, 1)

        for i, v in enumerate(confidence):
            plt.text(i, v + 0.02, f"{v:.2f}", ha='center')

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'confidence_by_class.png'))
        plt.close()  # Close the figure to free memory

        print(f"\nCharts created in {output_dir}")

    except Exception as e:
        print(f"Error creating charts: {str(e)}")
        print("Continuing without charts.")

def main():
    """Main evaluation function that runs the entire evaluation process."""
    # Parse arguments
    args = setup_argument_parser()

    # Initialize the agent
    eval_agent = initialize_system()

    # Create the results directory if it doesn't exist
    results_dir = "results"
    os.makedirs(results_dir, exist_ok=True)

    # Set the output file path
    output_file = args.output
    if not os.path.isabs(output_file):
        output_file = os.path.join(results_dir, output_file)

    # Evaluate claims
    results, metrics = evaluate_claims(TEST_CLAIMS, eval_agent, args.limit)

    # Print summary
    print_summary(metrics)

    # Save results
    save_results(results, metrics, output_file)

    # Create charts (optional; requires matplotlib and tabulate)
    try:
        from tabulate import tabulate
        import matplotlib.pyplot as plt
        create_charts(metrics, results_dir)
    except ImportError:
        print("\nCould not create charts. Please install the matplotlib and tabulate packages:")
        print("pip install matplotlib tabulate")

if __name__ == "__main__":
    main()

modules/__init__.py
ADDED
@@ -0,0 +1,19 @@
"""
Modules package initialization.

This package contains the core modules for the AskVeracity fact-checking system.
"""

from .claim_extraction import extract_claims, shorten_claim_for_evidence
from .evidence_retrieval import retrieve_combined_evidence
from .classification import classify_with_llm, aggregate_evidence
from .explanation import generate_explanation

__all__ = [
    'extract_claims',
    'shorten_claim_for_evidence',
    'retrieve_combined_evidence',
    'classify_with_llm',
    'aggregate_evidence',
    'generate_explanation'
]
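
# With these re-exports in place, callers can import the public API directly
# from the package, e.g.:
#
#   from modules import extract_claims, retrieve_combined_evidence
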
modules/category_detection.py
ADDED
@@ -0,0 +1,716 @@
import logging
import re
from typing import Tuple, List, Dict, Optional
import os
import time

# Set up logging
logger = logging.getLogger("misinformation_detector")

# Define categories and their keywords
CLAIM_CATEGORIES = {
    "ai": [
        # General AI terms
        "AI", "artificial intelligence", "machine learning", "ML", "deep learning", "DL",
        "neural network", "neural nets", "generative AI", "GenAI", "AGI", "artificial general intelligence",
        "transformer", "attention mechanism", "fine-tuning", "pre-training", "training", "inference",

        # AI Models and Architectures
        "language model", "large language model", "LLM", "foundation model", "multimodal model",
        "vision language model", "VLM", "text-to-speech", "TTS", "speech-to-text", "STT",
        "text-to-image", "image-to-text", "diffusion model", "generative model", "discriminative model",
        "GPT", "BERT", "T5", "PaLM", "Claude", "Llama", "Gemini", "Mistral", "Mixtral", "Stable Diffusion",
        "Dall-E", "Midjourney", "Sora", "transformer", "MoE", "mixture of experts", "sparse model",
        "dense model", "encoder", "decoder", "encoder-decoder", "autoencoder", "VAE",
        "mixture of experts", "MoE", "sparse MoE", "switch transformer", "gated experts",
        "routing network", "expert routing", "pathways", "multi-query attention", "multi-head attention",
        "rotary position embedding", "RoPE", "grouped-query attention", "GQA", "flash attention",
        "state space model", "SSM", "mamba", "recurrent neural network", "RNN", "LSTM", "GRU",
        "convolutional neural network", "CNN", "residual connection", "skip connection", "normalization",
        "layer norm", "group norm", "batch norm", "parameter efficient fine-tuning", "PEFT",
        "LoRA", "low-rank adaptation", "QLoRA", "adapters", "prompt tuning", "prefix tuning",

        # AI Learning Paradigms
        "supervised learning", "unsupervised learning", "reinforcement learning", "RL",
        "meta-learning", "transfer learning", "federated learning", "self-supervised learning",
        "semi-supervised learning", "few-shot learning", "zero-shot learning", "one-shot learning",
        "contrastive learning", "curriculum learning", "imitation learning", "active learning",
        "reinforcement learning from human feedback", "RLHF", "direct preference optimization", "DPO",
        "constitutional AI", "red teaming", "adversarial training", "GAN", "generative adversarial network",
        "diffusion", "latent diffusion", "flow-based model", "variational autoencoder", "VAE",

        # AI Capabilities and Applications
        "natural language processing", "NLP", "computer vision", "CV", "speech recognition",
        "text generation", "image generation", "video generation", "multimodal", "multi-modal",
        "recommendation system", "recommender system", "chatbot", "conversational AI",
        "sentiment analysis", "entity recognition", "semantic search", "vector search", "embedding",
        "classification", "regression", "clustering", "anomaly detection", "agent", "AI agent",
        "autonomous agent", "agentic", "RAG", "retrieval augmented generation", "tool use",
        "function calling", "reasoning", "chain-of-thought", "CoT", "tree-of-thought", "ToT",
        "planning", "decision making", "multi-agent", "agent swarm", "multi-agent simulation",

        # AI Technical Terms
        "token", "tokenizer", "tokenization", "embedding", "vector", "prompt", "prompt engineering",
        "context window", "parameter", "weights", "bias", "activation function", "loss function",
        "gradient descent", "backpropagation", "epoch", "batch", "mini-batch", "regularization",
        "dropout", "overfitting", "underfitting", "hyperparameter", "latent space", "latent variable",
        "feature extraction", "dimensionality reduction", "quantization", "pruning",
        "fine-tuning", "transfer learning", "knowledge distillation", "int4", "int8", "bfloat16",
        "float16", "mixed precision", "GPTQ", "AWQ", "GGUF", "GGML", "KV cache", "speculative decoding",
        "beam search", "greedy decoding", "temperature", "top-k", "top-p", "nucleus sampling",

        # AI Tools and Frameworks
        "TensorFlow", "PyTorch", "JAX", "Keras", "Hugging Face", "Transformers", "Diffusers",
        "LangChain", "Llama Index", "OpenAI", "Anthropic", "NVIDIA", "GPU", "TPU", "IPU", "NPU", "CUDA",
        "MLOps", "model monitoring", "model deployment", "model serving", "inference endpoint",
        "vLLM", "TGI", "text generation inference", "triton", "onnx", "tensorRT",

        # AI Ethics and Concerns
        "AI ethics", "responsible AI", "AI safety", "AI alignment", "AI governance",
        "bias", "fairness", "interpretability", "explainability", "XAI", "transparency",
        "hallucination", "toxicity", "safe deployment", "AI risk", "AI capabilities",
        "alignment tax", "red teaming", "jailbreak", "prompt injection", "data poisoning",

        # AI Companies and Organizations
        "OpenAI", "Anthropic", "Google DeepMind", "Meta AI", "Microsoft", "NVIDIA",
        "Hugging Face", "Mistral AI", "Cohere", "AI21 Labs", "Stability AI", "Midjourney",
        "EleutherAI", "Allen AI", "DeepMind", "Character AI", "Inflection AI", "xAI"
    ],

"science": [
|
81 |
+
# General scientific terms
|
82 |
+
"study", "research", "scientist", "scientific", "discovered", "experiment",
|
83 |
+
"laboratory", "clinical", "trial", "hypothesis", "theory", "evidence-based",
|
84 |
+
"peer-reviewed", "journal", "publication", "finding", "breakthrough", "innovation",
|
85 |
+
"discovery", "analysis", "measurement", "observation", "empirical",
|
86 |
+
|
87 |
+
# Biology and medicine
|
88 |
+
"biology", "chemistry", "physics", "genetics", "genomics", "DNA", "RNA",
|
89 |
+
"medicine", "gene", "protein", "molecule", "cell", "brain", "neuro",
|
90 |
+
"cancer", "disease", "cure", "treatment", "vaccine", "health", "medical",
|
91 |
+
"pharmaceutical", "drug", "therapy", "symptom", "diagnosis", "prognosis",
|
92 |
+
"patient", "doctor", "hospital", "clinic", "surgery", "immune", "antibody",
|
93 |
+
"virus", "bacteria", "pathogen", "infection", "epidemic", "pandemic",
|
94 |
+
"organism", "evolution", "mutation", "chromosome", "enzyme", "hormone",
|
95 |
+
|
96 |
+
# Physics and astronomy
|
97 |
+
"quantum", "particle", "atom", "nuclear", "electron", "neutron", "proton",
|
98 |
+
"atomic", "subatomic", "molecular", "energy", "matter", "mass", "force",
|
99 |
+
"space", "NASA", "telescope", "planet", "exoplanet", "moon", "lunar", "mars",
|
100 |
+
"star", "galaxy", "cosmic", "astronomical", "universe", "solar", "celestial",
|
101 |
+
"orbit", "gravitational", "gravity", "relativity", "quantum mechanics",
|
102 |
+
"string theory", "dark matter", "dark energy", "black hole", "supernova",
|
103 |
+
"radiation", "radioactive", "isotope", "fission", "fusion", "accelerator",
|
104 |
+
|
105 |
+
# Environmental science
|
106 |
+
"climate", "carbon", "environment", "ecosystem", "species", "extinct",
|
107 |
+
"endangered", "biodiversity", "conservation", "sustainable", "renewable",
|
108 |
+
"fossil fuel", "greenhouse", "global warming", "polar", "ice cap", "glacier",
|
109 |
+
"ozone", "atmosphere", "weather", "meteorology", "geology", "earthquake",
|
110 |
+
"volcanic", "ocean", "marine", "coral reef", "deforestation", "pollution",
|
111 |
+
|
112 |
+
# Math and computer science (non-AI specific)
|
113 |
+
"equation", "formula", "theorem", "calculus", "statistical", "probability",
|
114 |
+
"variable", "matrix", "optimization",
|
115 |
+
|
116 |
+
# Organizations
|
117 |
+
"CERN", "NIH", "CDC", "WHO", "NOAA", "ESA", "SpaceX", "Blue Origin", "JPL",
|
118 |
+
"laboratory", "institute", "university", "academic", "faculty", "professor",
|
119 |
+
|
120 |
+
# Science tools
|
121 |
+
"Matlab", "SPSS", "SAS", "ImageJ", "LabVIEW", "ANSYS", "Cadence", "Origin",
|
122 |
+
"Avogadro", "ChemDraw", "Mathematica", "Wolfram Alpha", "COMSOL", "LAMMPS",
|
123 |
+
"VASP", "Gaussian", "GIS", "ArcGIS", "QGIS", "Maple", "R Studio"
|
124 |
+
],
|
125 |
+
|
126 |
+
"technology": [
|
127 |
+
# General tech terms
|
128 |
+
"computer", "hardware", "internet", "cyber", "digital", "tech",
|
129 |
+
"robot", "automation", "autonomous", "code", "programming", "data", "cloud",
|
130 |
+
"server", "network", "encryption", "blockchain", "crypto", "bitcoin", "ethereum",
|
131 |
+
"technology", "breakthrough", "prototype", "dataset",
|
132 |
+
"engineering", "technical", "specification", "feature", "functionality",
|
133 |
+
"interface", "system", "infrastructure", "integration", "implementation",
|
134 |
+
|
135 |
+
# Devices and hardware
|
136 |
+
"smartphone", "device", "gadget", "laptop", "desktop", "tablet", "wearable",
|
137 |
+
"smartwatch", "IoT", "internet of things", "sensor", "chip", "semiconductor",
|
138 |
+
"processor", "CPU", "GPU", "memory", "RAM", "storage", "hard drive", "SSD",
|
139 |
+
"electronic", "circuit", "motherboard", "component", "peripheral", "accessory",
|
140 |
+
"display", "screen", "touchscreen", "camera", "lens", "microphone", "speaker",
|
141 |
+
"battery", "charger", "wireless", "bluetooth", "WiFi", "router", "modem",
|
142 |
+
|
143 |
+
# Software and internet
|
144 |
+
"algorithm", "app", "application", "platform", "website", "online", "web", "browser",
|
145 |
+
"operating system", "Windows", "macOS", "Linux", "Android", "iOS", "software",
|
146 |
+
"program", "code", "coding", "development", "framework", "library", "API",
|
147 |
+
"backend", "frontend", "full-stack", "developer", "programmer", "function",
|
148 |
+
"database", "SQL", "NoSQL", "cloud computing", "SaaS", "PaaS", "IaaS",
|
149 |
+
"DevOps", "agile", "scrum", "sprint", "version control", "git", "repository",
|
150 |
+
|
151 |
+
# Communications and networking
|
152 |
+
"5G", "6G", "broadband", "fiber", "network", "wireless", "cellular", "mobile",
|
153 |
+
"telecommunications", "telecom", "transmission", "bandwidth", "latency",
|
154 |
+
"protocol", "IP address", "DNS", "server", "hosting", "data center",
|
155 |
+
|
156 |
+
# Company and product names
|
157 |
+
"Apple", "Google", "Microsoft", "Amazon", "Facebook", "Meta", "Tesla",
|
158 |
+
"IBM", "Intel", "AMD", "Nvidia", "Qualcomm", "Cisco", "Oracle", "SAP",
|
159 |
+
"Huawei", "Samsung", "Sony", "LG", "Dell", "HP", "Lenovo", "Xiaomi",
|
160 |
+
"iPhone", "iPad", "MacBook", "Surface", "Galaxy", "Pixel", "Windows",
|
161 |
+
"Android", "iOS", "Chrome", "Firefox", "Edge", "Safari", "Office",
|
162 |
+
"Azure", "AWS", "Google Cloud", "Gmail", "Outlook", "Teams", "Zoom",
|
163 |
+
|
164 |
+
# Advanced technologies
|
165 |
+
"VR", "AR", "XR", "virtual reality", "augmented reality", "mixed reality",
|
166 |
+
"metaverse", "3D printing", "additive manufacturing", "quantum computing",
|
167 |
+
"nanotechnology", "biotechnology", "electric vehicle", "self-driving",
|
168 |
+
"autonomous vehicle", "drone", "UAV", "robotics", "cybersecurity",
|
169 |
+
|
170 |
+
# Social media
|
171 |
+
"social media", "social network", "Facebook", "Instagram", "Twitter", "X",
|
172 |
+
"LinkedIn", "TikTok", "Snapchat", "YouTube", "Pinterest", "Reddit",
|
173 |
+
"streaming", "content creator", "influencer", "follower", "like", "share",
|
174 |
+
"post", "tweet", "user-generated", "viral", "trending", "engagement",
|
175 |
+
|
176 |
+
# Technology tools
|
177 |
+
"NumPy", "Pandas", "Matplotlib", "Seaborn", "Scikit-learn", "Jupyter",
|
178 |
+
"Visual Studio", "VS Code", "IntelliJ", "PyCharm", "Eclipse", "Android Studio",
|
179 |
+
"Xcode", "Docker", "Kubernetes", "Jenkins", "Ansible", "Terraform", "Vagrant",
|
180 |
+
"AWS CLI", "Azure CLI", "GCP CLI", "PowerShell", "Bash", "npm", "pip", "conda",
|
181 |
+
"React", "Angular", "Vue.js", "Node.js", "Django", "Flask", "Spring", "Laravel",
|
182 |
+
"PostgreSQL", "MySQL", "MongoDB", "Redis", "Elasticsearch", "Kafka", "RabbitMQ",
|
183 |
+
|
184 |
+
# Optimization terms
|
185 |
+
"efficiency", "performance tuning", "benchmarking", "profiling",
|
186 |
+
"refactoring", "scaling", "bottleneck", "throughput", "latency reduction",
|
187 |
+
"response time", "caching", "load balancing", "distributed computing",
|
188 |
+
"parallel processing", "concurrency", "asynchronous", "memory management"
|
189 |
+
],
|
190 |
+
|
191 |
+
"politics": [
|
192 |
+
# Government structure
|
193 |
+
"president", "prime minister", "government", "parliament", "congress",
|
194 |
+
"senate", "house", "representative", "minister", "secretary", "cabinet",
|
195 |
+
"administration", "mayor", "governor", "politician", "official", "authority",
|
196 |
+
"federal", "state", "local", "municipal", "county", "city", "town",
|
197 |
+
"constituency", "district", "precinct", "ward", "judiciary", "executive",
|
198 |
+
"legislative", "branch", "checks and balances", "separation of powers",
|
199 |
+
|
200 |
+
# Political activities
|
201 |
+
"election", "campaign", "vote", "voter", "ballot", "polling",
|
202 |
+
"political", "politics", "debate", "speech", "address", "press conference",
|
203 |
+
"approval rating", "opinion poll", "candidate", "incumbent", "challenger",
|
204 |
+
"primary", "caucus", "convention", "delegate", "nomination", "campaign trail",
|
205 |
+
"fundraising", "lobbying", "advocacy", "activism", "protest", "demonstration",
|
206 |
+
|
207 |
+
# Political ideologies
|
208 |
+
"democracy", "democratic", "republican", "conservative", "liberal",
|
209 |
+
"progressive", "left-wing", "right-wing", "centrist", "moderate",
|
210 |
+
"socialist", "capitalist", "communist", "libertarian", "populist",
|
211 |
+
"nationalist", "globalist", "isolationist", "hawk", "dove",
|
212 |
+
"ideology", "partisan", "bipartisan", "coalition", "majority", "minority",
|
213 |
+
|
214 |
+
# Laws and regulations
|
215 |
+
"bill", "law", "legislation", "regulation", "policy", "statute", "code",
|
216 |
+
"amendment", "reform", "repeal", "enact", "implement", "enforce",
|
217 |
+
"constitutional", "unconstitutional", "legal", "illegal", "legalize",
|
218 |
+
"criminalize", "deregulate", "regulatory", "compliance", "mandate",
|
219 |
+
|
220 |
+
# Judicial and legal
|
221 |
+
"court", "supreme", "justice", "judge", "ruling", "decision", "opinion",
|
222 |
+
"case", "lawsuit", "litigation", "plaintiff", "defendant", "prosecutor",
|
223 |
+
"attorney", "lawyer", "advocate", "judicial review", "precedent",
|
224 |
+
"constitution", "amendment", "rights", "civil rights", "human rights",
|
225 |
+
|
226 |
+
# International relations
|
227 |
+
"treaty", "diplomatic", "diplomacy", "relations",
|
228 |
+
"foreign policy", "domestic policy", "UN", "NATO", "EU", "United Nations",
|
229 |
+
"sanctions", "embargo", "tariff", "trade war", "diplomat", "embassy",
|
230 |
+
"consulate", "ambassador", "delegation", "summit", "bilateral", "multilateral",
|
231 |
+
"alliance", "ally", "adversary", "geopolitical", "sovereignty", "regime",
|
232 |
+
|
233 |
+
# Security and defense
|
234 |
+
"national security", "homeland security", "defense", "military", "armed forces",
|
235 |
+
"army", "navy", "air force", "marines", "coast guard", "intelligence",
|
236 |
+
"CIA", "FBI", "NSA", "Pentagon", "war", "conflict", "peacekeeping",
|
237 |
+
"terrorism", "counterterrorism", "insurgency", "nuclear weapon", "missile",
|
238 |
+
"disarmament", "nonproliferation", "surveillance", "espionage",
|
239 |
+
|
240 |
+
# Political institutions
|
241 |
+
"White House", "Kremlin", "Downing Street", "Capitol Hill", "Westminster",
|
242 |
+
"United Nations", "European Union", "NATO", "World Bank", "IMF", "WTO",
|
243 |
+
"ASEAN", "African Union", "BRICS", "G7", "G20",
|
244 |
+
|
245 |
+
# Political parties and movements
|
246 |
+
"Democrat", "Republican", "Labour", "Conservative", "Green Party",
|
247 |
+
"Socialist", "Communist", "Libertarian", "Independent", "Tea Party",
|
248 |
+
"progressive movement", "civil rights movement", "womens rights",
|
249 |
+
"LGBTQ rights", "Black Lives Matter", "environmental movement"
|
250 |
+
],
|
251 |
+
|
252 |
+
"business": [
|
253 |
+
# Companies and organization types
|
254 |
+
"company", "corporation", "business", "startup", "firm", "enterprise",
|
255 |
+
"corporate", "industry", "sector", "conglomerate", "multinational",
|
256 |
+
"organization", "entity", "private", "public", "incorporated", "LLC",
|
257 |
+
"partnership", "proprietorship", "franchise", "subsidiary", "parent company",
|
258 |
+
"headquarters", "office", "facility", "plant", "factory", "warehouse",
|
259 |
+
"retail", "wholesale", "ecommerce", "brick-and-mortar", "chain", "outlet",
|
260 |
+
|
261 |
+
# Business roles and management
|
262 |
+
"executive", "CEO", "CFO", "CTO", "COO", "CMO", "CIO", "CHRO", "chief",
|
263 |
+
"director", "board", "chairman", "chairwoman", "chairperson", "president",
|
264 |
+
"vice president", "senior", "junior", "manager", "management", "supervisor",
|
265 |
+
"founder", "entrepreneur", "owner", "shareholder", "stakeholder",
|
266 |
+
"employee", "staff", "workforce", "personnel", "human resources", "HR",
|
267 |
+
"recruit", "hire", "layoff", "downsizing", "restructuring", "reorganization",
|
268 |
+
"leadership",
|
269 |
+
|
270 |
+
# Financial terms
|
271 |
+
"profit", "revenue", "sales", "income", "earnings", "EBITDA", "turnover",
|
272 |
+
"loss", "deficit", "expense", "cost", "overhead", "margin", "markup",
|
273 |
+
"budget", "forecast", "projection", "estimate", "actual", "variance",
|
274 |
+
"balance sheet", "income statement", "cash flow", "P&L", "liquidity",
|
275 |
+
"solvency", "asset", "liability", "equity", "debt", "leverage", "capital",
|
276 |
+
"working capital", "cash", "funds", "money", "payment", "transaction",
|
277 |
+
|
278 |
+
# Markets and trading
|
279 |
+
"market", "stock", "share", "bond", "security", "commodity", "futures",
|
280 |
+
"option", "derivative", "forex", "foreign exchange", "currency", "crypto",
|
281 |
+
"trader", "trading", "buy", "sell", "long", "short", "position", "portfolio",
|
282 |
+
"diversification", "hedge", "risk", "return", "yield", "dividend", "interest",
|
283 |
+
"bull market", "bear market", "correction", "crash", "rally", "volatile",
|
284 |
+
"volatility", "index", "benchmark", "Dow Jones", "NASDAQ", "S&P 500", "NYSE",
|
285 |
+
|
286 |
+
# Investment and funding
|
287 |
+
"investor", "investment", "fund", "mutual fund", "ETF", "hedge fund",
|
288 |
+
"private equity", "venture", "venture capital", "VC", "angel investor",
|
289 |
+
"seed", "Series A", "Series B", "Series C", "funding", "financing",
|
290 |
+
"loan", "credit", "debt", "equity", "fundraising", "crowdfunding",
|
291 |
+
"IPO", "initial public offering", "going public", "listed", "delisted",
|
292 |
+
"merger", "acquisition", "M&A", "takeover", "buyout", "divestiture",
|
293 |
+
"valuation", "billion", "million", "trillion", "unicorn", "decacorn",
|
294 |
+
|
295 |
+
# Economic terms
|
296 |
+
"economy", "economic", "economics", "macro", "micro", "fiscal", "monetary",
|
297 |
+
"supply", "demand", "market forces", "competition", "competitive", "monopoly",
|
298 |
+
"oligopoly", "antitrust", "deregulation", "growth", "decline",
|
299 |
+
"recession", "depression", "recovery", "expansion", "contraction", "cycle",
|
300 |
+
"inflation", "deflation", "stagflation", "hyperinflation", "CPI", "price",
|
301 |
+
"GDP", "gross domestic product", "GNP", "productivity", "output", "input",
|
302 |
+
|
303 |
+
# Banking and finance
|
304 |
+
"finance", "financial", "bank", "banking", "commercial bank", "investment bank",
|
305 |
+
"central bank", "Federal Reserve", "Fed", "ECB", "Bank of England", "BOJ",
|
306 |
+
"interest rate", "prime rate", "discount rate", "basis point", "monetary policy",
|
307 |
+
"quantitative easing", "tightening", "loosening", "credit", "lending",
|
308 |
+
"borrowing", "loan", "mortgage", "consumer credit", "credit card", "debit card",
|
309 |
+
"checking", "savings", "deposit", "withdrawal", "ATM", "branch", "online banking",
|
310 |
+
|
311 |
+
# Currencies and payments
|
312 |
+
"dollar", "euro", "pound", "yen", "yuan", "rupee", "ruble", "real", "peso",
|
313 |
+
"currency", "money", "fiat", "exchange rate", "remittance", "transfer",
|
314 |
+
"payment", "transaction", "wire", "ACH", "SWIFT", "clearing", "settlement",
|
315 |
+
"cryptocurrency", "bitcoin", "ethereum", "blockchain", "fintech", "paytech",
|
316 |
+
|
317 |
+
# Business operations
|
318 |
+
"product", "service", "solution", "offering", "launch", "rollout", "release",
|
319 |
+
"operation", "production", "manufacturing", "supply chain", "logistics",
|
320 |
+
"procurement", "inventory", "distribution", "shipping", "delivery",
|
321 |
+
"quality", "control", "assurance", "standard", "certification",
|
322 |
+
|
323 |
+
# Marketing and sales
|
324 |
+
"marketing", "advertise", "advertising", "campaign", "promotion", "publicity",
|
325 |
+
"PR", "public relations", "brand", "branding", "identity", "image", "reputation",
|
326 |
+
"sales", "selling", "deal", "transaction", "pipeline", "lead", "prospect",
|
327 |
+
"customer", "client", "consumer", "buyer", "purchaser", "target market",
|
328 |
+
"segment", "demographic", "psychographic", "B2B", "B2C", "retail", "wholesale",
|
329 |
+
"price", "pricing", "discount", "premium", "luxury", "value", "bargain"
|
330 |
+
],
|
331 |
+
|
332 |
+
"world": [
|
333 |
+
# General international terms
|
334 |
+
"country", "nation", "state", "republic", "kingdom", "global", "international",
|
335 |
+
"foreign", "world", "worldwide", "domestic", "abroad", "overseas",
|
336 |
+
"developed", "developing", "industrialized", "emerging", "third world",
|
337 |
+
"global south", "global north", "east", "west", "western", "eastern",
|
338 |
+
"bilateral", "multilateral", "transnational", "multinational", "sovereignty",
|
339 |
+
|
340 |
+
# Regions and continents
|
341 |
+
"Europe", "European", "Asia", "Asian", "Africa", "African", "North America",
|
342 |
+
"South America", "Latin America", "Australia", "Oceania", "Antarctica",
|
343 |
+
"Middle East", "Central Asia", "Southeast Asia", "East Asia", "South Asia",
|
344 |
+
"Eastern Europe", "Western Europe", "Northern Europe", "Southern Europe",
|
345 |
+
"Mediterranean", "Scandinavia", "Nordic", "Baltic", "Balkans", "Caucasus",
|
346 |
+
"Caribbean", "Central America", "South Pacific", "Polynesia", "Micronesia",
|
347 |
+
|
348 |
+
# Major countries and regions
|
349 |
+
"China", "Chinese", "Russia", "Russian", "India", "Indian", "Japan", "Japanese",
|
350 |
+
"UK", "British", "England", "English", "Scotland", "Scottish", "Wales", "Welsh",
|
351 |
+
"Germany", "German", "France", "French", "Italy", "Italian", "Spain", "Spanish",
|
352 |
+
"Canada", "Canadian", "Brazil", "Brazilian", "Mexico", "Mexican", "Turkey", "Turkish",
|
353 |
+
"United States", "US", "USA", "American", "Britain", "Korea", "Korean",
|
354 |
+
"North Korea", "South Korea", "Saudi", "Saudi Arabia", "Saudi Arabian",
|
355 |
+
"Iran", "Iranian", "Iraq", "Iraqi", "Israel", "Israeli", "Palestine", "Palestinian",
|
356 |
+
"Egypt", "Egyptian", "Pakistan", "Pakistani", "Indonesia", "Indonesian",
|
357 |
+
"Australia", "Australian", "New Zealand", "Nigeria", "Nigerian", "South Africa",
|
358 |
+
"Argentina", "Argentinian", "Colombia", "Colombian", "Venezuela", "Venezuelan",
|
359 |
+
"Ukraine", "Ukrainian", "Poland", "Polish", "Switzerland", "Swiss",
|
360 |
+
"Netherlands", "Dutch", "Belgium", "Belgian", "Sweden", "Swedish", "Norway", "Norwegian",
|
361 |
+
|
362 |
+
# International issues and topics
|
363 |
+
"war", "conflict", "crisis", "tension", "dispute", "hostility", "peace",
|
364 |
+
"peacekeeping", "ceasefire", "truce", "armistice", "treaty", "agreement",
|
365 |
+
"compromise", "negotiation", "mediation", "resolution", "settlement",
|
366 |
+
"refugee", "migrant", "asylum seeker", "displacement", "humanitarian",
|
367 |
+
"border", "frontier", "territory", "territorial", "sovereignty", "jurisdiction",
|
368 |
+
"terror", "terrorism", "extremism", "radicalism", "insurgency", "militant",
|
369 |
+
"sanction", "embargo", "restriction", "isolation", "blockade",
|
370 |
+
|
371 |
+
# International trade and economy
|
372 |
+
"trade", "import", "export", "tariff", "duty", "quota", "subsidy",
|
373 |
+
"protectionism", "free trade", "fair trade", "globalization", "trade war",
|
374 |
+
"trade agreement", "trade deal", "trade deficit", "trade surplus",
|
375 |
+
"supply chain", "outsourcing", "offshoring", "reshoring", "nearshoring",
|
376 |
+
|
377 |
+
# Diplomacy and international relations
|
378 |
+
"embassy", "consulate", "diplomatic", "diplomacy", "diplomat", "ambassador",
|
379 |
+
"consul", "attaché", "envoy", "emissary", "delegation", "mission",
|
380 |
+
"foreign policy", "international relations", "geopolitics", "geopolitical",
|
381 |
+
"influence", "power", "superpower", "hegemony", "alliance", "coalition",
|
382 |
+
"bloc", "axis", "sphere of influence", "buffer state", "proxy",
|
383 |
+
|
384 |
+
# International organizations
|
385 |
+
"UN", "United Nations", "EU", "European Union", "NATO", "NAFTA", "USMCA",
|
386 |
+
"ASEAN", "OPEC", "Commonwealth", "Arab League", "African Union", "AU",
|
387 |
+
"BRICS", "G7", "G20", "IMF", "World Bank", "WTO", "WHO", "UNESCO",
|
388 |
+
"Security Council", "General Assembly", "International Court of Justice",
|
389 |
+
|
390 |
+
# Travel and cultural exchange
|
391 |
+
"visa", "passport", "immigration", "emigration", "migration", "travel",
|
392 |
+
"tourism", "tourist", "visitor", "foreigner", "expatriate", "expat",
|
393 |
+
"citizenship", "nationality", "dual citizen", "naturalization",
|
394 |
+
"cultural", "tradition", "heritage", "indigenous", "native", "local",
|
395 |
+
"language", "dialect", "translation", "interpreter", "cross-cultural",
|
396 |
+
|
397 |
+
# Other
|
398 |
+
"event"
|
399 |
+
],
|
400 |
+
|
401 |
+
"sports": [
|
402 |
+
# General sports terms
|
403 |
+
"game", "match", "tournament", "championship", "league", "cup", "Olympics",
|
404 |
+
"olympic", "world cup", "competition", "contest",
|
405 |
+
"sport", "sporting", "athletics", "physical", "play", "compete", "competition",
|
406 |
+
"amateur", "professional", "pro", "preseason", "regular season",
|
407 |
+
"postseason", "playoff", "final", "semifinal", "quarterfinal", "qualifying",
|
408 |
+
|
409 |
+
# Team sports
|
410 |
+
"football", "soccer", "American football", "rugby", "basketball", "baseball",
|
411 |
+
"cricket", "hockey", "ice hockey", "field hockey", "volleyball", "handball",
|
412 |
+
"water polo", "lacrosse", "ultimate frisbee", "netball", "kabaddi",
|
413 |
+
"team", "club", "franchise", "squad", "roster", "lineup", "formation",
|
414 |
+
"player", "coach", "manager", "trainer", "captain", "starter", "substitute",
|
415 |
+
"bench", "draft", "trade", "free agent", "contract", "transfer", "loan",
|
416 |
+
|
417 |
+
# Individual sports
|
418 |
+
"tennis", "golf", "boxing", "wrestling", "martial arts", "MMA", "UFC",
|
419 |
+
"athletics", "track and field", "swimming", "diving", "gymnastics",
|
420 |
+
"skiing", "snowboarding", "skating", "figure skating", "speed skating",
|
421 |
+
"cycling", "mountain biking", "BMX", "motorsport", "F1", "Formula 1",
|
422 |
+
"NASCAR", "IndyCar", "MotoGP", "rally", "marathon", "triathlon", "decathlon",
|
423 |
+
"archery", "shooting", "fencing", "equestrian", "rowing", "canoeing", "kayaking",
|
424 |
+
"surfing", "skateboarding", "climbing", "bouldering", "weightlifting",
|
425 |
+
|
426 |
+
# Scoring and results
|
427 |
+
"score", "point", "goal", "touchdown", "basket", "run", "wicket", "try",
|
428 |
+
"win", "lose", "draw", "tie", "defeat", "victory", "champion", "winner",
|
429 |
+
"loser", "runner-up", "finalist", "semifinalist", "eliminated", "advance",
|
430 |
+
"qualify", "record", "personal best", "world record", "Olympic record",
|
431 |
+
"streak", "undefeated", "unbeaten", "perfect season", "comeback",
|
432 |
+
|
433 |
+
# Performance and training
|
434 |
+
"fitness", "training", "practice", "drill", "workout", "exercise", "regime",
|
435 |
+
"conditioning", "strength", "endurance", "speed", "agility", "flexibility",
|
436 |
+
"skill", "technique", "form", "style", "strategy", "tactic", "playbook",
|
437 |
+
"offense", "defense", "attack", "counter", "press", "formation",
|
438 |
+
"injury", "rehabilitation", "recovery", "physiotherapy", "sports medicine",
|
439 |
+
|
440 |
+
# Sports infrastructure
|
441 |
+
"stadium", "arena", "court", "field", "pitch", "rink", "pool", "track",
|
442 |
+
"course", "gymnasium", "gym", "complex", "venue", "facility", "locker room",
|
443 |
+
"dugout", "bench", "sideline", "grandstand", "spectator", "fan", "supporter",
|
444 |
+
|
445 |
+
# Sports organizations and competitions
|
446 |
+
"medal", "gold", "silver", "bronze", "podium", "Olympics", "Paralympic",
|
447 |
+
"commonwealth games", "Asian games", "Pan American games", "world championship",
|
448 |
+
"grand slam", "masters", "open", "invitational", "classic", "tour", "circuit",
|
449 |
+
"IPL", "Indian Premier League", "MLB", "Major League Baseball",
|
450 |
+
"NBA", "National Basketball Association", "NFL", "National Football League",
|
451 |
+
"NHL", "National Hockey League", "FIFA", "UEFA", "ATP", "WTA", "ICC",
|
452 |
+
"Premier League", "La Liga", "Bundesliga", "Serie A", "Ligue 1", "MLS",
|
453 |
+
"Champions League", "Europa League", "Super Bowl", "World Series", "Stanley Cup",
|
454 |
+
"NCAA", "collegiate", "college", "university", "varsity", "intramural",
|
455 |
+
|
456 |
+
# Sports media and business
|
457 |
+
"broadcast", "coverage", "commentator", "announcer", "pundit", "analyst",
|
458 |
+
"highlight", "replay", "sports network", "ESPN", "Sky Sports", "Fox Sports",
|
459 |
+
"sponsorship", "endorsement", "advertisement", "merchandise", "jersey", "kit",
|
460 |
+
"ticket", "season ticket", "box seat", "premium", "concession", "vendor",
    ],

"entertainment": [
|
469 |
+
# Film and cinema
|
470 |
+
"movie", "film", "cinema", "feature", "short film", "documentary", "animation",
|
471 |
+
"blockbuster", "indie", "independent film", "foreign film", "box office",
|
472 |
+
"screening", "premiere", "release", "theatrical", "stream", "streaming",
|
473 |
+
"director", "producer", "screenwriter", "script", "screenplay", "adaptation",
|
474 |
+
"cinematography", "cinematographer", "editing", "editor", "visual effects",
|
475 |
+
"special effects", "CGI", "motion capture", "sound design", "soundtrack",
|
476 |
+
"score", "composer", "scene", "shot", "take", "cut", "sequel", "prequel",
|
477 |
+
"trilogy", "franchise", "universe", "reboot", "remake", "spin-off",
|
478 |
+
"genre", "action", "comedy", "drama", "thriller", "horror", "sci-fi",
|
479 |
+
"science fiction", "fantasy", "romance", "romantic comedy", "rom-com",
|
480 |
+
"mystery", "crime", "western", "historical", "biographical", "biopic",
|
481 |
+
|
482 |
+
# Television
|
483 |
+
"TV", "television", "show", "episode",
|
484 |
+
"finale", "midseason", "sitcom", "drama series", "miniseries", "limited series",
|
485 |
+
"anthology", "reality TV", "game show", "talk show", "variety show",
|
486 |
+
"network", "cable", "premium cable", "broadcast", "channel", "program",
|
487 |
+
"primetime", "daytime", "syndication", "rerun", "renewed", "cancelled",
|
488 |
+
"showrunner", "creator", "writer", "TV writer", "episode writer", "staff writer",
|
489 |
+
|
490 |
+
# Performing arts
|
491 |
+
"actor", "actress", "performer", "cast", "casting", "star", "co-star",
|
492 |
+
"supporting", "lead", "protagonist", "antagonist", "villain", "hero", "anti-hero",
|
493 |
+
"character", "role", "portrayal", "acting", "dialogue",
|
494 |
+
"monologue", "line", "script", "improv", "improvisation", "stand-up",
|
495 |
+
"comedian", "comic", "sketch", "theater", "theatre", "stage", "Broadway",
|
496 |
+
"West End", "play", "musical", "opera", "ballet", "dance", "choreography",
|
497 |
+
"production", "rehearsal", "audition", "understudy", "troupe", "ensemble",
|
498 |
+
|
499 |
+
# Music
|
500 |
+
"music", "song", "track", "single", "album", "EP", "LP", "record",
|
501 |
+
"release", "drop", "artist", "musician", "singer", "vocalist", "band",
|
502 |
+
"group", "duo", "trio", "soloist", "frontman", "frontwoman", "lead singer",
|
503 |
+
"songwriter", "composer", "producer", "DJ", "rapper", "MC", "beatmaker",
|
504 |
+
"guitarist", "bassist", "drummer", "pianist", "keyboardist", "violinist",
|
505 |
+
"instrumentalist", "orchestra", "symphony", "philharmonic", "conductor",
|
506 |
+
"genre", "rock", "pop", "hip-hop", "rap", "R&B", "soul", "funk", "jazz",
|
507 |
+
"blues", "country", "folk", "electronic", "EDM", "dance", "techno", "house",
|
508 |
+
"metal", "punk", "alternative", "indie", "classical", "reggae", "latin",
|
509 |
+
"hit", "chart", "Billboard", "Grammy", "award-winning", "platinum", "gold",
|
510 |
+
"concert", "tour", "gig", "show", "venue", "arena",
|
511 |
+
"stadium", "festival", "Coachella", "Glastonbury", "Lollapalooza", "Bonnaroo",
|
512 |
+
|
513 |
+
# Celebrity culture
|
514 |
+
"celebrity", "star", "fame", "famous", "A-list", "B-list", "icon", "iconic",
|
515 |
+
"superstar", "public figure", "household name", "stardom", "limelight",
|
516 |
+
"popular", "popularity", "fan", "fanbase", "followers", "stan", "groupie",
|
517 |
+
"paparazzi", "tabloid", "gossip", "rumor", "scandal", "controversy",
|
518 |
+
"interview", "press conference", "red carpet", "premiere", "gala", "award show",
|
519 |
+
|
520 |
+
# Awards and recognition
|
521 |
+
"award", "nominee", "nomination", "winner", "recipient", "honor", "accolade",
|
522 |
+
"Oscar", "Academy Award", "Emmy", "Grammy", "Tony", "Golden Globe", "BAFTA",
|
523 |
+
"MTV Award", "People's Choice", "Critics' Choice", "SAG Award", "Billboard Award",
|
524 |
+
"best actor", "best actress", "best director", "best picture", "best film",
|
525 |
+
"best album", "best song", "hall of fame", "lifetime achievement", "legacy",
|
526 |
+
|
527 |
+
# Media and publishing
|
528 |
+
"book", "novel", "fiction", "non-fiction", "memoir", "biography", "autobiography",
|
529 |
+
"bestseller", "bestselling", "author", "writer", "novelist", "literary",
|
530 |
+
"literature", "publisher", "publishing", "imprint", "edition", "volume",
|
531 |
+
"chapter", "page", "paragraph", "prose", "narrative", "plot", "storyline",
|
532 |
+
"character", "protagonist", "antagonist", "setting", "theme", "genre",
|
533 |
+
"mystery", "thriller", "romance", "sci-fi", "fantasy", "young adult", "YA",
|
534 |
+
"comic", "comic book", "graphic novel", "manga", "anime", "cartoon",
|
535 |
+
|
536 |
+
# Digital entertainment
|
537 |
+
"streaming", "stream", "subscription", "platform", "service", "content",
|
538 |
+
"Netflix", "Disney+", "Amazon Prime", "Hulu", "HBO", "HBO Max", "Apple TV+",
|
539 |
+
"Peacock", "Paramount+", "YouTube", "YouTube Premium", "TikTok", "Instagram",
|
540 |
+
"influencer", "content creator", "vlogger", "blogger", "podcaster", "podcast",
|
541 |
+
"episode", "download", "subscriber", "follower", "like", "share", "viral",
|
542 |
+
"trending", "binge-watch", "marathon", "spoiler", "recap", "review", "trailer",
|
543 |
+
"teaser", "behind the scenes", "BTS", "exclusive", "original"
|
544 |
+
]
|
545 |
+
}
|
546 |
+
|
547 |
+
# Domain-specific RSS feeds for different categories
CATEGORY_SPECIFIC_FEEDS = {
    "ai": [
        "https://www.artificialintelligence-news.com/feed/",
        "https://www.deeplearningweekly.com/feed",
        "https://openai.com/news/rss.xml",
        "https://aiweekly.co/issues.rss",
        "https://news.mit.edu/topic/mitartificial-intelligence2-rss.xml",
        "https://ai.stanford.edu/blog/feed.xml",
        "https://feeds.feedburner.com/blogspot/gJZg",
        "https://blog.google/technology/ai/rss/",
        "https://deepmind.google/blog/rss.xml",
        "https://blog.tensorflow.org/feeds/posts/default",
        "https://aws.amazon.com/blogs/machine-learning/feed/",
        "https://machinelearning.apple.com/rss.xml",
        "https://msrc.microsoft.com/blog/feed",
        "https://learn.microsoft.com/en-us/archive/blogs/machinelearning/feed.xml",
        "https://rss.arxiv.org/rss/cs.LG"
    ],
    "science": [
        "https://www.science.org/rss/news_current.xml",
        "https://www.nature.com/nature.rss",
        "http://rss.sciam.com/basic-science",
        "http://rss.sciam.com/ScientificAmerican-Global",
        "https://www.newscientist.com/feed/home/?cmpid=RSS|NSNS-Home",
        "https://phys.org/rss-feed/"
    ],
    "technology": [
        "https://www.wired.com/feed/category/business/latest/rss",
        "https://techcrunch.com/feed/",
        "https://www.technologyreview.com/feed/",
        "https://arstechnica.com/feed/",
        "https://www.theverge.com/rss/index.xml",
        "https://news.ycombinator.com/rss"
    ],
    "politics": [
        "https://feeds.washingtonpost.com/rss/politics",
        "https://rss.nytimes.com/services/xml/rss/nyt/Politics.xml",
        "https://feeds.bbci.co.uk/news/politics/rss.xml",
        "https://www.politico.com/rss/politicopicks.xml",
        "https://www.realclearpolitics.com/index.xml"
    ],
    "business": [
        "https://www.ft.com/rss/home",
        "https://feeds.bloomberg.com/markets/news.rss",
        "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
        "https://feeds.washingtonpost.com/rss/business",
        "https://www.entrepreneur.com/latest.rss",
        "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=10001147",
        "https://feeds.content.dowjones.io/public/rss/WSJcomUSBusiness",
        "https://feeds.a.dj.com/rss/RSSMarketsMain.xml"
    ],
    "world": [
        "https://feeds.bbci.co.uk/news/world/rss.xml",
        "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
        "https://www.aljazeera.com/xml/rss/all.xml",
        "https://feeds.washingtonpost.com/rss/world",
        "http://rss.cnn.com/rss/cnn_world.rss"
    ],
    "sports": [
        "https://www.espn.com/espn/rss/news",
        "https://www.cbssports.com/rss/headlines/",
        "https://www.espncricinfo.com/rss/content/story/feeds/0.xml",
        "https://api.foxsports.com/v1/rss",
        "https://www.sportingnews.com/us/rss",
        "https://www.theguardian.com/sport/rss"
    ],
    "entertainment": [
        "https://www.hollywoodreporter.com/feed/",
        "https://variety.com/feed/",
        "https://www.eonline.com/syndication/feeds/rssfeeds/topstories.xml",
        "https://www.rollingstone.com/feed/",
        "https://rss.nytimes.com/services/xml/rss/nyt/Arts.xml"
    ],
    "fact_checking": [
        "https://www.snopes.com/feed/",
        "https://www.politifact.com/rss/all/",
        "https://www.factcheck.org/feed/",
        "https://leadstories.com/atom.xml",
        "https://fullfact.org/feed/all/",
        "https://www.truthorfiction.com/feed/"
    ]
}

def detect_claim_category(claim: str) -> Tuple[str, float]:
    """
    Detect the most likely category of a claim and its confidence score.

    This function analyzes the claim text and matches it against category-specific keywords
    to determine the most likely category for the claim (AI, science, politics, etc.).

    Args:
        claim (str): The claim text

    Returns:
        tuple: (category_name, confidence_score)
    """
    if not claim:
        return "general", 0.3

    # Lowercase for better matching
    claim_lower = claim.lower()

    # Count matches for each category
    category_scores = {}

    for category, keywords in CLAIM_CATEGORIES.items():
        # Count how many keywords from this category appear in the claim
        matches = sum(1 for keyword in keywords if keyword.lower() in claim_lower)

        # Calculate a simple score based on the number of matches
        if matches > 0:
            score = min(0.9, 0.3 + (matches * 0.1))  # Base 0.3 + 0.1 per match, capped at 0.9
            category_scores[category] = score

    # Find the category with the highest score
    if not category_scores:
        return "general", 0.3

    top_category = max(category_scores.items(), key=lambda x: x[1])
    category_name, confidence = top_category

    # If the top score is too low, return general
    if confidence < 0.3:
        return "general", 0.3

    return category_name, confidence

def get_category_specific_rss_feeds(category: str, max_feeds: int = 5) -> List[str]:
|
677 |
+
"""
|
678 |
+
Get a list of RSS feeds specific to a category
|
679 |
+
|
680 |
+
This function returns a subset of category-specific RSS feeds to use
|
681 |
+
for evidence gathering.
|
682 |
+
|
683 |
+
Args:
|
684 |
+
category (str): The claim category
|
685 |
+
max_feeds (int): Maximum number of feeds to return
|
686 |
+
|
687 |
+
Returns:
|
688 |
+
list: List of RSS feed URLs
|
689 |
+
"""
|
690 |
+
# Get category-specific feeds
|
691 |
+
category_feeds = CATEGORY_SPECIFIC_FEEDS.get(category, [])
|
692 |
+
|
693 |
+
# Limit to max_feeds
|
694 |
+
return category_feeds[:min(max_feeds, len(category_feeds))]
|
695 |
+
|
696 |
+
def get_fallback_category(category: str) -> Optional[str]:
|
697 |
+
"""
|
698 |
+
Get a fallback category for a given category when insufficient evidence is found
|
699 |
+
|
700 |
+
This function determines which alternative category to use when the
|
701 |
+
primary category doesn't yield sufficient evidence. For example,
|
702 |
+
AI claims fall back to technology sources.
|
703 |
+
|
704 |
+
Args:
|
705 |
+
category (str): The primary category to find a fallback for
|
706 |
+
|
707 |
+
Returns:
|
708 |
+
str or None: Fallback category name or None if no fallback exists
|
709 |
+
"""
|
710 |
+
# Define fallback categories for specific categories
|
711 |
+
fallbacks = {
|
712 |
+
"ai": "technology", # For AI claims, use technology as fallback
|
713 |
+
# Other categories fall back to default RSS feeds, handled in retrieve_combined_evidence
|
714 |
+
}
|
715 |
+
|
716 |
+
return fallbacks.get(category)
|
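
# Example sketch (illustrative, not part of the module above). Assuming the
# CLAIM_CATEGORIES keyword map defined earlier in this file yields two keyword
# hits for an AI claim, the score works out to 0.3 + 2 * 0.1 = 0.5:
#
#   category, confidence = detect_claim_category("OpenAI releases a new AI model")
#   # -> ("ai", 0.5) under the two-keyword-match assumption
#   feeds = get_category_specific_rss_feeds(category, max_feeds=3)
#   # -> first 3 URLs for that category (unknown categories return [])
#   get_fallback_category(category)
#   # -> "technology" for "ai"; any other category returns None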
modules/claim_extraction.py
ADDED
@@ -0,0 +1,241 @@
import logging
import time
import re
from langdetect import detect
import spacy

from utils.performance import PerformanceTracker
from utils.models import get_nlp_model, get_llm_model
from modules.classification import normalize_tense

logger = logging.getLogger("misinformation_detector")

performance_tracker = PerformanceTracker()

def extract_claims(text):
    """
    Extract the main factual claim from the provided text.
    For concise claims (<30 words), preserves them exactly.
    For longer text, uses OpenAI to extract the claim.
    """
    logger.info(f"Extracting claims from: {text}")
    start_time = time.time()

    # First, check if the input already appears to be a concise claim
    if len(text.split()) < 30:
        logger.info("Input appears to be a concise claim already, preserving as-is")
        performance_tracker.log_processing_time(start_time)
        performance_tracker.log_claim_processed()
        return text

    try:
        # For longer text, use OpenAI for extraction
        extracted_claim = extract_with_openai(text)

        # Log processing time
        performance_tracker.log_processing_time(start_time)
        performance_tracker.log_claim_processed()

        logger.info(f"Extracted claim: {extracted_claim}")
        return extracted_claim
    except Exception as e:
        logger.error(f"Error extracting claims: {str(e)}")
        # Fall back to the original text on error
        return text

def extract_with_openai(text):
    """
    Use OpenAI model for claim extraction
    """
    try:
        # Get LLM model
        llm_model = get_llm_model()

        # Create a very explicit prompt to avoid hallucination
        prompt = f"""
        Extract the main factual claim from the following text.
        DO NOT add any information not present in the original text.
        DO NOT add locations, dates, or other details.
        ONLY extract what is explicitly stated.

        Text: {text}

        Main factual claim:
        """

        # Call OpenAI with temperature=0 for deterministic output
        response = llm_model.invoke(prompt, temperature=0)
        extracted_claim = response.content.strip()

        # Further clean up any explanations or extra text
        if ":" in extracted_claim:
            parts = extracted_claim.split(":")
            if len(parts) > 1:
                extracted_claim = parts[-1].strip()

        logger.info(f"OpenAI extraction: {extracted_claim}")

        # Validate that we're not adding info not in the original
        nlp = get_nlp_model()
        extracted_claim = validate_extraction(text, extracted_claim, nlp)

        return extracted_claim
    except Exception as e:
        logger.error(f"Error in OpenAI claim extraction: {str(e)}")
        return text  # Fallback to original

def validate_extraction(original_text, extracted_claim, nlp):
    """
    Validate that the extracted claim doesn't add information not present in the original text
    """
    # If extraction fails or is empty, return original
    if not extracted_claim or extracted_claim.strip() == "":
        logger.warning("Empty extraction result, using original text")
        return original_text

    # Check for added location information
    location_terms = ["united states", "america", "u.s.", "usa", "china", "india", "europe",
                      "russia", "japan", "uk", "germany", "france", "australia"]
    for term in location_terms:
        if term in extracted_claim.lower() and term not in original_text.lower():
            logger.warning(f"Extraction added location '{term}' not in original, using original text")
            return original_text

    # Check for entity preservation/addition using spaCy
    try:
        # Get entities from extracted text
        extracted_doc = nlp(extracted_claim)
        extracted_entities = [ent.text.lower() for ent in extracted_doc.ents]

        # Get entities from original text
        original_doc = nlp(original_text)
        original_entities = [ent.text.lower() for ent in original_doc.ents]

        # Check for new entities that don't exist in original
        for entity in extracted_entities:
            if not any(entity in orig_entity or orig_entity in entity for orig_entity in original_entities):
                logger.warning(f"Extraction added new entity '{entity}', using original text")
                return original_text

        return extracted_claim
    except Exception as e:
        logger.error(f"Error in extraction validation: {str(e)}")
        return original_text  # On error, safer to return original

def shorten_claim_for_evidence(claim):
    """
    Shorten a claim to use for evidence retrieval by preserving important entities,
    verbs, and keywords while maintaining claim context

    Args:
        claim (str): The original claim

    Returns:
        str: A shortened version of the claim optimized for evidence retrieval
    """
    try:
        # Normalize verb tense first so the shortened claim matches evidence
        # phrasing (the original code assigned this result but never used it;
        # all processing below now runs on the normalized text)
        normalized_claim = normalize_tense(claim)

        # Get NLP model
        nlp = get_nlp_model()

        # Process the normalized claim with NLP
        doc = nlp(normalized_claim)

        # Components to extract
        important_components = []

        # 1. Extract all named entities as highest priority
        entities = [ent.text for ent in doc.ents]
        important_components.extend(entities)

        # 2. Extract key proper nouns if not already captured in entities
        for token in doc:
            if token.pos_ == "PROPN" and token.text not in important_components:
                important_components.append(token.text)

        # 3. Extract main verbs (actions)
        verbs = []
        for token in doc:
            if token.pos_ == "VERB" and not token.is_stop:
                verbs.append(token.text)

        # 4. Check for important title terms like "president", "prime minister"
        title_terms = ["president", "prime minister", "minister", "chancellor", "premier",
                       "governor", "mayor", "senator", "CEO", "founder", "director"]

        for term in title_terms:
            if term in normalized_claim.lower():
                # Find the full phrase (e.g., "Canadian Prime Minister")
                matches = re.finditer(r'(?i)(?:\w+\s+)*\b' + re.escape(term) + r'\b(?:\s+\w+)*', normalized_claim)
                for match in matches:
                    phrase = match.group(0)
                    if phrase not in important_components:
                        important_components.append(phrase)

        # 5. Add important temporal indicators
        temporal_terms = ["today", "yesterday", "recently", "just", "now",
                          "current", "currently", "latest", "new", "week",
                          "month", "year", "announces", "announced", "introduces",
                          "introduced", "launches", "launched", "releases",
                          "released", "rolls out", "rolled out", "presents", "presented", "unveils", "unveiled",
                          "starts", "started", "begins", "began", "initiates", "initiated", "anymore"
                          ]

        # Add significant temporal context
        temporal_context = []
        for term in temporal_terms:
            if term in normalized_claim.lower():
                temporal_matches = re.finditer(r'(?i)(?:\w+\s+){0,2}\b' + re.escape(term) + r'\b(?:\s+\w+){0,2}', normalized_claim)
                for match in temporal_matches:
                    temporal_context.append(match.group(0))

        # 6. Always include negation words as they're critical for meaning
        negation_terms = ["not", "no longer", "former", "ex-", "isn't", "aren't", "doesn't", "don't"]

        negation_context = []
        for term in negation_terms:
            if term in normalized_claim.lower():
                # Find the context around the negation (3 words before and after)
                neg_matches = re.finditer(r'(?i)(?:\w+\s+){0,3}\b' + re.escape(term) + r'\b(?:\s+\w+){0,3}', normalized_claim)
                for match in neg_matches:
                    negation_context.append(match.group(0))

        # Combine all components
        all_components = important_components + verbs + temporal_context + negation_context

        # Remove duplicates while preserving order
        seen = set()
        unique_components = []
        for component in all_components:
            if component.lower() not in seen:
                seen.add(component.lower())
                unique_components.append(component)

        # If we have too few components (< 2), use the normalized claim itself
        if len(unique_components) < 2:
            # If the claim is already short (< 10 words), use as is
            if len(normalized_claim.split()) < 10:
                return normalized_claim

            # Otherwise, use the first 8 words
            words = normalized_claim.split()
            return " ".join(words[:min(8, len(words))])

        # Join components to create shortened claim
        # Sort components to maintain approximate original word order
        def get_position(comp):
            return normalized_claim.lower().find(comp.lower())

        unique_components.sort(key=get_position)
        shortened_claim = " ".join(unique_components)

        # If the shortened claim is still too long, limit to first 10 words
        if len(shortened_claim.split()) > 10:
            return " ".join(shortened_claim.split()[:10])

        return shortened_claim

    except Exception as e:
        logger.error(f"Error in shortening claim: {str(e)}")
        # Return original claim on error
        return claim
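
# Example sketch (illustrative, not part of the module above; assumes the spaCy
# model behind get_nlp_model() is installed and, for long inputs, an OpenAI key
# configured for get_llm_model()). Exact output depends on the loaded model:
#
#   extract_claims("Apple unveiled the Vision Pro headset today.")
#   # -> returned unchanged: the input is already under 30 words
#
#   shorten_claim_for_evidence("Apple has unveiled the Vision Pro headset today.")
#   # -> roughly "Apple unveiled Vision Pro headset today": tense is normalized,
#   #    then entities, main verbs and temporal context are kept, capped at 10 words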
modules/classification.py
ADDED
@@ -0,0 +1,472 @@
import logging
import re
from utils.models import get_llm_model, get_nlp_model
from utils.performance import PerformanceTracker

logger = logging.getLogger("misinformation_detector")

performance_tracker = PerformanceTracker()

def classify_with_llm(query, evidence):
    """
    Classification function that evaluates evidence against a claim
    to determine support, contradiction, or insufficient evidence.

    This function analyzes the provided evidence to evaluate if it supports,
    contradicts, or is insufficient to verify the claim. It implements:
    - Strict output formatting requirements
    - Evidence source validation
    - Confidence scoring based on confirmation strength
    - Flexible regex pattern matching
    - Detailed debug logging
    - Fallback parsing for non-standard responses

    Args:
        query (str): The factual claim being verified
        evidence (list): Evidence items to evaluate against the claim

    Returns:
        list: Classification results with labels and confidence scores
    """
    logger.info(f"Classifying evidence for claim: {query}")

    # Get the LLM model
    llm_model = get_llm_model()

    # Skip if no evidence
    if not evidence:
        logger.warning("No evidence provided for classification")
        return []

    # Normalize evidence to a list
    if not isinstance(evidence, list):
        if evidence:
            try:
                evidence = [evidence]
            except Exception as e:
                logger.error(f"Could not convert evidence to list: {e}")
                return []
        else:
            return []

    # Extract essential claim components for improved keyword detection
    claim_components = extract_claim_keywords(query)
    essential_keywords = claim_components.get("keywords", [])
    essential_entities = claim_components.get("entities", [])

    # Ensure processing is limited to top 10 evidence items to reduce token usage
    evidence = evidence[:10]

    # Validate evidence for verifiable sources
    validated_evidence = []
    for idx, chunk in enumerate(evidence):
        # Basic evidence validation
        if not isinstance(chunk, str) or not chunk.strip():
            continue

        # Check if evidence contains source information
        has_valid_source = False
        if "URL:" in chunk and ("http://" in chunk or "https://" in chunk):
            has_valid_source = True
        elif "Source:" in chunk and len(chunk.split("Source:")[1].strip()) > 3:
            has_valid_source = True

        # Add validation flag to evidence
        validated_evidence.append({
            "text": chunk,
            "index": idx + 1,
            "has_valid_source": has_valid_source
        })

    # If no valid evidence remains, return early
    if not validated_evidence:
        logger.warning("No valid evidence items to classify")
        return []

    try:
        # Format evidence items with validation information
        evidence_text = ""
        for item in validated_evidence:
            # Truncate long evidence
            chunk_text = item["text"]
            if len(chunk_text) > 1000:
                chunk_text = chunk_text[:1000] + "..."

            # Include validation status in the prompt
            source_status = "WITH VALID SOURCE" if item["has_valid_source"] else "WARNING: NO CLEAR SOURCE"
            evidence_text += f"EVIDENCE {item['index']}:\n{chunk_text}\n[{source_status}]\n\n"

        # Create a structured prompt with explicit format instructions and validation requirements
        prompt = f"""
        CLAIM: {query}

        EVIDENCE:
        {evidence_text}

        TASK: Evaluate if each evidence supports, contradicts, or is insufficient/irrelevant to the claim.

        INSTRUCTIONS:
        1. For each evidence, provide your analysis in EXACTLY this format:

        EVIDENCE [number] ANALYSIS:
        Classification: [Choose exactly one: support/contradict/insufficient]
        Confidence: [number between 0-100]
        Reason: [brief explanation]

        2. Support = Evidence EXPLICITLY confirms ALL parts of the claim are true
        3. Contradict = Evidence EXPLICITLY confirms the claim is false
        4. Insufficient = Evidence is irrelevant, ambiguous, or doesn't provide enough information

        CRITICAL VALIDATION RULES:
        - Mark as "support" ONLY when evidence EXPLICITLY mentions ALL key entities AND actions from the claim
        - Do not label evidence as "support" if it only discusses the same topic without confirming the specific claim
        - Do not make inferential leaps - if the evidence doesn't explicitly state the claim, mark as "insufficient"
        - Assign LOW confidence (0-50) when evidence doesn't explicitly mention all claim elements
        - Assign ZERO confidence (0) to evidence without valid sources
        - If evidence describes similar but different events, mark as "insufficient", not "support" or "contradict"
        - If evidence describes the same topic as the claim but does not confirm or contradict the claim, mark as "insufficient", not "support" or "contradict"
        - If evidence is in a different language or unrelated topic, mark as "insufficient" with 0 confidence
        - Check that all entities (names, places, dates, numbers) in the claim are explicitly confirmed

        FOCUS ON THE EXACT CLAIM ONLY.
        ESSENTIAL KEYWORDS TO LOOK FOR: {', '.join(essential_keywords)}
        ESSENTIAL ENTITIES TO VERIFY: {', '.join(essential_entities)}

        IMPORTANT NOTE ABOUT VERB TENSES: When analyzing this claim, treat present tense verbs (like "unveils")
        and perfect form verbs (like "has unveiled") as equivalent to their simple past tense forms
        (like "unveiled"). The tense variation should not affect your classification decision.
        """

        # Get response with temperature=0 for consistency
        result = llm_model.invoke(prompt, temperature=0)
        result_text = result.content.strip()

        # Log the raw LLM response for debugging
        logger.debug(f"Raw LLM classification response:\n{result_text}")

        # Define a more flexible regex pattern matching the requested format
        # This pattern accommodates variations in whitespace and formatting
        analysis_pattern = r'EVIDENCE\s+(\d+)\s+ANALYSIS:[\s\n]*Classification:[\s\n]*(support|contradict|insufficient)[\s\n]*Confidence:[\s\n]*(\d+)[\s\n]*Reason:[\s\n]*(.*?)(?=[\s\n]*EVIDENCE\s+\d+\s+ANALYSIS:|[\s\n]*$)'

        # Parse each evidence analysis
        classification_results = []

        # Try matching with our pattern
        matches = list(re.finditer(analysis_pattern, result_text, re.IGNORECASE | re.DOTALL))

        # Log match information for debugging
        logger.debug(f"Found {len(matches)} structured evidence analyses in response")

        # Process matches
        for match in matches:
            try:
                evidence_idx = int(match.group(1)) - 1
                classification = match.group(2).lower()
                confidence = int(match.group(3)) / 100.0  # Convert to 0-1 scale
                reason = match.group(4).strip()

                # Check if this evidence item exists in our original list
                if 0 <= evidence_idx < len(evidence):
                    # Get the original evidence text
                    evidence_text = evidence[evidence_idx]

                    # Check for valid source
                    source_valid = False
                    if "URL:" in evidence_text and ("http://" in evidence_text or "https://" in evidence_text):
                        source_valid = True
                    elif "Source:" in evidence_text:
                        source_valid = True

                    # Reduce confidence for evidence without valid sources
                    if not source_valid and confidence > 0.3:
                        confidence = 0.3
                        reason += " (Confidence reduced due to lack of verifiable source)"

                    # Create result entry
                    classification_results.append({
                        "label": classification,
                        "confidence": confidence,
                        "evidence": evidence_text,
                        "reason": reason
                    })
            except (ValueError, IndexError) as e:
                logger.error(f"Error parsing evidence analysis: {e}")

        # If no structured matches were found, try using a simpler approach
        if not classification_results:
            logger.warning("No structured evidence analysis found, using fallback method")

            # Log detailed information about the failure
            logger.warning(f"Expected format not found in response. Response excerpt: {result_text[:200]}...")

            # Simple fallback parsing based on keywords
            for idx, ev in enumerate(evidence):
                # Check for keywords in the LLM response
                ev_mention = f"EVIDENCE {idx+1}"
                if ev_mention in result_text:
                    # Find the section for this evidence
                    parts = result_text.split(ev_mention)
                    if len(parts) > 1:
                        analysis_text = parts[1].split("EVIDENCE")[0] if "EVIDENCE" in parts[1] else parts[1]

                        # Determine classification
                        label = "insufficient"  # Default
                        confidence = 0.0  # Default - zero confidence for fallback parsing

                        # Check for support indicators
                        if "support" in analysis_text.lower() or "confirms" in analysis_text.lower():
                            label = "support"
                            confidence = 0.4  # Lower confidence for fallback support

                        # Check for contradict indicators
                        elif "contradict" in analysis_text.lower() or "false" in analysis_text.lower():
                            label = "contradict"
                            confidence = 0.4  # Lower confidence for fallback contradict

                        # Check for valid source to adjust confidence
                        source_valid = False
                        if "URL:" in ev and ("http://" in ev or "https://" in ev):
                            source_valid = True
                        elif "Source:" in ev:
                            source_valid = True

                        if not source_valid:
                            confidence = min(confidence, 0.3)

                        # Create basic result
                        classification_results.append({
                            "label": label,
                            "confidence": confidence,
                            "evidence": ev,
                            "reason": f"Determined via fallback parsing. {'Valid source found.' if source_valid else 'Warning: No clear source identified.'}"
                        })

                        logger.debug(f"Fallback parsing for evidence {idx+1}: {label} with confidence {confidence}")

        logger.info(f"Classified {len(classification_results)} evidence items")
        return classification_results

    except Exception as e:
        logger.error(f"Error in evidence classification: {str(e)}")
        # Provide a basic fallback
        fallback_results = []
        for ev in evidence:
            fallback_results.append({
                "label": "insufficient",
                "confidence": 0.5,
                "evidence": ev,
                "reason": "Classification failed with error, using fallback"
            })
        return fallback_results
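
# Example sketch (illustrative, not part of the module above): how the strict
# response format requested in the prompt is recovered by the analysis_pattern
# regex inside classify_with_llm. The sample response text is hypothetical:
#
#   sample = ("EVIDENCE 1 ANALYSIS:\n"
#             "Classification: support\n"
#             "Confidence: 80\n"
#             "Reason: The article explicitly confirms the claim.\n")
#   m = re.search(analysis_pattern, sample, re.IGNORECASE | re.DOTALL)
#   # m.groups() -> ('1', 'support', '80',
#   #                'The article explicitly confirms the claim.')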
def normalize_tense(claim):
    """
    Normalize verb tenses in claims to ensure consistent classification.

    This function standardizes verb forms by converting present simple tense
    verbs (e.g., "unveils") and perfect forms (e.g., "has unveiled") to their
    past tense equivalents (e.g., "unveiled"). This ensures that semantically
    equivalent claims are processed consistently regardless of verb tense
    variations.

    Args:
        claim (str): The original claim text to normalize

    Returns:
        str: The normalized claim with consistent tense handling

    Note:
        This function specifically targets present simple and perfect forms,
        preserving the semantic differences of continuous forms (is unveiling)
        and future tense (will unveil).
    """
    # Define patterns to normalize common verb forms.
    # Each tuple contains (regex_pattern, replacement_text)
    tense_patterns = [
        # Present simple to past tense conversions
        (r'\bunveils\b', r'unveiled'),
        (r'\blaunches\b', r'launched'),
        (r'\breleases\b', r'released'),
        (r'\bannounces\b', r'announced'),
        (r'\binvites\b', r'invited'),
        (r'\bretaliates\b', r'retaliated'),
        (r'\bends\b', r'ended'),
        (r'\bbegins\b', r'began'),
        (r'\bstarts\b', r'started'),
        (r'\bcompletes\b', r'completed'),
        (r'\bfinishes\b', r'finished'),
        (r'\bintroduces\b', r'introduced'),
        (r'\bcreates\b', r'created'),
        (r'\bdevelops\b', r'developed'),
        (r'\bpublishes\b', r'published'),
        (r'\bacquires\b', r'acquired'),
        (r'\bbuys\b', r'bought'),
        (r'\bsells\b', r'sold'),

        # Perfect forms (has/have/had + past participle) to simple past
        (r'\b(has|have|had)\s+unveiled\b', r'unveiled'),
        (r'\b(has|have|had)\s+launched\b', r'launched'),
        (r'\b(has|have|had)\s+released\b', r'released'),
        (r'\b(has|have|had)\s+announced\b', r'announced'),
        (r'\b(has|have|had)\s+invited\b', r'invited'),
        (r'\b(has|have|had)\s+retaliated\b', r'retaliated'),
        (r'\b(has|have|had)\s+ended\b', r'ended'),
        (r'\b(has|have|had)\s+begun\b', r'began'),
        (r'\b(has|have|had)\s+started\b', r'started'),
        (r'\b(has|have|had)\s+introduced\b', r'introduced'),
        (r'\b(has|have|had)\s+created\b', r'created'),
        (r'\b(has|have|had)\s+developed\b', r'developed'),
        (r'\b(has|have|had)\s+published\b', r'published'),
        (r'\b(has|have|had)\s+acquired\b', r'acquired'),
        (r'\b(has|have|had)\s+bought\b', r'bought'),
        (r'\b(has|have|had)\s+sold\b', r'sold')
    ]

    # Apply normalization patterns
    normalized = claim
    for pattern, replacement in tense_patterns:
        normalized = re.sub(pattern, replacement, normalized, flags=re.IGNORECASE)

    # Log if normalization occurred for debugging purposes
    if normalized != claim:
        logger.info(f"Normalized claim from: '{claim}' to: '{normalized}'")

    return normalized
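
# Example sketch (illustrative, not part of the module above):
#
#   normalize_tense("Apple unveils a new headset")       # -> "Apple unveiled a new headset"
#   normalize_tense("Apple has unveiled a new headset")  # -> "Apple unveiled a new headset"
#   normalize_tense("Apple is unveiling a new headset")  # unchanged: continuous and
#                                                        # future forms are preserved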
def aggregate_evidence(classification_results):
    """
    Aggregate evidence classifications to determine overall verdict
    using a weighted scoring system of evidence count and quality.

    Args:
        classification_results (list): List of evidence classification results

    Returns:
        tuple: (verdict, confidence) - The final verdict and confidence score
    """
    logger.info(f"Aggregating evidence from {len(classification_results) if classification_results else 0} results")

    if not classification_results:
        logger.warning("No classification results to aggregate")
        return "Uncertain", 0.0  # Default with zero confidence

    # Only consider support and contradict evidence items
    support_items = [item for item in classification_results if item.get("label") == "support"]
    contradict_items = [item for item in classification_results if item.get("label") == "contradict"]

    # Count number of support and contradict items
    support_count = len(support_items)
    contradict_count = len(contradict_items)

    # Sum confidence scores for support and contradict items
    support_confidence_sum = sum(item.get("confidence", 0) for item in support_items)
    contradict_confidence_sum = sum(item.get("confidence", 0) for item in contradict_items)

    # Apply weights: 55% for count, 45% for quality (confidence)
    # Normalize counts to avoid division by zero
    max_count = max(1, max(support_count, contradict_count))

    # Calculate weighted scores
    count_support_score = (support_count / max_count) * 0.55
    count_contradict_score = (contradict_count / max_count) * 0.55

    # Normalize confidence scores to avoid division by zero
    max_confidence_sum = max(1, max(support_confidence_sum, contradict_confidence_sum))

    quality_support_score = (support_confidence_sum / max_confidence_sum) * 0.45
    quality_contradict_score = (contradict_confidence_sum / max_confidence_sum) * 0.45

    # Total scores
    total_support = count_support_score + quality_support_score
    total_contradict = count_contradict_score + quality_contradict_score

    # Check if all evidence is irrelevant/insufficient
    if support_count == 0 and contradict_count == 0:
        logger.info("All evidence items are irrelevant/insufficient")
        return "Uncertain", 0.0

    # Determine verdict based on higher total score
    if total_support > total_contradict:
        verdict = "True (Based on Evidence)"
        min_score = total_contradict
        max_score = total_support
    else:
        verdict = "False (Based on Evidence)"
        min_score = total_support
        max_score = total_contradict

    # Calculate final confidence on a 0-1 scale using:
    # 1 - (min_score / max_score)
    if max_score > 0:
        final_confidence = 1.0 - (min_score / max_score)
    else:
        final_confidence = 0.0

    # Handle cases where confidence is very low
    if final_confidence == 0.0:
        return "Uncertain", 0.0
    elif final_confidence < 0.1:  # Less than 10%
        # Keep the verdict but with very low confidence
        logger.info(f"Very low confidence verdict: {verdict} with {final_confidence:.2f} confidence")

    logger.info(f"Final verdict: {verdict}, confidence: {final_confidence:.2f}")

    return verdict, final_confidence
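
# Worked example (illustrative, not part of the module above). Three "support"
# items with confidences 0.8, 0.6, 0.4 and one "contradict" item with 0.5:
#
#   count scores:   support = (3/3) * 0.55 = 0.55      contradict = (1/3) * 0.55 ~ 0.18
#   quality scores: support = (1.8/1.8) * 0.45 = 0.45  contradict = (0.5/1.8) * 0.45 = 0.125
#   totals:         support = 1.00                     contradict ~ 0.31
#
#   verdict = "True (Based on Evidence)", confidence = 1 - 0.31/1.00 ~ 0.69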
def extract_claim_keywords(claim):
    """
    Extract important keywords from claim using NLP processing

    Args:
        claim (str): The claim text

    Returns:
        dict: Dictionary containing keywords and other claim components
    """
    try:
        # Get NLP model
        nlp = get_nlp_model()

        # Process claim with NLP
        doc = nlp(claim)

        # Extract entities
        entities = [ent.text for ent in doc.ents]

        # Extract important keywords (non-stopword nouns, adjectives, and verbs longer than 3 chars)
        keywords = []
        for token in doc:
            # Keep all important parts of speech, longer than 3 characters
            if token.pos_ in ["NOUN", "PROPN", "ADJ", "VERB"] and not token.is_stop and len(token.text) > 3:
                keywords.append(token.text.lower())
            # Also include some important modifiers and quantifiers
            elif token.pos_ in ["NUM", "ADV"] and not token.is_stop and len(token.text) > 1:
                keywords.append(token.text.lower())

        # Extract verbs separately
        verbs = [token.lemma_.lower() for token in doc if token.pos_ == "VERB" and not token.is_stop]

        # Also extract multi-word phrases that might be important
        noun_phrases = []
        for chunk in doc.noun_chunks:
            if len(chunk.text) > 3 and not all(token.is_stop for token in chunk):
                noun_phrases.append(chunk.text.lower())

        # Add phrases to keywords if not already included
        for phrase in noun_phrases:
            if phrase not in keywords and phrase.lower() not in [k.lower() for k in keywords]:
                keywords.append(phrase.lower())

        # Return all components
        return {
            "entities": entities,
            "keywords": keywords,
            "verbs": verbs,
            "noun_phrases": noun_phrases
        }

    except Exception as e:
        logger.error(f"Error extracting claim keywords: {e}")
        # Return basic fallback using simple word extraction
        words = [word.lower() for word in claim.split() if len(word) > 3]
        return {"keywords": words, "entities": [], "verbs": [], "noun_phrases": []}
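
# Example sketch (illustrative, not part of the module above; assumes the spaCy
# model behind get_nlp_model() is installed, so exact output varies by model):
#
#   components = extract_claim_keywords("NASA launched a new lunar rover in 2024")
#   # components["entities"]     -> e.g. ["NASA", "2024"]
#   # components["verbs"]        -> e.g. ["launch"]  (lemmatized)
#   # components["noun_phrases"] -> e.g. ["a new lunar rover"]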
modules/evidence_retrieval.py
ADDED
@@ -0,0 +1,816 @@
"""
Evidence retrieval module for the Fake News Detector application.

This module provides functions for retrieving evidence from various sources,
analyzing relevance using entity extraction and verb matching, and
combining evidence to support fact-checking operations.
"""

import logging
import time
import re
import random
import requests
import json
import ssl
from urllib.parse import urlencode
from bs4 import BeautifulSoup
from SPARQLWrapper import SPARQLWrapper, JSON
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed

from utils.api_utils import api_error_handler, safe_json_parse
from utils.models import get_nlp_model
from modules.claim_extraction import shorten_claim_for_evidence
from modules.rss_feed import retrieve_evidence_from_rss
from config import NEWS_API_KEY, FACTCHECK_API_KEY
# Import the performance tracker
from utils.performance import PerformanceTracker
performance_tracker = PerformanceTracker()

logger = logging.getLogger("misinformation_detector")

def extract_claim_components(claim):
    """
    Extract key components from a claim using NER and dependency parsing.

    Args:
        claim (str): The claim text

    Returns:
        dict: Dictionary containing entities, verbs, and important keywords
    """
    if not claim:
        return {"entities": [], "verbs": [], "keywords": []}

    try:
        # Get NLP model
        nlp = get_nlp_model()
        doc = nlp(claim)

        # Extract named entities - keep original case for better matching
        entities = []
        for ent in doc.ents:
            entities.append(ent.text)

        # Also extract any capitalized words as potential entities not caught by NER
        words = claim.split()
        for word in words:
            clean_word = word.strip('.,;:!?()[]{}""\'')
            # Check if word starts with capital letter and isn't already in entities
            if clean_word and clean_word[0].isupper() and clean_word not in entities:
                entities.append(clean_word)

        # Extract main verbs
        verbs = []
        for token in doc:
            if token.pos_ == "VERB" and not token.is_stop:
                # Get the lemma to handle different verb forms
                verbs.append(token.lemma_.lower())

        # Extract important keywords (non-stopword nouns, adjectives)
        keywords = []
        for token in doc:
            if token.pos_ in ["NOUN", "ADJ"] and not token.is_stop and len(token.text) > 2:
                keywords.append(token.text.lower())

        # Extract temporal indicators
        temporal_words = []
        temporal_indicators = ["today", "yesterday", "recently", "just", "now",
                               "current", "currently", "latest", "new", "week",
                               "month", "year", "announces", "announced", "introduces",
                               "introduced", "launches", "launched", "releases",
                               "released", "rolls out", "rolled out", "presents", "presented", "unveils", "unveiled",
                               "starts", "started", "begins", "began", "initiates", "initiated", "anymore"
                               ]

        for token in doc:
            if token.text.lower() in temporal_indicators:
                temporal_words.append(token.text.lower())

        return {
            "entities": entities,
            "verbs": verbs,
            "keywords": keywords,
            "temporal_words": temporal_words
        }
    except Exception as e:
        logger.error(f"Error extracting claim components: {e}")
        return {"entities": [], "verbs": [], "keywords": [], "temporal_words": []}
def analyze_evidence_relevance(evidence_items, claim_components):
    """
    Analyze evidence relevance based on entity match, verb match and keyword match.

    Args:
        evidence_items (list): List of evidence text strings
        claim_components (dict): Components extracted from the claim

    Returns:
        list: List of (evidence, score) tuples sorted by relevance score
    """
    if not evidence_items or not claim_components:
        return []

    scored_evidence = []

    # Extract components for easier access
    claim_entities = claim_components.get("entities", [])
    claim_verbs = claim_components.get("verbs", [])
    claim_keywords = claim_components.get("keywords", [])

    for evidence in evidence_items:
        if not isinstance(evidence, str):
            continue

        evidence_lower = evidence.lower()

        # 1. Count entity matches - try both case-sensitive and case-insensitive matching
        entity_matches = 0
        for entity in claim_entities:
            # Try exact match first (preserves case)
            if entity in evidence:
                entity_matches += 1
            # Then try lowercase match
            elif entity.lower() in evidence_lower:
                entity_matches += 1

        # 2. Count verb matches (always lowercase)
        verb_matches = sum(1 for verb in claim_verbs if verb in evidence_lower)

        # 3. Calculate entity and verb weighted score
        entity_verb_score = (entity_matches * 3.0) + (verb_matches * 2.0)

        # 4. Count keyword matches (always lowercase)
        keyword_matches = sum(1 for keyword in claim_keywords if keyword in evidence_lower)

        # 5. Determine final score based on entity and verb matches
        if entity_verb_score > 0:
            final_score = entity_verb_score
        else:
            final_score = keyword_matches * 1.0  # Use keyword matches if no entity/verb matches

        scored_evidence.append((evidence, final_score))

    # Sort by score (descending)
    scored_evidence.sort(key=lambda x: x[1], reverse=True)

    return scored_evidence
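
# Worked example (illustrative, not part of the module above). Because the
# scorer only does substring checks, claim_components can be built by hand,
# so no spaCy model is needed:
#
#   components = {"entities": ["Tesla"], "verbs": ["recall"], "keywords": ["vehicles"]}
#   evidence = ["Tesla will recall 2 million vehicles", "Unrelated article text"]
#   analyze_evidence_relevance(evidence, components)
#   # -> [("Tesla will recall 2 million vehicles", 5.0), ("Unrelated article text", 0.0)]
#   #    1 entity match * 3.0 + 1 verb match * 2.0 = 5.0; keyword matches only
#   #    count when there are no entity or verb matches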
def get_recent_date_range(claim=None):
    """
    Return date range for news filtering based on temporal indicators in the claim.

    Args:
        claim (str, optional): The claim text to analyze for temporal indicators

    Returns:
        tuple: (from_date, to_date) as formatted strings 'YYYY-MM-DD'
    """
    today = datetime.now()

    # Default to 3 days for no claim or claims without temporal indicators
    default_days = 3
    extended_days = 15  # For 'recently', 'this week', etc.

    if claim:
        # Specific day indicators get 3 days
        specific_day_terms = ["today", "yesterday", "day before yesterday"]

        # Extended time terms get 15 days
        extended_time_terms = [
            "recently", "currently", "freshly", "this week", "few days",
            "couple of days", "last week", "past week", "several days",
            "anymore"
        ]

        claim_lower = claim.lower()

        # Check for extended time terms first, then specific day terms
        if any(term in claim_lower for term in extended_time_terms):
            from_date = (today - timedelta(days=extended_days)).strftime('%Y-%m-%d')
            to_date = today.strftime('%Y-%m-%d')
            logger.info(f"Using extended time range of {extended_days} days based on temporal indicators")
            return from_date, to_date
        elif any(term in claim_lower for term in specific_day_terms):
            from_date = (today - timedelta(days=default_days)).strftime('%Y-%m-%d')
            to_date = today.strftime('%Y-%m-%d')
            logger.info(f"Using specific day range of {default_days} days based on temporal indicators")
            return from_date, to_date

    # Default case - use standard 3-day window
    from_date = (today - timedelta(days=default_days)).strftime('%Y-%m-%d')
    to_date = today.strftime('%Y-%m-%d')
    return from_date, to_date
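
# Example sketch (illustrative, not part of the module above). With today taken
# as 2024-05-20:
#
#   get_recent_date_range("OpenAI released a new model recently")
#   # -> ("2024-05-05", "2024-05-20")   15-day window ("recently")
#   get_recent_date_range("OpenAI released a new model yesterday")
#   # -> ("2024-05-17", "2024-05-20")   3-day window ("yesterday")
#   get_recent_date_range("Water boils at 100 degrees Celsius")
#   # -> ("2024-05-17", "2024-05-20")   default 3-day window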
@api_error_handler("wikipedia")
def retrieve_evidence_from_wikipedia(claim):
    """Retrieve evidence from Wikipedia for a given claim"""
    logger.info(f"Retrieving evidence from Wikipedia for: {claim}")

    # Ensure shortened_claim is a string
    try:
        shortened_claim = shorten_claim_for_evidence(claim)
    except Exception as e:
        logger.error(f"Error in claim shortening: {e}")
        shortened_claim = claim  # Fallback to original claim

    # Ensure query_parts is a list of strings
    query_parts = str(shortened_claim).split()
    evidence = []
    source_count = {"wikipedia": 0}

    for i in range(len(query_parts), 0, -1):  # Start with full query, shorten iteratively
        try:
            # Safely join and encode query
            current_query = "+".join(query_parts[:i])
            search_url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={current_query}&format=json"
            logger.info(f"Wikipedia search URL: {search_url}")

            headers = {
                "User-Agent": "MisinformationDetectionResearchBot/1.0 (Research Project)"
            }

            # Make the search request with reduced timeout
            response = requests.get(search_url, headers=headers, timeout=7)
            response.raise_for_status()

            # Safely parse JSON
            search_data = safe_json_parse(response, "wikipedia")

            # Safely extract search results
            search_results = search_data.get("query", {}).get("search", [])

            # Ensure search_results is a list
            if not isinstance(search_results, list):
                logger.warning(f"Unexpected search results type: {type(search_results)}")
                search_results = []

            # Use ThreadPoolExecutor to fetch page content in parallel
            with ThreadPoolExecutor(max_workers=3) as executor:
                # Submit up to 3 page requests in parallel
                futures = []
                for idx, result in enumerate(search_results[:3]):
                    # Ensure result is a dictionary
                    if not isinstance(result, dict):
                        logger.warning(f"Skipping non-dictionary result: {type(result)}")
                        continue

                    # Safely extract title
                    page_title = result.get("title", "")
                    if not page_title:
                        continue

                    page_url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"

                    # Submit the page request task to executor
                    futures.append(executor.submit(
                        fetch_wikipedia_page_content,
                        page_url,
                        page_title,
                        headers
                    ))

                # Process completed futures as they finish
                for future in as_completed(futures):
                    try:
                        page_result = future.result()
                        if page_result:
                            evidence.append(page_result)
                            source_count["wikipedia"] += 1
                    except Exception as e:
                        logger.error(f"Error processing Wikipedia page: {e}")

            # Stop if we found any evidence
            if evidence:
                break

        except Exception as e:
            logger.error(f"Error retrieving from Wikipedia: {str(e)}")
            continue

    # Ensure success is a boolean
    success = bool(evidence)

    # Safely log evidence retrieval
    try:
        performance_tracker.log_evidence_retrieval(success, source_count)
    except Exception as e:
        logger.error(f"Error logging evidence retrieval: {e}")

    if not evidence:
        logger.warning("No evidence found from Wikipedia.")

    return evidence

def fetch_wikipedia_page_content(page_url, page_title, headers):
    """Helper function to fetch and parse Wikipedia page content"""
    try:
        # Get page content with reduced timeout
        page_response = requests.get(page_url, headers=headers, timeout=5)
        page_response.raise_for_status()

        # Extract relevant sections using BeautifulSoup
        soup = BeautifulSoup(page_response.text, 'html.parser')
        paragraphs = soup.find_all('p', limit=3)  # Limit to first 3 paragraphs
        content = " ".join([para.get_text(strip=True) for para in paragraphs])

        # Truncate content to reduce token usage earlier in the pipeline
        if len(content) > 1000:
            content = content[:1000] + "..."

        if content.strip():  # Ensure content is not empty
            return f"Title: {page_title}, URL: {page_url}, Content: {content}"
        return None
    except Exception as e:
        logger.error(f"Error fetching Wikipedia page {page_url}: {e}")
        return None
329 |
+
@api_error_handler("wikidata")
|
330 |
+
def retrieve_evidence_from_wikidata(claim):
|
331 |
+
"""Retrieve evidence from Wikidata for a given claim"""
|
332 |
+
logger.info(f"Retrieving evidence from Wikidata for: {claim}")
|
333 |
+
|
334 |
+
# Prepare entities for SPARQL query
|
335 |
+
shortened_claim = shorten_claim_for_evidence(claim)
|
336 |
+
query_terms = shortened_claim.split()
|
337 |
+
|
338 |
+
# Initialize SPARQLWrapper for Wikidata
|
339 |
+
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
|
340 |
+
|
341 |
+
# Use a more conservative user agent to avoid blocks
|
342 |
+
sparql.addCustomHttpHeader("User-Agent", "MisinformationDetectionResearchBot/1.0")
|
343 |
+
|
344 |
+
# Fix SSL issues by disabling SSL verification for this specific request
|
345 |
+
try:
|
346 |
+
# Create a context where we don't verify SSL certs
|
347 |
+
import ssl
|
348 |
+
import urllib.request
|
349 |
+
|
350 |
+
# Create a context that doesn't verify certificates
|
351 |
+
ssl_context = ssl._create_unverified_context()
|
352 |
+
|
353 |
+
# Monkey patch the opener for SPARQLWrapper
|
354 |
+
opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
|
355 |
+
urllib.request.install_opener(opener)
|
356 |
+
except Exception as e:
|
357 |
+
logger.error(f"Error setting up SSL context: {str(e)}")
|
358 |
+
|
359 |
+
# Construct basic SPARQL query for relevant entities
|
360 |
+
query = """
|
361 |
+
SELECT ?item ?itemLabel ?description ?article WHERE {
|
362 |
+
SERVICE wikibase:mwapi {
|
363 |
+
bd:serviceParam wikibase:api "EntitySearch" .
|
364 |
+
bd:serviceParam wikibase:endpoint "www.wikidata.org" .
|
365 |
+
bd:serviceParam mwapi:search "%s" .
|
366 |
+
bd:serviceParam mwapi:language "en" .
|
367 |
+
?item wikibase:apiOutputItem mwapi:item .
|
368 |
+
}
|
369 |
+
?item schema:description ?description .
|
370 |
+
FILTER(LANG(?description) = "en")
|
371 |
+
OPTIONAL {
|
372 |
+
?article schema:about ?item .
|
373 |
+
?article schema:isPartOf <https://en.wikipedia.org/> .
|
374 |
+
}
|
375 |
+
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
|
376 |
+
}
|
377 |
+
LIMIT 5
|
378 |
+
""" % " ".join(query_terms)
|
379 |
+
|
380 |
+
sparql.setQuery(query)
|
381 |
+
sparql.setReturnFormat(JSON)
|
382 |
+
|
383 |
+
try:
|
384 |
+
results = sparql.query().convert()
|
385 |
+
|
386 |
+
wikidata_evidence = []
|
387 |
+
|
388 |
+
for result in results["results"]["bindings"]:
|
389 |
+
entity_label = result.get("itemLabel", {}).get("value", "Unknown")
|
390 |
+
description = result.get("description", {}).get("value", "No description")
|
391 |
+
article_url = result.get("article", {}).get("value", "")
|
392 |
+
|
393 |
+
# Truncate description to reduce token usage
|
394 |
+
if len(description) > 1000:
|
395 |
+
description = description[:1000] + "..."
|
396 |
+
|
397 |
+
evidence_text = f"Entity: {entity_label}, Description: {description}"
|
398 |
+
if article_url:
|
399 |
+
evidence_text += f", URL: {article_url}"
|
400 |
+
|
401 |
+
wikidata_evidence.append(evidence_text)
|
402 |
+
|
403 |
+
logger.info(f"Retrieved {len(wikidata_evidence)} Wikidata entities")
|
404 |
+
return wikidata_evidence
|
405 |
+
|
406 |
+
except Exception as e:
|
407 |
+
logger.error(f"Error retrieving from Wikidata: {str(e)}")
|
408 |
+
return []
|
409 |
+
|
410 |
+
@api_error_handler("openalex")
|
411 |
+
def retrieve_evidence_from_openalex(claim):
|
412 |
+
"""Retrieve evidence from OpenAlex for a given claim (replacement for Semantic Scholar)"""
|
413 |
+
logger.info(f"Retrieving evidence from OpenAlex for: {claim}")
|
414 |
+
|
415 |
+
try:
|
416 |
+
shortened_claim = shorten_claim_for_evidence(claim)
|
417 |
+
query = shortened_claim.replace(" ", "+")
|
418 |
+
|
419 |
+
# OpenAlex API endpoint
|
420 |
+
api_url = f"https://api.openalex.org/works?search={query}&filter=is_paratext:false&per_page=3"
|
421 |
+
|
422 |
+
headers = {
|
423 |
+
"Accept": "application/json",
|
424 |
+
"User-Agent": "MisinformationDetectionResearchBot/1.0 ([email protected])",
|
425 |
+
}
|
426 |
+
|
427 |
+
scholarly_evidence = []
|
428 |
+
|
429 |
+
try:
|
430 |
+
# Request with reduced timeout
|
431 |
+
response = requests.get(api_url, headers=headers, timeout=8)
|
432 |
+
|
433 |
+
# Check response status
|
434 |
+
if response.status_code == 200:
|
435 |
+
# Successfully retrieved data
|
436 |
+
data = safe_json_parse(response, "openalex")
|
437 |
+
papers = data.get("results", [])
|
438 |
+
|
439 |
+
for paper in papers:
|
440 |
+
title = paper.get("title", "Unknown Title")
|
441 |
+
abstract = paper.get("abstract_inverted_index", None)
|
442 |
+
|
443 |
+
# OpenAlex stores abstracts in an inverted index format, so we need to reconstruct it
|
444 |
+
abstract_text = "No abstract available"
|
445 |
+
if abstract:
|
446 |
+
try:
|
447 |
+
# Simple approach to reconstruct from inverted index
|
448 |
+
# For a production app, implement a proper reconstruction algorithm
|
449 |
+
words = list(abstract.keys())
|
450 |
+
abstract_text = " ".join(words[:30]) + "..."
|
451 |
+
except Exception as e:
|
452 |
+
logger.error(f"Error reconstructing abstract: {e}")
|
453 |
+
|
454 |
+
url = paper.get("doi", "")
|
455 |
+
if url and not url.startswith("http"):
|
456 |
+
url = f"https://doi.org/{url}"
|
457 |
+
|
458 |
+
year = ""
|
459 |
+
publication_date = paper.get("publication_date", "")
|
460 |
+
if publication_date:
|
461 |
+
year = publication_date.split("-")[0]
|
462 |
+
|
463 |
+
# Truncate abstract to reasonable length
|
464 |
+
if len(abstract_text) > 1000:
|
465 |
+
abstract_text = abstract_text[:1000] + "..."
|
466 |
+
|
467 |
+
evidence_text = f"Title: {title}, Year: {year}, Abstract: {abstract_text}, URL: {url}"
|
468 |
+
scholarly_evidence.append(evidence_text)
|
469 |
+
|
470 |
+
else:
|
471 |
+
logger.error(f"OpenAlex API error: {response.status_code}")
|
472 |
+
|
473 |
+
except requests.exceptions.Timeout:
|
474 |
+
logger.warning("OpenAlex request timed out")
|
475 |
+
except requests.exceptions.ConnectionError:
|
476 |
+
logger.warning("OpenAlex connection error")
|
477 |
+
except Exception as e:
|
478 |
+
logger.error(f"Unexpected error in OpenAlex request: {str(e)}")
|
479 |
+
|
480 |
+
logger.info(f"Retrieved {len(scholarly_evidence)} scholarly papers from OpenAlex")
|
481 |
+
return scholarly_evidence
|
482 |
+
|
483 |
+
except Exception as e:
|
484 |
+
logger.error(f"Fatal error in OpenAlex retrieval: {str(e)}")
|
485 |
+
return []
|
486 |
+
|
487 |
+
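The comment above marks the abstract handling as a placeholder: joining dictionary keys loses word order. OpenAlex's abstract_inverted_index maps each word to its positions in the abstract, so a faithful reconstruction is a sort-and-join; a sketch under that assumption (the reconstruct_abstract name is illustrative):

    def reconstruct_abstract(inverted_index, max_words=250):
        # Flatten {word: [positions]} into (position, word) pairs.
        positioned = [
            (pos, word)
            for word, positions in inverted_index.items()
            for pos in positions
        ]
        # Sort by position to restore the original word order, then join.
        return " ".join(word for _, word in sorted(positioned)[:max_words])
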
@api_error_handler("factcheck")
|
488 |
+
def retrieve_evidence_from_factcheck(claim):
|
489 |
+
"""Retrieve evidence from Google's Fact Check Tools API for a given claim"""
|
490 |
+
logger.info(f"Retrieving evidence from Google's Fact Check Tools API for: {claim}")
|
491 |
+
factcheck_api_key = FACTCHECK_API_KEY
|
492 |
+
|
493 |
+
# Safely shorten claim
|
494 |
+
try:
|
495 |
+
shortened_claim = shorten_claim_for_evidence(claim)
|
496 |
+
except Exception as e:
|
497 |
+
logger.error(f"Error shortening claim: {e}")
|
498 |
+
shortened_claim = claim
|
499 |
+
|
500 |
+
query_parts = str(shortened_claim).split()
|
501 |
+
factcheck_results = []
|
502 |
+
source_count = {"factcheck": 0}
|
503 |
+
|
504 |
+
for i in range(len(query_parts), 0, -1): # Iteratively try shorter queries
|
505 |
+
try:
|
506 |
+
current_query = " ".join(query_parts[:i])
|
507 |
+
encoded_query = urlencode({"query": current_query})
|
508 |
+
factcheck_url = f"https://factchecktools.googleapis.com/v1alpha1/claims:search?{encoded_query}&key={factcheck_api_key}"
|
509 |
+
logger.info(f"Factcheck URL: {factcheck_url}")
|
510 |
+
|
511 |
+
# Make request with reduced timeout
|
512 |
+
response = requests.get(factcheck_url, timeout=7)
|
513 |
+
response.raise_for_status()
|
514 |
+
data = safe_json_parse(response, "factcheck")
|
515 |
+
|
516 |
+
# Safely extract claims
|
517 |
+
claims = data.get("claims", [])
|
518 |
+
if not isinstance(claims, list):
|
519 |
+
logger.warning(f"Unexpected claims type: {type(claims)}")
|
520 |
+
claims = []
|
521 |
+
|
522 |
+
if claims: # If results found
|
523 |
+
logger.info(f"Results found for query '{current_query}'.")
|
524 |
+
for item in claims:
|
525 |
+
try:
|
526 |
+
# Ensure item is a dictionary
|
527 |
+
if not isinstance(item, dict):
|
528 |
+
logger.warning(f"Skipping non-dictionary item: {type(item)}")
|
529 |
+
continue
|
530 |
+
|
531 |
+
claim_text = str(item.get("text", ""))
|
532 |
+
# Truncate claim text
|
533 |
+
if len(claim_text) > 1000:
|
534 |
+
claim_text = claim_text[:1000] + "..."
|
535 |
+
|
536 |
+
reviews = item.get("claimReview", [])
|
537 |
+
|
538 |
+
# Ensure reviews is a list
|
539 |
+
if not isinstance(reviews, list):
|
540 |
+
logger.warning(f"Unexpected reviews type: {type(reviews)}")
|
541 |
+
reviews = []
|
542 |
+
|
543 |
+
for review in reviews:
|
544 |
+
# Ensure review is a dictionary
|
545 |
+
if not isinstance(review, dict):
|
546 |
+
logger.warning(f"Skipping non-dictionary review: {type(review)}")
|
547 |
+
continue
|
548 |
+
|
549 |
+
publisher = str(review.get("publisher", {}).get("name", "Unknown Source"))
|
550 |
+
rating = str(review.get("textualRating", "Unknown"))
|
551 |
+
review_url = str(review.get("url", ""))
|
552 |
+
|
553 |
+
if claim_text:
|
554 |
+
factcheck_results.append(
|
555 |
+
f"Claim: {claim_text}, Rating: {rating}, " +
|
556 |
+
f"Source: {publisher}, URL: {review_url}"
|
557 |
+
)
|
558 |
+
source_count["factcheck"] += 1
|
559 |
+
|
560 |
+
except Exception as e:
|
561 |
+
logger.error(f"Error processing FactCheck result: {e}")
|
562 |
+
|
563 |
+
break # Break once we have results
|
564 |
+
else:
|
565 |
+
logger.info(f"No results for query '{current_query}', trying shorter version.")
|
566 |
+
|
567 |
+
except Exception as e:
|
568 |
+
logger.error(f"Error in FactCheck retrieval: {e}")
|
569 |
+
|
570 |
+
# Safely log evidence retrieval
|
571 |
+
try:
|
572 |
+
success = bool(factcheck_results)
|
573 |
+
performance_tracker.log_evidence_retrieval(success, source_count)
|
574 |
+
except Exception as e:
|
575 |
+
logger.error(f"Error logging evidence retrieval: {e}")
|
576 |
+
|
577 |
+
if not factcheck_results:
|
578 |
+
logger.warning("No factcheck evidence found after trying all query variants.")
|
579 |
+
|
580 |
+
return factcheck_results
|
581 |
+
|
582 |
+
@api_error_handler("newsapi")
|
583 |
+
def retrieve_news_articles(claim, requires_recent=False):
|
584 |
+
"""Retrieve evidence from News API for a given claim with improved single request approach"""
|
585 |
+
logger.info(f"Retrieving evidence from News API for: {claim}")
|
586 |
+
|
587 |
+
# Get API key
|
588 |
+
news_api_key = NEWS_API_KEY
|
589 |
+
if not news_api_key:
|
590 |
+
logger.error("No News API key available")
|
591 |
+
return []
|
592 |
+
|
593 |
+
news_results = []
|
594 |
+
source_count = {"news": 0}
|
595 |
+
|
596 |
+
# Get date range for recent news
|
597 |
+
from_date, to_date = get_recent_date_range()
|
598 |
+
logger.info(f"Filtering for news from {from_date} to {to_date}")
|
599 |
+
|
600 |
+
try:
|
601 |
+
# Extract a simplified claim for better matching
|
602 |
+
shortened_claim = shorten_claim_for_evidence(claim)
|
603 |
+
|
604 |
+
# Use a single endpoint with proper parameters
|
605 |
+
encoded_query = urlencode({"q": shortened_claim})
|
606 |
+
|
607 |
+
# Use the 'everything' endpoint as it's more comprehensive
|
608 |
+
news_api_url = f"https://newsapi.org/v2/everything?{encoded_query}&apiKey={news_api_key}&language=en&pageSize=5&sortBy=publishedAt"
|
609 |
+
|
610 |
+
# Only apply date filtering if the claim requires recency
|
611 |
+
if requires_recent:
|
612 |
+
news_api_url += f"&from={from_date}&to={to_date}"
|
613 |
+
|
614 |
+
log_url = news_api_url.replace(news_api_key, "API_KEY_REDACTED")
|
615 |
+
logger.info(f"Requesting: {log_url}")
|
616 |
+
|
617 |
+
# Make a single request with proper headers and reduced timeout
|
618 |
+
headers = {
|
619 |
+
"User-Agent": "MisinformationDetectionResearchBot/1.0",
|
620 |
+
"X-Api-Key": news_api_key,
|
621 |
+
"Accept": "application/json"
|
622 |
+
}
|
623 |
+
|
624 |
+
response = requests.get(
|
625 |
+
news_api_url,
|
626 |
+
headers=headers,
|
627 |
+
timeout=8
|
628 |
+
)
|
629 |
+
|
630 |
+
logger.info(f"Response status: {response.status_code}")
|
631 |
+
|
632 |
+
if response.status_code == 200:
|
633 |
+
data = safe_json_parse(response, "newsapi")
|
634 |
+
|
635 |
+
if data.get("status") == "ok":
|
636 |
+
articles = data.get("articles", [])
|
637 |
+
logger.info(f"Found {len(articles)} articles")
|
638 |
+
|
639 |
+
for article in articles:
|
640 |
+
try:
|
641 |
+
# Robust article parsing
|
642 |
+
title = str(article.get("title", ""))
|
643 |
+
description = str(article.get("description", ""))
|
644 |
+
content = str(article.get("content", ""))
|
645 |
+
source_name = str(article.get("source", {}).get("name", "Unknown"))
|
646 |
+
url = str(article.get("url", ""))
|
647 |
+
published_at = str(article.get("publishedAt", ""))
|
648 |
+
|
649 |
+
# Parse date to prioritize recent content
|
650 |
+
article_date = None
|
651 |
+
try:
|
652 |
+
if published_at:
|
653 |
+
article_date = datetime.strptime(published_at.split('T')[0], '%Y-%m-%d')
|
654 |
+
except Exception as date_error:
|
655 |
+
logger.warning(f"Could not parse date: {published_at}")
|
656 |
+
|
657 |
+
# Calculate recency score (higher = more recent)
|
658 |
+
recency_score = 1.0 # Default
|
659 |
+
if article_date:
|
660 |
+
days_old = (datetime.now() - article_date).days
|
661 |
+
if days_old == 0: # Today
|
662 |
+
recency_score = 3.0
|
663 |
+
elif days_old == 1: # Yesterday
|
664 |
+
recency_score = 2.0
|
665 |
+
|
666 |
+
# Use description if content is empty or too short
|
667 |
+
if not content or len(content) < 50:
|
668 |
+
content = description
|
669 |
+
|
670 |
+
# Truncate content to reduce token usage
|
671 |
+
if len(content) > 1000:
|
672 |
+
content = content[:1000] + "..."
|
673 |
+
|
674 |
+
# Ensure meaningful content
|
675 |
+
if title and (content or description):
|
676 |
+
news_item = {
|
677 |
+
"text": (
|
678 |
+
f"Title: {title}, " +
|
679 |
+
f"Source: {source_name}, " +
|
680 |
+
f"Date: {published_at}, " +
|
681 |
+
f"URL: {url}, " +
|
682 |
+
f"Content: {content}"
|
683 |
+
),
|
684 |
+
"recency_score": recency_score,
|
685 |
+
"date": article_date
|
686 |
+
}
|
687 |
+
news_results.append(news_item)
|
688 |
+
source_count["news"] += 1
|
689 |
+
logger.info(f"Added article: {title}")
|
690 |
+
|
691 |
+
except Exception as article_error:
|
692 |
+
logger.error(f"Error processing article: {article_error}")
|
693 |
+
|
694 |
+
# Sort results by recency
|
695 |
+
if news_results:
|
696 |
+
news_results.sort(key=lambda x: x.get('recency_score', 0), reverse=True)
|
697 |
+
|
698 |
+
except Exception as query_error:
|
699 |
+
logger.error(f"Error processing query: {query_error}")
|
700 |
+
|
701 |
+
# Convert to plain text list for compatibility with existing code
|
702 |
+
news_texts = [item["text"] for item in news_results]
|
703 |
+
|
704 |
+
# Log evidence retrieval
|
705 |
+
try:
|
706 |
+
success = bool(news_texts)
|
707 |
+
performance_tracker.log_evidence_retrieval(success, source_count)
|
708 |
+
except Exception as log_error:
|
709 |
+
logger.error(f"Error logging evidence retrieval: {log_error}")
|
710 |
+
|
711 |
+
# Log results
|
712 |
+
if news_texts:
|
713 |
+
logger.info(f"Retrieved {len(news_texts)} news articles")
|
714 |
+
else:
|
715 |
+
logger.warning("No news articles found")
|
716 |
+
|
717 |
+
return news_texts
|
718 |
+
|
719 |
+
def retrieve_combined_evidence(claim):
    """
    Retrieve evidence from multiple sources in parallel and analyze relevance.

    This function:
    1. Extracts claim components (entities, verbs, keywords)
    2. Determines if the claim is temporal
    3. Retrieves evidence from all sources in parallel
    4. Analyzes relevance based on entity and verb matching
    5. Returns the most relevant evidence items for claim verification

    Args:
        claim (str): The factual claim to gather evidence for

    Returns:
        list: List of the most relevant evidence items (max 10) for claim verification
    """
    logger.info(f"Starting evidence retrieval for: {claim}")
    start_time = time.time()

    # Use the category detector to identify the claim category
    from modules.category_detection import get_category_specific_rss_feeds, get_fallback_category, detect_claim_category

    # Extract key claim components for relevance matching
    claim_components = extract_claim_components(claim)
    logger.info(f"Extracted claim components: entities={claim_components['entities']}, verbs={claim_components['verbs']}")

    # Determine if claim has temporal attributes
    requires_recent_evidence = bool(claim_components.get("temporal_words", []))
    logger.info(f"Claim requires recent evidence: {requires_recent_evidence}")

    # Determine the claim category
    category, confidence = detect_claim_category(claim)
    logger.info(f"Detected claim category: {category} (confidence: {confidence:.2f})")

    # Initialize results container
    all_evidence = []
    source_counts = {}

    # Define all evidence sources to query in parallel
    evidence_sources = [
        ("wikipedia", retrieve_evidence_from_wikipedia, [claim]),
        ("wikidata", retrieve_evidence_from_wikidata, [claim]),
        ("scholarly", retrieve_evidence_from_openalex, [claim]),
        ("claimreview", retrieve_evidence_from_factcheck, [claim]),
        ("news", retrieve_news_articles, [claim, requires_recent_evidence])
    ]

    # Add RSS feeds based on category with appropriate fallback
    if category == "ai":
        # For AI category, add AI-specific RSS feeds
        category_feeds = get_category_specific_rss_feeds(category)
        evidence_sources.append(("rss_ai", retrieve_evidence_from_rss, [claim, 10, category_feeds]))

        # Add technology fallback feeds for AI
        fallback_category = get_fallback_category(category)  # Should be "technology"
        if fallback_category:
            fallback_feeds = get_category_specific_rss_feeds(fallback_category)
            evidence_sources.append(("rss_tech", retrieve_evidence_from_rss, [claim, 10, fallback_feeds]))
    else:
        # For other categories, add their specific RSS feeds
        category_feeds = get_category_specific_rss_feeds(category)
        if category_feeds:
            evidence_sources.append(("rss_category", retrieve_evidence_from_rss, [claim, 10, category_feeds]))

        # Add default RSS feeds as fallback for all non-AI categories
        evidence_sources.append(("rss_default", retrieve_evidence_from_rss, [claim, 10]))

    # Execute all evidence gathering in parallel
    with ThreadPoolExecutor(max_workers=len(evidence_sources)) as executor:
        # Create a mapping of futures to source names for easier tracking
        futures = {}
        for source_name, func, args in evidence_sources:
            future = executor.submit(func, *args)
            futures[future] = source_name

        # Process results as they complete
        for future in as_completed(futures):
            source_name = futures[future]
            try:
                evidence_items = future.result()
                if evidence_items:
                    all_evidence.extend(evidence_items)
                    source_counts[source_name] = len(evidence_items)
                    logger.info(f"Retrieved {len(evidence_items)} items from {source_name}")
            except Exception as e:
                logger.error(f"Error retrieving from {source_name}: {str(e)}")

    # If no evidence was found at all, create a minimal placeholder
    if not all_evidence:
        logger.warning("No evidence found from any source")
        return [f"No specific evidence found for the claim: '{claim}'. This may be due to the claim being very recent, niche, or involving private information."]

    # Analyze evidence relevance
    scored_evidence = analyze_evidence_relevance(all_evidence, claim_components)

    # Return the top 10 most relevant evidence items
    return [evidence for evidence, score in scored_evidence[:10]]
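A minimal sketch of how this entry point might be exercised end to end; the claim text is illustrative, and the call assumes the API keys and helper modules referenced above are configured:

    from modules.evidence_retrieval import retrieve_combined_evidence

    evidence = retrieve_combined_evidence("ISRO launched a lunar mission last week")
    for i, item in enumerate(evidence, 1):
        print(f"{i}. {item[:120]}...")  # each item is a preformatted evidence string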
modules/explanation.py
ADDED
@@ -0,0 +1,282 @@
import logging
import re
import ast
from utils.models import get_llm_model

logger = logging.getLogger("misinformation_detector")

def extract_most_relevant_evidence(evidence_results):
    """
    Intelligently extract the most relevant piece of evidence

    Args:
        evidence_results (list): List of evidence items

    Returns:
        str: Most relevant evidence piece
    """
    if not evidence_results:
        return None

    # If evidence is a dictionary with 'evidence' key
    if isinstance(evidence_results[0], dict):
        # Sort by confidence if available
        sorted_evidence = sorted(
            evidence_results,
            key=lambda x: x.get('confidence', 0),
            reverse=True
        )

        # Return the evidence from the highest confidence item
        for item in sorted_evidence:
            evidence = item.get('evidence')
            if evidence:
                return evidence

    # If plain list of evidence
    return next((ev for ev in evidence_results if ev and isinstance(ev, str)), None)

def generate_explanation(claim, evidence_results, truth_label, confidence=None):
    """
    Generate an explanation for the claim's classification based on evidence.

    This function creates a human-readable explanation of why a claim was classified
    as true, false, or uncertain. It handles different truth label formats through
    normalization and provides robust fallback mechanisms for error cases.

    Args:
        claim (str): The original factual claim being verified
        evidence_results (list/str): Evidence supporting the classification, can be
            a list of evidence items or structured results
        truth_label (str): Classification of the claim (True/False/Uncertain),
            which may come in various formats
        confidence (float, optional): Confidence level between 0 and 1

    Returns:
        str: Natural language explanation of the verdict with appropriate
            confidence framing and evidence citations
    """
    logger.info(f"Generating explanation for claim with verdict: {truth_label}")

    try:
        # Normalize truth_label to handle different formats consistently
        normalized_label = normalize_truth_label(truth_label)

        # Normalize evidence_results to a list
        if not isinstance(evidence_results, list):
            try:
                evidence_results = ast.literal_eval(str(evidence_results)) if evidence_results else []
            except Exception:
                evidence_results = [evidence_results] if evidence_results else []

        # Get the LLM model
        explanation_model = get_llm_model()

        # Extract most relevant evidence
        most_relevant_evidence = extract_most_relevant_evidence(evidence_results)

        # Prepare evidence text for prompt
        evidence_text = "\n".join([
            f"Evidence {i+1}: {str(ev)[:200] + '...' if len(str(ev)) > 200 else str(ev)}"
            for i, ev in enumerate(evidence_results[:5])
        ])

        # Filter only supporting and contradicting evidence for clarity
        support_items = [item for item in evidence_results if isinstance(item, dict) and item.get("label") == "support"]
        contradict_items = [item for item in evidence_results if isinstance(item, dict) and item.get("label") == "contradict"]

        # Convert confidence to percentage and description
        confidence_desc = ""
        very_low_confidence = False

        # For Uncertain verdicts, always use 0% confidence regardless of evidence confidence values
        if "uncertain" in normalized_label.lower():
            confidence = 0.0
            confidence_desc = "no confidence (0%)"
        elif confidence is not None:
            confidence_pct = int(confidence * 100)

            if confidence == 0.0:
                confidence_desc = "no confidence (0%)"
            elif confidence < 0.1:
                confidence_desc = f"very low confidence ({confidence_pct}%)"
                very_low_confidence = True
            elif confidence < 0.3:
                confidence_desc = f"low confidence ({confidence_pct}%)"
            elif confidence < 0.7:
                confidence_desc = f"moderate confidence ({confidence_pct}%)"
            elif confidence < 0.9:
                confidence_desc = f"high confidence ({confidence_pct}%)"
            else:
                confidence_desc = f"very high confidence ({confidence_pct}%)"
        else:
            # Default if no confidence provided
            confidence_desc = "uncertain confidence"

        # Create prompt with specific instructions based on the type of claim
        has_negation = any(neg in claim.lower() for neg in ["not", "no longer", "isn't", "doesn't", "won't", "cannot"])

        # For claims with "True" verdict
        if "true" in normalized_label.lower():
            # Special case for very low confidence (but not zero)
            if very_low_confidence:
                prompt = f"""
Claim: "{claim}"

Verdict: {normalized_label} (with {confidence_desc})

Available Evidence:
{evidence_text}

Task: Generate a clear explanation that:
1. States that the claim appears to be true based on the available evidence
2. EMPHASIZES that the confidence level is VERY LOW ({confidence_pct}%)
3. Explains that this means the evidence slightly favors the claim but is not strong enough to be certain
4. STRONGLY recommends that the user verify this with other authoritative sources
5. Is factual and precise
"""
            else:
                prompt = f"""
Claim: "{claim}"

Verdict: {normalized_label} (with {confidence_desc})

Available Evidence:
{evidence_text}

Task: Generate a clear explanation that:
1. Clearly states that the claim IS TRUE based on the evidence
2. {"Pay special attention to the logical relationship since the claim contains negation" if has_negation else "Explains why the evidence supports the claim"}
3. Uses confidence level of {confidence_desc}
4. Highlights the most relevant supporting evidence
5. Is factual and precise
"""

        # For claims with "False" verdict
        elif "false" in normalized_label.lower():
            # Special case for very low confidence (but not zero)
            if very_low_confidence:
                prompt = f"""
Claim: "{claim}"

Verdict: {normalized_label} (with {confidence_desc})

Available Evidence:
{evidence_text}

Task: Generate a clear explanation that:
1. States that the claim appears to be false based on the available evidence
2. EMPHASIZES that the confidence level is VERY LOW ({confidence_pct}%)
3. Explains that this means the evidence slightly contradicts the claim but is not strong enough to be certain
4. STRONGLY recommends that the user verify this with other authoritative sources
5. Is factual and precise
"""
            else:
                prompt = f"""
Claim: "{claim}"

Verdict: {normalized_label} (with {confidence_desc})

Available Evidence:
{evidence_text}

Task: Generate a clear explanation that:
1. Clearly states that the claim IS FALSE based on the evidence
2. {"Pay special attention to the logical relationship since the claim contains negation" if has_negation else "Explains why the evidence contradicts the claim"}
3. Uses confidence level of {confidence_desc}
4. Highlights the contradicting evidence
5. Is factual and precise
"""

        # For uncertain claims
        else:
            prompt = f"""
Claim: "{claim}"

Verdict: {normalized_label} (with {confidence_desc})

Available Evidence:
{evidence_text}

Task: Generate a clear explanation that:
1. Clearly states that there is insufficient evidence to determine if the claim is true or false
2. Explains what information is missing or why the available evidence is insufficient
3. Uses confidence level of {confidence_desc}
4. Makes NO speculation about whether the claim might be true or false
5. Explicitly mentions that the user should seek information from other reliable sources
"""

        # Generate explanation with multiple attempts for reliability
        max_attempts = 3
        for attempt in range(max_attempts):
            try:
                # Invoke the model
                response = explanation_model.invoke(prompt)
                explanation = response.content.strip()

                # Validate explanation length
                if explanation and len(explanation.split()) >= 5:
                    return explanation

            except Exception as attempt_error:
                logger.error(f"Explanation generation attempt {attempt+1} failed: {str(attempt_error)}")

        # Ultimate fallback explanations if all attempts fail
        if "uncertain" in normalized_label.lower():
            return f"The claim '{claim}' cannot be verified due to insufficient evidence. The available information does not provide clear support for or against this claim. Consider consulting reliable sources for verification."
        elif very_low_confidence:
            return f"The claim '{claim}' appears to be {'supported' if 'true' in normalized_label.lower() else 'contradicted'} by the evidence, but with very low confidence ({confidence_pct}%). The evidence is not strong enough to make a definitive determination. It is strongly recommended to verify this information with other authoritative sources."
        elif "true" in normalized_label.lower():
            return f"The claim '{claim}' is supported by the evidence with {confidence_desc}. {most_relevant_evidence or 'The evidence indicates this claim is accurate.'}"
        else:
            return f"The claim '{claim}' is contradicted by the evidence with {confidence_desc}. {most_relevant_evidence or 'The evidence indicates this claim is not accurate.'}"

    except Exception as e:
        logger.error(f"Comprehensive error in explanation generation: {str(e)}")
        # Final fallback with minimal but useful information
        normalized_label = normalize_truth_label(truth_label)
        return f"The claim is classified as {normalized_label} based on the available evidence."

def normalize_truth_label(truth_label):
    """
    Normalize truth label to handle different formats consistently.

    This function extracts the core truth classification (True/False/Uncertain) from
    potentially complex or inconsistently formatted truth labels. It preserves
    contextual information like "(Based on Evidence)" when present.

    Args:
        truth_label (str): The truth label to normalize, which may contain
            additional descriptive text or formatting

    Returns:
        str: Normalized truth label that preserves the core classification and
            important context while eliminating inconsistencies

    Examples:
        >>> normalize_truth_label("True (Based on Evidence)")
        "True (Based on Evidence)"
        >>> normalize_truth_label("false (Based on Evidence)")
        "False (Based on Evidence)"
        >>> normalize_truth_label("The evidence shows this claim is False")
        "False"
    """
    if not truth_label:
        return "Uncertain"

    # Convert to string if not already
    label_str = str(truth_label)

    # Extract the core label if it contains additional text like "(Based on Evidence)"
    base_label_match = re.search(r'(True|False|Uncertain|Error)', label_str, re.IGNORECASE)
    if base_label_match:
        # Get the core label and capitalize it for consistency
        base_label = base_label_match.group(1).capitalize()

        # Add back the context if it was present
        if "(Based on Evidence)" in label_str:
            return f"{base_label} (Based on Evidence)"
        return base_label

    # Return the original if we couldn't normalize it
    return label_str
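A short usage sketch, assuming an LLM is reachable via get_llm_model() and using evidence strings in the format produced by the retrieval module; all inputs are illustrative:

    from modules.explanation import generate_explanation, normalize_truth_label

    print(normalize_truth_label("false (Based on Evidence)"))  # -> "False (Based on Evidence)"

    explanation = generate_explanation(
        claim="The Eiffel Tower is located in Paris",
        evidence_results=["Title: Eiffel Tower, Content: wrought-iron tower on the Champ de Mars in Paris"],
        truth_label="True (Based on Evidence)",
        confidence=0.92,
    )
    print(explanation)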
modules/rss_feed.py
ADDED
@@ -0,0 +1,408 @@
import feedparser
import time
import logging
import re
import ssl
import requests
from datetime import datetime, timedelta
from threading import Timer
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

logger = logging.getLogger("misinformation_detector")

# Disable SSL certificate verification for feeds with self-signed certs
ssl._create_default_https_context = ssl._create_unverified_context

# List of RSS feeds to check for news
# These are popular news sources with reliable and frequently updated RSS feeds
RSS_FEEDS = [
    # --------------------
    # 🌐 General World News
    # --------------------
    "http://rss.cnn.com/rss/cnn_world.rss",                          # CNN World News
    "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",        # NYT World News
    "https://feeds.washingtonpost.com/rss/world",                    # The Washington Post World News
    "https://feeds.bbci.co.uk/news/world/rss.xml",                   # BBC News - World

    # --------------------
    # 🧠 Tech & Startup News (Global)
    # --------------------
    "https://techcrunch.com/feed/",                                  # TechCrunch - Startup and Technology News
    "https://venturebeat.com/feed/",                                 # VentureBeat - Tech News
    "https://www.wired.com/feed/rss",                                # Wired - Technology News
    "https://www.cnet.com/rss/news/",                                # CNET - Technology News
    "https://news.google.com/rss?gl=IN&ceid=IN:en&topic=t&hl=en-IN", # Google News India - Technology
    "https://news.google.com/rss?gl=US&ceid=US:en&topic=t&hl=en-US", # Google News US - Technology

    # --------------------
    # 💼 Startup & VC Focused
    # --------------------
    "https://news.crunchbase.com/feed/",                             # Crunchbase News - Startup Funding
    "https://techstartups.com/feed/",                                # Tech Startups - Startup News

    # --------------------
    # 📰 Global Business & Corporate Feeds
    # --------------------
    "https://feeds.bloomberg.com/technology/news.rss",               # Bloomberg Technology News
    "https://www.ft.com/technology?format=rss",                      # Financial Times Technology News
    "https://news.google.com/rss?gl=IN&ceid=IN:en&topic=b&hl=en-IN", # Google News India - Business

    # --------------------
    # 🇮🇳 India-specific News
    # --------------------
    "https://inc42.com/feed/",                                       # Inc42 - Indian Startups and Technology
    "https://timesofindia.indiatimes.com/rssfeedstopstories.cms",    # TOI - Top Stories
    "https://timesofindia.indiatimes.com/rssfeedmostrecent.cms",     # TOI - Most Recent Stories
    "https://timesofindia.indiatimes.com/rssfeeds/-2128936835.cms",  # TOI - India News
    "https://timesofindia.indiatimes.com/rssfeeds/296589292.cms",    # TOI - World News
    "https://timesofindia.indiatimes.com/rssfeeds/1898055.cms",      # TOI - Business News
    "https://timesofindia.indiatimes.com/rssfeeds/54829575.cms",     # TOI - Cricket News
    "https://timesofindia.indiatimes.com/rssfeeds/4719148.cms",      # TOI - Sports News
    "https://timesofindia.indiatimes.com/rssfeeds/-2128672765.cms",  # TOI - Science News

    # --------------------
    # 🏏 Sports News (Global + Cricket)
    # --------------------
    "https://www.espn.com/espn/rss/news",                            # ESPN - Top Sports News
    "https://feeds.skynews.com/feeds/rss/sports.xml",                # Sky News - Sports
    "https://sports.ndtv.com/rss/all",                               # NDTV Sports
    "https://www.espncricinfo.com/rss/content/story/feeds/0.xml",    # ESPN Cricinfo - Cricket News

    # --------------------
    # ✅ Fact-Checking Sources
    # --------------------
    "https://www.snopes.com/feed/",                                  # Snopes - Fact Checking
    "https://www.politifact.com/rss/all/",                           # PolitiFact - Fact Checking
    "https://www.factcheck.org/feed/",                               # FactCheck - Fact Checking
    "https://leadstories.com/atom.xml",                              # Lead Stories - Fact Checking
    "https://fullfact.org/feed/all/",                                # Full Fact - Fact Checking
    "https://www.truthorfiction.com/feed/",                          # TruthOrFiction - Fact Checking

    # --------------------
    # 🗳️ Politics & Policy (General)
    # --------------------
    "https://feeds.bbci.co.uk/news/politics/rss.xml",                # BBC News - Politics
    "https://feeds.bbci.co.uk/news/science_and_environment/rss.xml", # BBC - Science & Environment

    # --------------------
    # 🔬 Science
    # --------------------
    "https://www.nature.com/nature.rss",                             # Nature science
    "https://feeds.science.org/rss/science-advances.xml"             # science.org
]

def clean_html(raw_html):
    """Remove HTML tags from text"""
    if not raw_html:
        return ""
    clean_regex = re.compile('<.*?>')
    clean_text = re.sub(clean_regex, '', raw_html)
    # Remove extra whitespace
    clean_text = re.sub(r'\s+', ' ', clean_text).strip()
    return clean_text

def parse_feed(feed_url, timeout=5):
    """
    Parse a single RSS feed with proper timeout handling
    Uses requests with timeout first, then passes content to feedparser
    """
    try:
        # Use requests with timeout to fetch the RSS content
        response = requests.get(feed_url, timeout=timeout)
        response.raise_for_status()

        # Then parse the content with feedparser (which doesn't support timeout)
        feed = feedparser.parse(response.content)

        # Basic validation of the feed
        if hasattr(feed, 'entries') and feed.entries:
            return feed
        else:
            logger.warning(f"Feed {feed_url} parsed but contains no entries")
            return None

    except requests.exceptions.Timeout:
        logger.warning(f"Timeout while fetching feed {feed_url}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error fetching feed {feed_url}: {str(e)}")
        return None
    except Exception as e:
        logger.error(f"Error parsing feed {feed_url}: {str(e)}")
        return None

def fetch_all_feeds(feeds_list=None, max_workers=5, timeout=5):
    """
    Fetch multiple RSS feeds with proper timeout handling
    Returns a list of (domain, feed) tuples for successfully fetched feeds
    """
    # Use default RSS_FEEDS list if none provided
    if feeds_list is None:
        feeds_list = RSS_FEEDS

    results = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_url = {executor.submit(parse_feed, url, timeout): url for url in feeds_list}
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                feed = future.result()
                if feed and hasattr(feed, 'entries') and feed.entries:
                    # Extract domain for source attribution
                    domain = urlparse(url).netloc
                    results.append((domain, feed))
                    logger.info(f"Successfully fetched {domain} with {len(feed.entries)} entries")
            except Exception as e:
                logger.error(f"Error processing {url}: {str(e)}")

    return results

def extract_date(entry):
    """Extract and normalize publication date from entry"""
    for date_field in ['published_parsed', 'updated_parsed', 'created_parsed']:
        if hasattr(entry, date_field) and getattr(entry, date_field):
            try:
                # Convert time tuple to datetime
                time_tuple = getattr(entry, date_field)
                return datetime(time_tuple[0], time_tuple[1], time_tuple[2],
                                time_tuple[3], time_tuple[4], time_tuple[5])
            except Exception as e:
                logger.debug(f"Error parsing {date_field}: {e}")
                continue

    # Try string dates
    for date_field in ['published', 'updated', 'pubDate']:
        if hasattr(entry, date_field) and getattr(entry, date_field):
            try:
                date_str = getattr(entry, date_field)
                # Try various formats
                for fmt in ['%a, %d %b %Y %H:%M:%S %z', '%a, %d %b %Y %H:%M:%S %Z',
                            '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S%z']:
                    try:
                        return datetime.strptime(date_str, fmt)
                    except ValueError:
                        continue
            except Exception as e:
                logger.debug(f"Error parsing date string {date_field}: {e}")
                continue

    # Default to current time if parsing fails
    return datetime.now()

def is_recent(entry_date, claim=None, max_days=3):
    """
    Check if an entry is recent based on temporal indicators in the claim.

    Args:
        entry_date (datetime): The date of the entry to check
        claim (str, optional): The claim text to analyze for temporal indicators
        max_days (int, optional): Default maximum age in days

    Returns:
        bool: True if entry is considered recent, False otherwise
    """
    if not entry_date:
        return False

    # Default max days if no claim is provided
    default_days = max_days
    extended_days = 15  # For 'recently', 'this week', etc.

    if claim:
        # Specific day indicators get default days
        specific_day_terms = ["today", "yesterday", "day before yesterday"]

        # Extended time terms get extended days
        extended_time_terms = [
            "recently", "currently", "freshly", "this week", "few days",
            "couple of days", "last week", "past week", "several days",
            "anymore"
        ]

        claim_lower = claim.lower()

        # Check for extended time terms first, then specific day terms
        if any(term in claim_lower for term in extended_time_terms):
            cutoff = datetime.now() - timedelta(days=extended_days)
            return entry_date > cutoff
        elif any(term in claim_lower for term in specific_day_terms):
            cutoff = datetime.now() - timedelta(days=default_days)
            return entry_date > cutoff

    # Default case - use standard window
    cutoff = datetime.now() - timedelta(days=default_days)
    return entry_date > cutoff

def get_entry_relevance(entry, query_terms, domain):
    """Calculate relevance score for an entry based on query match and recency"""
    if not hasattr(entry, 'title') or not entry.title:
        return 0

    # Extract text content
    title = entry.title or ""
    description = clean_html(entry.description) if hasattr(entry, 'description') else ""
    content = ""
    if hasattr(entry, 'content'):
        for content_item in entry.content:
            if 'value' in content_item:
                content += clean_html(content_item['value']) + " "

    # Extract published date
    pub_date = extract_date(entry)

    # Calculate recency score (0-1)
    recency_score = 0
    if pub_date:
        days_old = (datetime.now() - pub_date).days
        if days_old <= 1:  # Today or yesterday
            recency_score = 1.0
        elif days_old <= 2:
            recency_score = 0.8
        elif days_old <= 3:
            recency_score = 0.5
        else:
            recency_score = 0.2

    # Calculate relevance score based on keyword matches
    text = f"{title} {description} {content}".lower()

    # Count how many query terms appear in the content
    query_terms_lower = [term.lower() for term in query_terms]
    matches = sum(1 for term in query_terms_lower if term in text)

    # Calculate match score (0-1)
    match_score = min(1.0, matches / max(1, len(query_terms) * 0.7))

    # Boost score for exact phrase matches
    query_phrase = " ".join(query_terms_lower)
    if query_phrase in text:
        match_score += 0.5

    # Additional boost for title matches (they're more relevant)
    title_matches = sum(1 for term in query_terms_lower if term in title.lower())
    if title_matches > 0:
        match_score += 0.2 * (title_matches / len(query_terms_lower))

    # Source quality factor (can be adjusted based on source reliability)
    source_factor = 1.0
    high_quality_domains = ['bbc.co.uk', 'nytimes.com', 'reuters.com', 'washingtonpost.com',
                            'espncricinfo.com', 'cricbuzz.com', 'snopes.com']
    if any(quality_domain in domain for quality_domain in high_quality_domains):
        source_factor = 1.2

    # Calculate final score: weighted match/recency mix, scaled by source quality
    # (parentheses ensure source_factor scales the whole score, not just recency)
    final_score = ((match_score * 0.6) + (recency_score * 0.4)) * source_factor

    return min(1.0, final_score)  # Cap at 1.0

def retrieve_evidence_from_rss(claim, max_results=10, category_feeds=None):
    """
    Retrieve evidence from RSS feeds for a given claim

    Args:
        claim (str): The claim to verify
        max_results (int): Maximum number of results to return
        category_feeds (list, optional): List of category-specific RSS feeds to check

    Returns:
        list: List of relevant evidence items
    """
    start_time = time.time()
    logger.info(f"Retrieving evidence from RSS feeds for: {claim}")

    # Extract key terms from claim
    terms = [term.strip() for term in re.findall(r'\b\w+\b', claim) if len(term.strip()) > 2]

    try:
        # Use category-specific feeds if provided
        feeds_to_use = category_feeds if category_feeds else RSS_FEEDS

        # Log which feeds we're using
        if category_feeds:
            logger.info(f"Using {len(category_feeds)} category-specific RSS feeds")
        else:
            logger.info(f"Using {len(RSS_FEEDS)} default RSS feeds")

        # Limit the number of feeds to process for efficiency
        if len(feeds_to_use) > 10:
            # If we have too many feeds, select a subset
            # Prioritize fact-checking sources
            fact_check_feeds = [feed for feed in feeds_to_use if "fact" in feed.lower() or "snopes" in feed.lower() or "politifact" in feed.lower()]
            other_feeds = [feed for feed in feeds_to_use if feed not in fact_check_feeds]

            # Take all fact-checking feeds plus a random selection of others
            import random
            selected_feeds = fact_check_feeds + random.sample(other_feeds, min(10 - len(fact_check_feeds), len(other_feeds)))
        else:
            selected_feeds = feeds_to_use

        # Fetch all feeds in parallel with the selected feeds
        feeds = fetch_all_feeds(selected_feeds)

        if not feeds:
            logger.warning("No RSS feeds could be fetched")
            return []

        all_entries = []

        # Process all feed entries
        for domain, feed in feeds:
            for entry in feed.entries:
                # Calculate relevance score
                relevance = get_entry_relevance(entry, terms, domain)

                if relevance > 0.3:  # Only consider somewhat relevant entries
                    # Extract entry details
                    title = entry.title if hasattr(entry, 'title') else "No title"
                    link = entry.link if hasattr(entry, 'link') else ""

                    # Extract and clean description/content
                    description = ""
                    if hasattr(entry, 'description'):
                        description = clean_html(entry.description)
                    elif hasattr(entry, 'summary'):
                        description = clean_html(entry.summary)
                    elif hasattr(entry, 'content'):
                        for content_item in entry.content:
                            if 'value' in content_item:
                                description += clean_html(content_item['value']) + " "

                    # Truncate description if too long
                    if len(description) > 1000:
                        description = description[:1000] + "..."

                    # Get publication date
                    pub_date = extract_date(entry)
                    date_str = pub_date.strftime('%Y-%m-%d') if pub_date else "Unknown date"

                    # Format as evidence text
                    evidence_text = (
                        f"Title: {title}, "
                        f"Source: {domain} (RSS), "
                        f"Date: {date_str}, "
                        f"URL: {link}, "
                        f"Content: {description}"
                    )

                    all_entries.append({
                        "text": evidence_text,
                        "relevance": relevance,
                        "date": pub_date or datetime.now()
                    })

        # Sort entries by relevance
        all_entries.sort(key=lambda x: x["relevance"], reverse=True)

        # Take top results
        top_entries = all_entries[:max_results]

        logger.info(f"Retrieved {len(top_entries)} relevant RSS items from {len(feeds)} feeds in {time.time() - start_time:.2f}s")

        # Return just the text portion
        return [entry["text"] for entry in top_entries]

    except Exception as e:
        logger.error(f"Error in RSS retrieval: {str(e)}")
        return []
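A usage sketch for the RSS retriever; the first call relies on the default RSS_FEEDS list, the second restricts retrieval to two of the technology feeds defined above. Both claims are illustrative:

    from modules.rss_feed import retrieve_evidence_from_rss

    items = retrieve_evidence_from_rss("India won the T20 series yesterday", max_results=5)

    tech_feeds = ["https://techcrunch.com/feed/", "https://www.wired.com/feed/rss"]
    items = retrieve_evidence_from_rss(
        "A major chip maker announced record quarterly results",
        max_results=5,
        category_feeds=tech_feeds,
    )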
utils/__init__.py
ADDED
@@ -0,0 +1,20 @@
"""
Utils package initialization.

This package provides utility functions for the AskVeracity fact-checking system.
"""

from .api_utils import api_error_handler, safe_json_parse, RateLimiter
from .performance import PerformanceTracker
from .models import initialize_models, get_nlp_model, get_llm_model


__all__ = [
    'api_error_handler',
    'safe_json_parse',
    'RateLimiter',
    'PerformanceTracker',
    'initialize_models',
    'get_nlp_model',
    'get_llm_model'
]
utils/api_utils.py
ADDED
@@ -0,0 +1,229 @@
1 |
+
"""
|
2 |
+
API utilities for the Fake News Detector application.
|
3 |
+
|
4 |
+
This module provides utilities for handling API calls, rate limiting,
|
5 |
+
error handling, and exponential backoff for retrying failed requests.
|
6 |
+
"""
|
7 |
+
|
8 |
+
import time
|
9 |
+
import functools
|
10 |
+
import random
|
11 |
+
import logging
|
12 |
+
import requests
|
13 |
+
from datetime import datetime, timedelta
|
14 |
+
from collections import deque
|
15 |
+
|
16 |
+
from config import RATE_LIMITS, ERROR_BACKOFF
|
17 |
+
|
18 |
+
logger = logging.getLogger("misinformation_detector")
|
19 |
+
|
20 |
+
class RateLimiter:
|
21 |
+
"""
|
22 |
+
Rate limiter for API calls with support for different APIs.
|
23 |
+
|
24 |
+
This class implements a token bucket algorithm for rate limiting,
|
25 |
+
with support for different rate limits for different APIs.
|
26 |
+
It also provides exponential backoff for error handling.
|
27 |
+
"""
|
28 |
+
|
29 |
+
def __init__(self):
|
30 |
+
"""Initialize the rate limiter with configuration from settings."""
|
31 |
+
# Store rate limits for different APIs
|
32 |
+
self.limits = {}
|
33 |
+
|
34 |
+
# Initialize limits from config
|
35 |
+
for api_name, limit_info in RATE_LIMITS.items():
|
36 |
+
self.limits[api_name] = {
|
37 |
+
"requests": limit_info["requests"],
|
38 |
+
"period": limit_info["period"],
|
39 |
+
"timestamps": deque()
|
40 |
+
}
|
41 |
+
|
42 |
+
# Error backoff settings
|
43 |
+
self.max_retries = ERROR_BACKOFF["max_retries"]
|
44 |
+
self.initial_backoff = ERROR_BACKOFF["initial_backoff"]
|
45 |
+
self.backoff_factor = ERROR_BACKOFF["backoff_factor"]
|
46 |
+
|
47 |
+
def check_and_update(self, api_name):
|
48 |
+
"""
|
49 |
+
Check if request is allowed and update timestamps.
|
50 |
+
|
51 |
+
Args:
|
52 |
+
api_name (str): Name of the API to check
|
53 |
+
|
54 |
+
Returns:
|
55 |
+
tuple: (allowed, wait_time)
|
56 |
+
- allowed (bool): Whether the request is allowed
|
57 |
+
- wait_time (float): Time to wait if not allowed
|
58 |
+
"""
|
59 |
+
if api_name not in self.limits:
|
60 |
+
return True, 0 # Unknown API, allow by default
|
61 |
+
|
62 |
+
now = datetime.now()
|
63 |
+
limit_info = self.limits[api_name]
|
64 |
+
|
65 |
+
# Remove timestamps older than the period
|
66 |
+
cutoff = now - timedelta(seconds=limit_info["period"])
|
67 |
+
while limit_info["timestamps"] and limit_info["timestamps"][0] < cutoff:
|
68 |
+
limit_info["timestamps"].popleft()
|
69 |
+
|
70 |
+
# Check if we're at the rate limit
|
71 |
+
if len(limit_info["timestamps"]) >= limit_info["requests"]:
|
72 |
+
# Calculate wait time until oldest timestamp expires
|
73 |
+
wait_time = (limit_info["timestamps"][0] + timedelta(seconds=limit_info["period"]) - now).total_seconds()
|
74 |
+
return False, max(0, wait_time)
|
75 |
+
|
76 |
+
# Add current timestamp and allow request
|
77 |
+
limit_info["timestamps"].append(now)
|
78 |
+
return True, 0
|
79 |
+
|
80 |
+
def wait_if_needed(self, api_name):
|
81 |
+
"""
|
82 |
+
Wait if rate limit is reached.
|
83 |
+
|
84 |
+
Args:
|
85 |
+
api_name (str): Name of the API to check
|
86 |
+
|
87 |
+
Returns:
|
88 |
+
bool: True if waited, False otherwise
|
89 |
+
"""
|
90 |
+
allowed, wait_time = self.check_and_update(api_name)
|
91 |
+
if not allowed:
|
92 |
+
logger.info(f"Rate limit reached for {api_name}. Waiting {wait_time:.2f} seconds...")
|
93 |
+
time.sleep(wait_time + 0.1) # Add a small buffer
|
94 |
+
return True
|
95 |
+
return False
|
96 |
+
|
97 |
+
def get_backoff_time(self, attempt):
|
98 |
+
"""
|
99 |
+
Calculate exponential backoff time with jitter.
|
100 |
+
|
101 |
+
Args:
|
102 |
+
attempt (int): Current attempt number (0-based)
|
103 |
+
|
104 |
+
Returns:
|
105 |
+
float: Backoff time in seconds
|
106 |
+
"""
|
107 |
+
backoff = self.initial_backoff * (self.backoff_factor ** attempt)
|
108 |
+
# Add jitter to prevent thundering herd problem
|
109 |
+
jitter = random.uniform(0, 0.1 * backoff)
|
110 |
+
return backoff + jitter
|
111 |
+
|
112 |
+
|
113 |
+
# Create rate limiter instance
|
114 |
+
rate_limiter = RateLimiter()
|
115 |
+
|
# API error handler decorator
def api_error_handler(api_name):
    """
    Decorator for API calls with error handling and rate limiting.

    This decorator handles rate limiting, retries with exponential
    backoff, and error handling for API calls.

    Args:
        api_name (str): Name of the API being called

    Returns:
        callable: Decorated function
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                # Apply rate limiting - make sure rate_limiter exists and has the method
                if hasattr(rate_limiter, 'wait_if_needed'):
                    rate_limiter.wait_if_needed(api_name)

                # Track retries
                for attempt in range(rate_limiter.max_retries):
                    try:
                        return func(*args, **kwargs)
                    except requests.exceptions.HTTPError as e:
                        status_code = e.response.status_code if hasattr(e, 'response') else 0

                        # Handle specific HTTP errors
                        if status_code == 429:  # Too Many Requests
                            logger.warning(f"{api_name} rate limit exceeded (429). Attempt {attempt+1}/{rate_limiter.max_retries}")
                            # Use the Retry-After header if present, else exponential backoff
                            retry_after = e.response.headers.get('Retry-After')
                            if retry_after and retry_after.isdigit():
                                wait_time = int(retry_after)
                            else:
                                wait_time = rate_limiter.get_backoff_time(attempt)
                            logger.info(f"Waiting {wait_time} seconds before retry...")
                            time.sleep(wait_time)
                        elif status_code >= 500:  # Server errors
                            logger.warning(f"{api_name} server error ({status_code}). Attempt {attempt+1}/{rate_limiter.max_retries}")
                            time.sleep(rate_limiter.get_backoff_time(attempt))
                        elif status_code == 403:  # Forbidden - likely an API key issue
                            logger.error(f"{api_name} access forbidden (403). Check API key.")
                            return None  # Don't retry on auth errors
                        elif status_code == 404:  # Not Found
                            logger.warning(f"{api_name} resource not found (404).")
                            return None  # Don't retry on missing resources
                        else:
                            logger.error(f"{api_name} HTTP error: {e}")
                            if attempt < rate_limiter.max_retries - 1:
                                wait_time = rate_limiter.get_backoff_time(attempt)
                                logger.info(f"Waiting {wait_time} seconds before retry...")
                                time.sleep(wait_time)
                            else:
                                return None

                    except requests.exceptions.ConnectionError as e:
                        logger.error(f"{api_name} connection error: {e}")
                        if attempt < rate_limiter.max_retries - 1:
                            wait_time = rate_limiter.get_backoff_time(attempt)
                            logger.info(f"Waiting {wait_time} seconds before retry...")
                            time.sleep(wait_time)
                        else:
                            return None

                    except requests.exceptions.Timeout as e:
                        logger.error(f"{api_name} timeout error: {e}")
                        if attempt < rate_limiter.max_retries - 1:
                            wait_time = rate_limiter.get_backoff_time(attempt)
                            logger.info(f"Waiting {wait_time} seconds before retry...")
                            time.sleep(wait_time)
                        else:
                            return None

                    except Exception as e:
                        logger.error(f"{api_name} unexpected error: {str(e)}")
                        if attempt < rate_limiter.max_retries - 1:
                            wait_time = rate_limiter.get_backoff_time(attempt)
                            logger.info(f"Waiting {wait_time} seconds before retry...")
                            time.sleep(wait_time)
                        else:
                            return None

                # If we've exhausted all retries
                logger.error(f"{api_name} call failed after {rate_limiter.max_retries} attempts")
                return None

            except Exception as e:
                # Catch any unexpected errors in the decorator itself
                logger.error(f"{api_name} decorator error: {str(e)}")
                return None

        return wrapper
    return decorator

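# A minimal usage sketch (illustrative addition; fetch_gnews_results is a
# hypothetical function, not one defined in this repository). Note that
# response.raise_for_status() is what surfaces the HTTPError the wrapper
# retries on, and that the decorator returns None once retries are exhausted,
# so callers must handle that case:
#
#     @api_error_handler("gnews")
#     def fetch_gnews_results(query):
#         response = requests.get("https://gnews.io/api/v4/search",
#                                 params={"q": query}, timeout=10)
#         response.raise_for_status()
#         return response
#
#     result = fetch_gnews_results("climate change")
#     if result is None:
#         pass  # fall back to another evidence source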
def safe_json_parse(response, api_name):
    """
    Safely parse a JSON response with error handling.

    Args:
        response (requests.Response): Response object to parse
        api_name (str): Name of the API for logging

    Returns:
        dict: Parsed JSON or empty dict on error
    """
    try:
        return response.json()
    except ValueError as e:
        logger.error(f"Error parsing {api_name} JSON response: {e}")
        logger.debug(f"Response content: {response.text[:500]}...")
        return {}
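How the pieces above compose, as a minimal sketch (fetch_gnews_results is the
hypothetical decorated function from the previous note, not code from this
repository). Because the decorator returns None on failure and safe_json_parse
returns an empty dict on malformed bodies, callers get graceful degradation
without writing their own try/except:

    response = fetch_gnews_results("climate change")
    data = safe_json_parse(response, "gnews") if response is not None else {}
    articles = data.get("articles", [])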
utils/models.py
ADDED
@@ -0,0 +1,157 @@
"""
Model management utility for the Fake News Detector application.

This module provides functions for initializing, caching, and
retrieving language models used throughout the application.
It ensures models are loaded efficiently and reused appropriately.
"""

import os
import logging
import functools
from langchain_openai import ChatOpenAI
import spacy

logger = logging.getLogger("misinformation_detector")

# Global variables for models
nlp = None
model = None
models_initialized = False

# Caching decorator for model loaders
def cached_model(func):
    """
    Decorator to cache model loading for improved performance.

    This decorator ensures that models are only loaded once and
    then reused for subsequent calls, improving performance by
    avoiding redundant model loading.

    Args:
        func (callable): Function that loads a model

    Returns:
        callable: Wrapped function that returns a cached model
    """
    cache = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Use the function name as the cache key
        key = func.__name__
        if key not in cache:
            logger.info(f"Model not in cache, calling {key}...")
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapper

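# A minimal sketch of the caching behavior (illustrative addition;
# load_dummy_model is hypothetical). Because the cache key is the function
# name, each decorated loader runs at most once per process and later calls
# return the same object:
#
#     @cached_model
#     def load_dummy_model():
#         return object()
#
#     a = load_dummy_model()  # logs "Model not in cache..." and loads
#     b = load_dummy_model()  # served from the cache
#     assert a is b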
def initialize_models():
    """
    Initialize all required models.

    This function loads and initializes all the language models
    needed by the application, including spaCy for NLP tasks and
    OpenAI for LLM-based processing.

    Returns:
        str: Initialization status message

    Raises:
        ValueError: If the OpenAI API key is not set
    """
    global nlp, model, models_initialized

    # Skip initialization if already done
    if models_initialized:
        logger.info("Models already initialized, skipping initialization")
        return "Models already initialized"

    # Check OpenAI API key
    if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"].strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty")
        raise ValueError("OpenAI API key is required. Please set it in the Hugging Face Space secrets.")

    try:
        # Load NLP model
        try:
            logger.info("Loading spaCy NLP model...")
            nlp = spacy.load("en_core_web_sm")
            logger.info("Loaded spaCy NLP model")
        except OSError as e:
            # Handle the case where the model wasn't installed correctly
            logger.warning(f"Could not load spaCy model: {str(e)}")
            logger.info("Attempting to download spaCy model...")
            try:
                import subprocess
                import sys
                # Download the model if it's missing
                subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
                # Try loading again
                nlp = spacy.load("en_core_web_sm")
                logger.info("Successfully downloaded and loaded spaCy model")
            except Exception as download_err:
                logger.error(f"Failed to download spaCy model: {str(download_err)}")
                # Continue with other initialization; a missing NLP model is handled elsewhere

        # Set up OpenAI model
        logger.info("Initializing ChatOpenAI model...")
        model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        logger.info("Initialized ChatOpenAI model")

        # Mark initialization as complete
        models_initialized = True
        return "Models initialized successfully"

    except Exception as e:
        logger.error(f"Error initializing models: {str(e)}")
        raise

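# A minimal startup sketch (illustrative addition; in the deployed Space the
# key is provided via secrets and initialization is presumably triggered by
# the app's entry point rather than by hand):
#
#     os.environ.setdefault("OPENAI_API_KEY", "sk-...")  # placeholder key
#     status = initialize_models()  # "Models initialized successfully"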
@cached_model
def get_nlp_model():
    """
    Get the spaCy NLP model, initializing if needed.

    This function returns a cached spaCy model for NLP tasks.
    If the model hasn't been loaded yet, it will be loaded.

    Returns:
        spacy.Language: Loaded spaCy model
    """
    global nlp
    if nlp is None:
        try:
            # Try to load just the spaCy model if not loaded yet
            logger.info("Loading spaCy NLP model...")
            nlp = spacy.load("en_core_web_sm")
            logger.info("Loaded spaCy NLP model")
        except Exception as e:
            logger.error(f"Error loading spaCy model: {str(e)}")
            # Fall back to full initialization
            initialize_models()
    return nlp

@cached_model
def get_llm_model():
    """
    Get the ChatOpenAI model, initializing if needed.

    This function returns a cached OpenAI LLM model.
    If the model hasn't been loaded yet, it will be loaded.

    Returns:
        ChatOpenAI: Loaded LLM model
    """
    global model
    if model is None:
        try:
            # Try to load just the LLM model if not loaded yet
            logger.info("Initializing ChatOpenAI model...")
            model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
            logger.info("Initialized ChatOpenAI model")
        except Exception as e:
            logger.error(f"Error initializing ChatOpenAI model: {str(e)}")
            # Fall back to full initialization
            initialize_models()
    return model
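A minimal usage sketch for the two accessors above (the invoke call follows
the standard LangChain ChatOpenAI interface; the example texts are
placeholders):

    nlp = get_nlp_model()
    doc = nlp("NASA confirmed the launch date.")
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    llm = get_llm_model()
    reply = llm.invoke("Summarize this claim in one sentence.")
    print(reply.content)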
utils/performance.py
ADDED
@@ -0,0 +1,135 @@
"""
Performance tracking utility for the Fake News Detector application.

This module provides functionality to track and analyze the
performance of the application, including processing times,
success rates, and resource utilization.
"""

import time
import logging

logger = logging.getLogger("misinformation_detector")

class PerformanceTracker:
    """
    Tracks and logs performance metrics for the fact-checking system.

    This class maintains counters and statistics for various performance
    metrics, such as processing times, evidence retrieval success rates,
    and confidence scores.
    """

    def __init__(self):
        """Initialize the performance tracker with empty metrics."""
        self.metrics = {
            "claims_processed": 0,
            "evidence_retrieval_success_rate": [],
            "processing_times": [],
            "confidence_scores": [],
            "source_types_used": {},
            "temporal_relevance": []
        }

    def log_claim_processed(self):
        """
        Increment the counter for processed claims.

        This should be called whenever a claim is processed successfully.
        """
        self.metrics["claims_processed"] += 1

    def log_evidence_retrieval(self, success, sources_count):
        """
        Log the success or failure of evidence retrieval.

        Args:
            success (bool): Whether evidence retrieval was successful
            sources_count (dict): Count of evidence items by source type
        """
        # Normalize success to 0 or 1
        success_value = 1 if success else 0
        self.metrics["evidence_retrieval_success_rate"].append(success_value)

        # Safely process source types
        if isinstance(sources_count, dict):
            for source_type, count in sources_count.items():
                # Ensure source_type is a string and count is an integer
                source_type = str(source_type)
                try:
                    count = int(count)
                except (ValueError, TypeError):
                    count = 1

                # Update source types used
                self.metrics["source_types_used"][source_type] = \
                    self.metrics["source_types_used"].get(source_type, 0) + count

    def log_processing_time(self, start_time):
        """
        Log the processing time for an operation.

        Args:
            start_time (float): Start time obtained from time.time()
        """
        end_time = time.time()
        processing_time = end_time - start_time
        self.metrics["processing_times"].append(processing_time)

    def log_confidence_score(self, score):
        """
        Log a confidence score.

        Args:
            score (float): Confidence score between 0 and 1
        """
        # Ensure score is a float between 0 and 1
        try:
            score = float(score)
            if 0 <= score <= 1:
                self.metrics["confidence_scores"].append(score)
        except (ValueError, TypeError):
            logger.warning(f"Invalid confidence score: {score}")

    def log_temporal_relevance(self, relevance_score):
        """
        Log a temporal relevance score.

        Args:
            relevance_score (float): Temporal relevance score between 0 and 1
        """
        # Ensure the relevance score is a float between 0 and 1
        try:
            relevance_score = float(relevance_score)
            if 0 <= relevance_score <= 1:
                self.metrics["temporal_relevance"].append(relevance_score)
        except (ValueError, TypeError):
            logger.warning(f"Invalid temporal relevance score: {relevance_score}")

    def get_summary(self):
        """
        Get a summary of all performance metrics.

        Returns:
            dict: Summary of performance metrics
        """
        # Safely calculate averages with error handling
        def safe_avg(metric_list):
            try:
                return sum(metric_list) / max(len(metric_list), 1)
            except (TypeError, ValueError):
                return 0.0

        return {
            "claims_processed": self.metrics["claims_processed"],
            "avg_evidence_retrieval_success_rate": safe_avg(self.metrics["evidence_retrieval_success_rate"]),
            "avg_processing_time": safe_avg(self.metrics["processing_times"]),
            "avg_confidence_score": safe_avg(self.metrics["confidence_scores"]),
            "source_types_used": dict(self.metrics["source_types_used"]),
            "avg_temporal_relevance": safe_avg(self.metrics["temporal_relevance"])
        }

    def reset(self):
        """Reset all performance metrics."""
        self.__init__()
        logger.info("Performance metrics have been reset")
        return "Performance metrics reset successfully"
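A minimal end-to-end sketch of the tracker (illustrative; the claim-processing
step in the middle stands in for the application's real pipeline):

    tracker = PerformanceTracker()

    start = time.time()
    # ... run evidence retrieval and classification for one claim ...
    tracker.log_claim_processed()
    tracker.log_evidence_retrieval(True, {"news": 3, "wikipedia": 1})
    tracker.log_confidence_score(0.87)
    tracker.log_processing_time(start)

    print(tracker.get_summary())
    # {'claims_processed': 1, 'avg_evidence_retrieval_success_rate': 1.0, ...}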