ankanghosh committed on
Commit
674908b
·
verified ·
1 Parent(s): 8983168

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -526
app.py DELETED
@@ -1,526 +0,0 @@
1
- """
2
- Main Streamlit application for the Fake News Detector.
3
-
4
- This module implements the user interface for claim verification,
5
- rendering the results and handling user interactions. It also
6
- manages the application lifecycle including initialization and cleanup.
7
- """
8
-
9
- import streamlit as st
10
- import time
11
- import json
12
- import os
13
- import logging
14
- import atexit
15
- import sys
16
- from pathlib import Path
17
-
18
- # Configure logging first, before other imports
19
- logging.basicConfig(
20
- level=logging.INFO,
21
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
22
- handlers=[logging.StreamHandler()]
23
- )
24
- logger = logging.getLogger("misinformation_detector")
25
-
26
- # Check for critical environment variables
27
- if not os.environ.get("OPENAI_API_KEY"):
28
- logger.warning("OPENAI_API_KEY not set. Please configure this in your Hugging Face Spaces secrets.")
29
-
30
- # Import our modules
31
- from utils.models import initialize_models
32
- from utils.performance import PerformanceTracker
33
-
34
- # Import agent functionality
35
- import agent
36
-
37
- # Initialize performance tracker
38
- performance_tracker = PerformanceTracker()
39
-
40
- # Ensure data directory exists
41
- data_dir = Path("data")
42
- if not data_dir.exists():
43
- logger.info("Creating data directory")
44
- data_dir.mkdir(exist_ok=True)
45
-
46
- # Set page configuration
47
- st.set_page_config(
48
- page_title="AskVeracity",
49
- page_icon="🔍",
50
- layout="wide",
51
- )
52
-
53
- # Hide the "Press ⌘+Enter to apply" text with CSS
54
- st.markdown("""
55
- <style>
56
- /* Hide the shortcut text that appears at the bottom of text areas */
57
- .stTextArea div:has(textarea) + div {
58
- visibility: hidden !important;
59
- height: 0px !important;
60
- position: absolute !important;
61
- }
62
- </style>
63
- """, unsafe_allow_html=True)
64
-
65
@st.cache_resource
def get_agent():
    """
    Initialize the models and build the fact-checking agent, cached.

    The function is decorated with ``st.cache_resource``, so its body runs
    only on a cache miss and the returned object is reused on later calls.
    Note that ``st.cache_resource`` is a process-wide cache: the same agent
    instance is shared by every session and rerun of this app, not created
    once per session. The agent must therefore not hold per-user state.

    Returns:
        object: Whatever ``agent.setup_agent()`` returns (the agent used by
        ``agent.process_claim``).
    """
    logger.info("Initializing models and agent (cached)")
    # Models must be initialized before the agent that uses them is built.
    initialize_models()
    return agent.setup_agent()
80
-
81
def cleanup_resources():
    """
    Release app resources at interpreter shutdown (best effort).

    Clears Streamlit's ``st.cache_data`` cache and resets the module-level
    ``performance_tracker``. Any failure is logged with its traceback and
    swallowed, because an exit hook must not raise during shutdown.
    """
    try:
        # Clear any cached data
        st.cache_data.clear()

        # Reset performance tracker
        performance_tracker.reset()

        # Log cleanup
        logger.info("Resources cleaned up successfully")
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error log drops.
        logger.exception("Error during cleanup: %s", e)


@st.cache_resource(show_spinner=False)
def _register_cleanup_once():
    """
    Register ``cleanup_resources`` with ``atexit`` exactly once per process.

    Streamlit re-executes this script from top to bottom on every user
    interaction, so a bare ``atexit.register(...)`` at module level would add
    one more handler on each rerun. Wrapping the registration in a
    ``st.cache_resource`` function makes it run only on the first call; later
    reruns hit the cache and register nothing.

    Returns:
        bool: Always ``True``; the value only exists so there is something
        to cache.
    """
    atexit.register(cleanup_resources)
    return True


# Register cleanup handler
_register_cleanup_once()
102
-
103
- # App title and description
104
- st.title("🔍 AskVeracity")
105
- st.markdown("""
106
- This is a simple AI-powered agentic tool - a fact-checking system that analyzes claims to determine
107
- their truthfulness by gathering and analyzing evidence from various sources, such as Wikipedia,
108
- news outlets, and academic repositories. The application aims to support broader efforts in misinformation detection.
109
- """)
110
-
111
- # Sidebar with app information
112
- with st.sidebar:
113
- st.header("About")
114
- st.info(
115
- "This system uses a combination of NLP techniques and LLMs to "
116
- "extract claims, gather evidence, and classify the truthfulness of statements.\n\n"
117
- "**Technical:** Built with Python, Streamlit, LangGraph, and OpenAI, leveraging spaCy for NLP and various APIs for retrieving evidence from diverse sources."
118
- )
119
-
120
- # Application information
121
- st.markdown("### How It Works")
122
- st.info(
123
- "1. Enter any recent news or a factual claim\n"
124
- "2. Our AI gathers evidence from Wikipedia, news sources, and academic repositories\n"
125
- "3. The system analyzes the evidence to determine truthfulness\n"
126
- "4. Results show the verdict with supporting evidence"
127
- )
128
-
129
- # Our Mission
130
- st.markdown("### Our Mission")
131
- st.info(
132
- "AskVeracity aims to combat misinformation in real-time through an open-source application built with accessible tools. "
133
- "We believe in empowering people with factual information to make informed decisions."
134
- )
135
-
136
- # Limitations and Usage
137
- st.markdown("### Limitations")
138
- st.warning(
139
- "Due to resource constraints, AskVeracity may not always provide real-time results with perfect accuracy. "
140
- "Performance is typically best with widely-reported news and information published within the last 48 hours. "
141
- "Additionally, the system evaluates claims based on current evidence - a claim that was true in the past "
142
- "may be judged false if circumstances have changed, and vice versa."
143
- )
144
-
145
- # Best Practices
146
- st.markdown("### Best Practices")
147
- st.success(
148
- "For optimal results:\n\n"
149
- "• Keep claims short and precise\n\n"
150
- "• Include key details in your claim\n\n"
151
- "• Phrase claims as direct statements rather than questions\n\n"
152
- "• Be specific about who said what"
153
- )
154
-
155
- # Example comparison
156
- with st.expander("📝 Examples of Effective Claims"):
157
- st.markdown("""
158
- **Less precise:** "Country A-Country B Relations Are Moving in Positive Direction as per Country B Minister John Doe."
159
-
160
- **More precise:** "Country B's External Affairs Minister John Doe has claimed that Country A-Country B Relations Are Moving in Positive Direction."
161
- """)
162
-
163
- # Important Notes
164
- st.markdown("### Important Notes")
165
- st.info(
166
- "• AskVeracity covers general topics and is not specialized in any single domain or location\n\n"
167
- "• Results can vary based on available evidence and LLM behavior\n\n"
168
- "• The system is designed to indicate uncertainty when evidence is insufficient\n\n"
169
- "• AskVeracity is not a chatbot and does not maintain conversation history\n\n"
170
- "• We recommend cross-verifying critical information with additional sources"
171
- )
172
-
173
- # Privacy Information
174
- st.markdown("### Data Privacy")
175
- st.info(
176
- "We do not collect or store any data about the claims you submit. "
177
- "Your interactions are processed by OpenAI's API. Please refer to "
178
- "[OpenAI's privacy policy](https://openai.com/policies/privacy-policy) for details on their data handling practices."
179
- )
180
-
181
- # Feedback Section
182
- st.markdown("### Feedback")
183
- st.success(
184
- "AskVeracity is evolving and we welcome your feedback to help us improve. "
185
- "Please reach out to us with questions, suggestions, or concerns."
186
- )
187
-
188
# Initialize session state variables.
# Each key is seeded only when absent, so values written by earlier reruns
# of this session are left untouched.
_SESSION_DEFAULTS = {
    'processing': False,
    'claim_to_process': "",
    'has_result': False,
    'result': None,
    'total_time': 0,
    'fresh_state': True,
    # Verify button disabled state
    'verify_btn_disabled': False,
    # Flag used to clear the input field on the next rerun
    'clear_form': False,
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
207
-
208
- # Main interface
209
- st.markdown("### Enter a claim to verify")
210
-
211
# Input area
# The label is non-empty because Streamlit discourages empty widget labels
# for accessibility reasons; label_visibility="collapsed" keeps it out of
# the visible layout, so the page looks the same as before.
claim_input = st.text_area(
    "Claim to verify",
    height=100,
    placeholder=(
        "Examples: The Eiffel Tower is located in Rome, Italy. "
        "Meta recently released its Llama 4 large language model. "
        "Justin Trudeau is not the Canadian Prime Minister anymore. "
        "China retaliated with 125% tariffs against U.S. imports. "
        "A recent piece of news."
    ),
    key="claim_input_area",
    value="" if st.session_state.clear_form else None,  # Clear if flag is set
    label_visibility="collapsed",
    max_chars=None,
)

# Reset the clear_form flag after using it, so the field is cleared only on
# the single rerun that follows "Verify Another Claim".
if st.session_state.clear_form:
    st.session_state.clear_form = False
229
-
230
- # Information about result variability
231
- st.caption("""
232
- 💡 **Note:** Results may vary slightly each time, even for the same claim. This is by design, allowing our system to:
233
- - Incorporate the most recent evidence available
234
- - Benefit from the AI's ability to consider multiple perspectives
235
- - Adapt to evolving information landscapes
236
- """)
237
-
238
- st.warning("⏱️ **Note:** Processing times may vary from 10 seconds to 2 minutes depending on query complexity, available evidence, and current API response times.")
239
-
240
# ---------------------------------------------------------------------------
# Verification flow: input view -> processing view -> results view.
#
# The view is selected from three session flags:
#   fresh_state  -> show the "Verify Claim" button
#   processing   -> run the agent on the stored claim, then rerun
#   has_result   -> render the stored result
# ---------------------------------------------------------------------------
import ast
import html
import re
from collections import Counter

# Confidence used when the agent omits the value or returns a non-numeric one.
DEFAULT_CONFIDENCE = 0.6

# Extracts the host from an evidence "URL: ..." line. The host stops at the
# first "/" or whitespace, so a URL without a path does not swallow the text
# that follows it on later lines.
_SOURCE_HOST_RE = re.compile(r'URL: https?://(?:www\.)?([^/\s]+)')


def _as_float(value, default=0.0):
    """Return ``value`` converted to float, or ``default`` if it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _clamp_confidence(value):
    """Return ``value`` as a float limited to [0, 1]; non-numeric input gives the default."""
    return min(1.0, max(0.0, _as_float(value, DEFAULT_CONFIDENCE)))


def _normalize_evidence(raw):
    """
    Coerce the agent's ``evidence`` field to a list.

    A list is returned unchanged. A string is parsed as a Python literal and
    used if it evaluates to a list; otherwise the string itself becomes the
    single item. Any other truthy value is stringified into a one-item list,
    and a falsy value gives an empty list.
    """
    if isinstance(raw, list):
        return raw
    if isinstance(raw, str):
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # These are the errors literal_eval documents for malformed
            # input; plain prose evidence lands here and is kept verbatim.
            return [raw]
        return parsed if isinstance(parsed, list) else [raw]
    return [str(raw)] if raw else []


def _render_evidence_items(items):
    """
    Show each non-blank evidence string in its own expander.

    Returns:
        bool: True if at least one item was rendered.
    """
    rendered_any = False
    for position, item in enumerate(items):
        if item and isinstance(item, str) and item.strip():
            with st.expander(f"Evidence {position + 1}", expanded=position == 0):
                st.text(item)
            rendered_any = True
    return rendered_any


def _return_to_input(message):
    """
    Go back to the input view and show ``message`` there.

    ``st.rerun()`` discards everything drawn in the current run, so an
    ``st.error`` emitted right before it is never seen. The message is
    therefore stored in session state and displayed by the next run.
    """
    st.session_state.error_message = message
    st.session_state.processing = False
    st.session_state.fresh_state = True
    st.rerun()


# Create a clean interface based on state
if st.session_state.fresh_state:
    # Show (once) any error handed over by a failed processing run.
    if "error_message" in st.session_state:
        pending_error = st.session_state["error_message"]
        del st.session_state["error_message"]
        if pending_error:
            st.error(pending_error)

    # Only show the verify button in fresh state
    verify_button = st.button(
        "Verify Claim",
        type="primary",
        key="verify_btn",
    )

    # When button is clicked and not already processing
    if verify_button and not st.session_state.processing:
        # Whitespace-only input is treated the same as empty input.
        if not (claim_input and claim_input.strip()):
            st.error("Please enter a claim to verify.")
        else:
            # Store the claim and set processing state
            st.session_state.claim_to_process = claim_input
            st.session_state.processing = True
            st.session_state.fresh_state = False
            # Force a rerun to refresh UI
            st.rerun()

else:
    # This is either during processing or showing results
    analysis_container = st.container()

    with analysis_container:
        if st.session_state.processing:
            st.subheader("🔄 Processing...")
            status = st.empty()
            status.text("Verifying claim... (this may take a while)")
            progress_bar = st.progress(0)

            result = None
            total_time = 0.0
            failure_message = None
            try:
                # Agent setup is inside the try block so that a failure here
                # also resets the flags instead of leaving "processing" stuck.
                if 'agent_initialized' not in st.session_state:
                    with st.spinner("Initializing system..."):
                        st.session_state.agent = get_agent()
                        st.session_state.agent_initialized = True

                # Process the stored claim; perf_counter is monotonic, so the
                # measured duration is not affected by wall-clock changes.
                start_time = time.perf_counter()
                result = agent.process_claim(
                    st.session_state.claim_to_process,
                    st.session_state.agent,
                )
                total_time = time.perf_counter() - start_time

                # Update progress as claim processing completes
                progress_bar.progress(100)
            except Exception as e:
                logger.exception("Error during claim processing: %s", e)
                failure_message = f"An error occurred: {str(e)}"

            # A missing or non-dict result cannot be rendered; treat it as a failure.
            if failure_message is None and not isinstance(result, dict):
                failure_message = "Failed to process the claim. Please try again."

            if failure_message is not None:
                _return_to_input(failure_message)
            else:
                # If key values are missing or None, provide default values
                fallbacks = {
                    "classification": "Uncertain",
                    "confidence": DEFAULT_CONFIDENCE,
                    "explanation": "Insufficient evidence was found to determine the truthfulness of this claim.",
                }
                for field, fallback in fallbacks.items():
                    if result.get(field) is None:
                        result[field] = fallback

                # Add timing information only if the agent did not report any
                result.setdefault("processing_times", {"total": total_time})

                # Store the result and timing information
                st.session_state.result = result
                st.session_state.total_time = total_time
                st.session_state.has_result = True
                st.session_state.processing = False

                # Clear processing indicators before showing results
                status.empty()
                progress_bar.empty()

                # Force rerun to display results
                st.rerun()

        # Display results if available
        elif st.session_state.has_result and st.session_state.result:
            result = st.session_state.result
            total_time = st.session_state.total_time
            claim_to_process = st.session_state.claim_to_process

            st.subheader("📊 Verification Results")

            result_col1, result_col2 = st.columns([2, 1])

            with result_col1:
                # Display both original and processed claim if they differ
                processed_claim = result.get("claim")
                if processed_claim and processed_claim != claim_to_process:
                    st.markdown(f"**Original Claim:** {claim_to_process}")
                    st.markdown(f"**Processed Claim:** {processed_claim}")
                else:
                    st.markdown(f"**Claim:** {claim_to_process}")

                # Make verdict colorful based on classification
                truth_label = str(result.get('classification') or 'Uncertain')
                if "True" in truth_label:
                    verdict_color = "green"
                elif "False" in truth_label:
                    verdict_color = "red"
                else:
                    verdict_color = "gray"

                # The label comes from the model and is rendered with
                # unsafe_allow_html, so it is escaped before interpolation.
                st.markdown(
                    f"**Verdict:** <span style='color:{verdict_color};font-size:1.2em'>{html.escape(truth_label)}</span>",
                    unsafe_allow_html=True,
                )

                # Confidence is shown as a percentage of a value in [0, 1]
                confidence_value = _clamp_confidence(result.get("confidence"))
                st.markdown(f"**Confidence:** {confidence_value:.2%}")
                st.markdown(f"**Explanation:** {result.get('explanation', 'No explanation available.')}")

                # Add disclaimer about cross-verification
                st.info("⚠️ **Note:** Please cross-verify important information with additional reliable sources.")

            with result_col2:
                st.markdown("**Processing Time**")
                times = result.get("processing_times", {"total": total_time})
                reported_total = times.get('total', total_time) if isinstance(times, dict) else total_time
                st.markdown(f"- **Total:** {_as_float(reported_total, _as_float(total_time)):.2f}s")

                # Show agent thoughts: first five inline, full list on demand
                thoughts = result.get("thoughts") or []
                if thoughts:
                    st.markdown("**AI Reasoning Process**")
                    for step, thought in enumerate(thoughts[:5], start=1):
                        st.markdown(f"{step}. {thought}")
                    if len(thoughts) > 5:
                        with st.expander("Show all reasoning steps"):
                            for step, thought in enumerate(thoughts, start=1):
                                st.markdown(f"{step}. {thought}")

            # Display evidence
            st.subheader("📝 Evidence")
            evidence = _normalize_evidence(result.get("evidence", []))
            # The count is taken from the list that is actually rendered below.
            evidence_count = len(evidence)
            has_evidence = any(evidence)

            # Check for empty evidence
            if not has_evidence:
                st.warning("No relevant evidence was found for this claim. The verdict may not be reliable.")
            else:
                st.markdown(f"Retrieved {evidence_count} pieces of evidence")

            # Get classification results (anything that is not a list is ignored)
            classification_results = result.get("classification_results", [])
            if not isinstance(classification_results, list):
                classification_results = []

            # Only show evidence tabs if we have evidence
            if has_evidence:
                all_tab, top_tab, details_tab = st.tabs(["All Evidence", "Top Evidence", "Evidence Details"])

                with all_tab:
                    _render_evidence_items(evidence)

                with top_tab:
                    # Keep only well-formed entries that carry evidence text,
                    # highest confidence first, at most three.
                    ranked = sorted(
                        (
                            entry
                            for entry in classification_results
                            if isinstance(entry, dict)
                            and "confidence" in entry
                            and "evidence" in entry
                            and "label" in entry
                            and entry.get("evidence")
                        ),
                        key=lambda entry: _as_float(entry.get("confidence")),
                        reverse=True,
                    )[:3]

                    if ranked:
                        for position, entry in enumerate(ranked):
                            score = _as_float(entry.get("confidence"))
                            with st.expander(
                                f"Top Evidence {position + 1} (Confidence: {score:.2%})",
                                expanded=position == 0,
                            ):
                                st.text(entry.get("evidence", "No evidence text available"))
                                st.markdown(f"**Classification:** {entry.get('label', 'unknown')}")
                    # Without usable classification details, fall back to the
                    # first three raw evidence items.
                    elif not _render_evidence_items(evidence[:3]):
                        st.info("No detailed classification results available.")

                with details_tab:
                    # Tally evidence items by the host of their "URL:" line
                    evidence_sources = Counter()
                    for item in evidence:
                        if not item or not isinstance(item, str):
                            continue
                        host_match = _SOURCE_HOST_RE.search(item)
                        evidence_sources[host_match.group(1) if host_match else "Unknown"] += 1

                    # Display evidence source distribution
                    if evidence_sources:
                        st.markdown("**Evidence Source Distribution**")
                        for source, count in evidence_sources.items():
                            st.markdown(f"- {source}: {count} item(s)")
                    else:
                        st.info("No source information available in the evidence.")
            else:
                st.warning("No evidence was retrieved for this claim.")

            # Button to start a new verification
            if st.button("Verify Another Claim", type="primary", key="new_verify_btn"):
                st.session_state.fresh_state = True
                st.session_state.has_result = False
                st.session_state.result = None
                st.session_state.clear_form = True  # Set flag to clear the form on next rerun
                st.rerun()

        else:
            # Neither processing nor holding a result: the flags are
            # inconsistent, so recover by returning to the input view.
            st.session_state.fresh_state = True
            st.rerun()
520
-
521
- # Footer with additional information
522
- st.markdown("---")
523
- st.caption("""
524
- **AskVeracity** is an open-source tool designed to help combat misinformation through transparent evidence gathering and analysis.
525
- While we strive for accuracy, the system has inherent limitations based on available data sources, API constraints, and the evolving nature of information.
526
- """)