rahideer commited on
Commit
7bf94f0
·
verified ·
1 Parent(s): b0efa4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -8,21 +8,29 @@ import streamlit as st
8
 
9
  st.set_page_config(page_title="News Fact Checker", page_icon="📰")
10
 
11
- # Step 1: Unzip dataset (only once)
12
  @st.cache_data
13
  def extract_dataset():
14
  zip_path = "climate.zip"
15
- extract_dir = "climate"
16
 
17
- if not os.path.exists(os.path.join(extract_dir, "ag_news_csv", "train.csv")):
18
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
19
  zip_ref.extractall(extract_dir)
20
 
21
- df = pd.read_csv(os.path.join(extract_dir, "ag_news_csv", "train.csv"), header=None, names=["label", "title", "description"])
22
- df["text"] = df["title"] + ". " + df["description"]
23
- return df.head(1000) # Sample only top 1000 rows
24
 
25
- # Step 2: Load embedding model + summarizer
 
 
 
 
 
 
 
 
 
 
26
  @st.cache_resource
27
  def load_models():
28
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
@@ -30,16 +38,16 @@ def load_models():
30
  return embedder, summarizer
31
 
32
  st.title("📰 News Fact Checker")
33
- st.markdown("Enter a news-related **claim** and get back a summary based on real climate news articles to help verify it.")
34
 
35
- # Step 3: UI
36
- claim = st.text_input("🔍 Enter your claim here:")
37
  data = extract_dataset()
38
  embedder, summarizer = load_models()
39
 
40
- # Step 4: Process and return result
41
  if claim:
42
- with st.spinner("🔍 Searching relevant news..."):
43
  corpus = data["text"].tolist()
44
  corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
45
  query_embedding = embedder.encode(claim, convert_to_tensor=True)
@@ -53,9 +61,9 @@ if claim:
53
 
54
  summary = summarizer(combined, max_length=150, min_length=40, do_sample=False)[0]["summary_text"]
55
 
56
- st.markdown("### ✅ Fact-Checked Summary")
57
  st.success(summary)
58
 
59
- with st.expander("🔎 Top Relevant News Passages"):
60
  for i, passage in enumerate(top_passages, 1):
61
  st.markdown(f"**Snippet {i}:** {passage}")
 
8
 
9
  st.set_page_config(page_title="News Fact Checker", page_icon="📰")
10
 
11
+ # Step 1: Unzip dataset
12
  @st.cache_data
13
  def extract_dataset():
14
  zip_path = "climate.zip"
15
+ extract_dir = "climate_extracted"
16
 
17
+ if not os.path.exists(extract_dir):
18
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
19
  zip_ref.extractall(extract_dir)
20
 
21
+ train_path = os.path.join(extract_dir, "climate", "train")
 
 
22
 
23
+ # Try CSV or TSV format detection
24
+ try:
25
+ df = pd.read_csv(train_path, header=None)
26
+ except:
27
+ df = pd.read_csv(train_path, sep='\t', header=None)
28
+
29
+ df.columns = ["label", "title", "description"]
30
+ df["text"] = df["title"].astype(str) + ". " + df["description"].astype(str)
31
+ return df.head(1000)
32
+
33
+ # Step 2: Load models
34
  @st.cache_resource
35
  def load_models():
36
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
 
38
  return embedder, summarizer
39
 
40
  st.title("📰 News Fact Checker")
41
+ st.markdown("Enter a **claim** about climate or news events. We'll pull relevant facts from real news and summarize them for you.")
42
 
43
+ # Step 3: User input
44
+ claim = st.text_input("🔍 Enter your claim:")
45
  data = extract_dataset()
46
  embedder, summarizer = load_models()
47
 
48
+ # Step 4: Fact checking
49
  if claim:
50
+ with st.spinner("Searching news..."):
51
  corpus = data["text"].tolist()
52
  corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
53
  query_embedding = embedder.encode(claim, convert_to_tensor=True)
 
61
 
62
  summary = summarizer(combined, max_length=150, min_length=40, do_sample=False)[0]["summary_text"]
63
 
64
+ st.markdown("### ✅ Summary Based on News")
65
  st.success(summary)
66
 
67
+ with st.expander("🔎 View Related News Snippets"):
68
  for i, passage in enumerate(top_passages, 1):
69
  st.markdown(f"**Snippet {i}:** {passage}")