Update app.py
app.py CHANGED
```diff
@@ -8,21 +8,29 @@ import streamlit as st
 
 st.set_page_config(page_title="News Fact Checker", page_icon="📰")
 
 # Step 1: Unzip dataset
 @st.cache_data
 def extract_dataset():
     zip_path = "climate.zip"
-    extract_dir = "
+    extract_dir = "climate_extracted"
 
-    if not os.path.exists(
+    if not os.path.exists(extract_dir):
         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
             zip_ref.extractall(extract_dir)
 
-
-    df["text"] = df["title"] + ". " + df["description"]
-    return df.head(1000)  # Sample only top 1000 rows
+    train_path = os.path.join(extract_dir, "climate", "train")
+
+    # Try CSV or TSV format detection
+    try:
+        df = pd.read_csv(train_path, header=None)
+    except:
+        df = pd.read_csv(train_path, sep='\t', header=None)
+
+    df.columns = ["label", "title", "description"]
+    df["text"] = df["title"].astype(str) + ". " + df["description"].astype(str)
+    return df.head(1000)
 
-#
+# Step 2: Load models
 @st.cache_resource
 def load_models():
     embedder = SentenceTransformer('all-MiniLM-L6-v2')
```
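For reference, here is the new loader as one self-contained function. Everything below mirrors the hunk above except the exception handling: the commit uses a bare `except:`, which would also swallow unrelated failures such as a missing file, so this sketch narrows it to `pandas.errors.ParserError`; treat that as a suggested refinement, not what the commit ships.

```python
import os
import zipfile

import pandas as pd


def extract_dataset(zip_path: str = "climate.zip",
                    extract_dir: str = "climate_extracted") -> pd.DataFrame:
    # Unzip once; later runs reuse the already-extracted directory.
    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_dir)

    train_path = os.path.join(extract_dir, "climate", "train")

    # Try comma-separated first, then fall back to tab-separated.
    try:
        df = pd.read_csv(train_path, header=None)
    except pd.errors.ParserError:  # the commit catches everything here
        df = pd.read_csv(train_path, sep="\t", header=None)

    df.columns = ["label", "title", "description"]
    df["text"] = df["title"].astype(str) + ". " + df["description"].astype(str)
    return df.head(1000)  # sample only the top 1000 rows
```

One caveat either way: a tab-separated file often parses "successfully" as a single comma-separated column without raising anything, so checking `df.shape[1] == 3` after the first read is a sturdier trigger for the fallback than catching an exception.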
```diff
@@ -30,16 +38,16 @@ def load_models():
     return embedder, summarizer
 
 st.title("📰 News Fact Checker")
-st.markdown("Enter a
+st.markdown("Enter a **claim** about climate or news events. We'll pull relevant facts from real news and summarize them for you.")
 
-# Step 3:
-claim = st.text_input("🔍 Enter your claim
+# Step 3: User input
+claim = st.text_input("🔍 Enter your claim:")
 data = extract_dataset()
 embedder, summarizer = load_models()
 
-# Step 4:
+# Step 4: Fact checking
 if claim:
-    with st.spinner("
+    with st.spinner("Searching news..."):
         corpus = data["text"].tolist()
         corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
         query_embedding = embedder.encode(claim, convert_to_tensor=True)
```
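The ranking code sits in the unchanged lines between this hunk and the next (old lines 46–52), so the commit never shows how `top_passages` is derived from these embeddings. With sentence-transformers the idiomatic step is cosine-similarity search over the encoded corpus; the sketch below assumes that pattern, and the sample texts and `top_k=2` are invented for illustration.

```python
from sentence_transformers import SentenceTransformer, util

embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Invented stand-ins for data["text"].tolist() and the user's claim.
corpus = [
    "Arctic sea ice extent fell to a near-record low this September.",
    "The central bank held interest rates steady on Tuesday.",
]
claim = "Arctic sea ice is shrinking."

corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
query_embedding = embedder.encode(claim, convert_to_tensor=True)

# util.semantic_search returns one hit list per query; each hit is a dict
# with 'corpus_id' and 'score', sorted best match first.
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=2)[0]
top_passages = [corpus[hit["corpus_id"]] for hit in hits]
print(top_passages)
```

Note that the app re-encodes all 1000 corpus rows on every claim, because the encoding happens inside `if claim:`; moving the corpus embedding behind its own `@st.cache_data` function would make repeat queries much faster.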
```diff
@@ -53,9 +61,9 @@ if claim:
 
         summary = summarizer(combined, max_length=150, min_length=40, do_sample=False)[0]["summary_text"]
 
-        st.markdown("### ✅
+        st.markdown("### ✅ Summary Based on News")
         st.success(summary)
 
-        with st.expander("📰
+        with st.expander("📰 View Related News Snippets"):
             for i, passage in enumerate(top_passages, 1):
                 st.markdown(f"**Snippet {i}:** {passage}")
```
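`summarizer` is the second object returned by `load_models()`; the line that constructs it (old line 29 / new line 37) falls between the hunks, so the actual checkpoint is not visible in this commit. The sketch below reproduces the call as the hunk uses it, with `sshleifer/distilbart-cnn-12-6` as an assumed stand-in model.

```python
from transformers import pipeline

# Assumed checkpoint; the commit does not show which model load_models() builds.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# Stand-in for the concatenated top passages.
combined = (
    "Arctic sea ice extent fell to a near-record low this September. "
    "Scientists attribute the long-term decline to rising global temperatures."
)

# Same arguments as the app: bounded length, deterministic decoding.
summary = summarizer(combined, max_length=150, min_length=40,
                     do_sample=False)[0]["summary_text"]
print(summary)
```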
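Unchanged but worth noting: the app splits Streamlit's two caching decorators the way the Streamlit docs recommend, `@st.cache_data` for `extract_dataset` (a picklable DataFrame) and `@st.cache_resource` for `load_models` (live model objects that should be shared, not copied). A minimal illustration of the split; the function names here are hypothetical.

```python
import pandas as pd
import streamlit as st


@st.cache_data       # cached by value: each rerun gets a fresh copy
def load_table() -> pd.DataFrame:
    return pd.DataFrame({"title": ["a"], "description": ["b"]})


@st.cache_resource   # cached by reference: one shared instance, never pickled
def load_embedder():
    from sentence_transformers import SentenceTransformer
    return SentenceTransformer("all-MiniLM-L6-v2")
```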