Update app.py
app.py CHANGED
@@ -16,9 +16,16 @@ import torch
 PUBMED_N = 100  # Number of abstracts to retrieve initially
 TOP_ABSTRACTS = 10  # Number of top semantic abstracts to keep per claim
 NLI_MODEL_NAME = "pritamdeka/PubMedBERT-MNLI-MedNLI"
-SBERT_MODEL_NAME = "pritamdeka/S-
+SBERT_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
 NLI_LABELS = ['CONTRADICTION', 'NEUTRAL', 'ENTAILMENT']
 
+# --------- Summarizer model options ---------
+model_options = {
+    "Llama-3.2-1B-Instruct (Meta, gated)": "meta-llama/Llama-3.2-1B-Instruct",
+    "Gemma-3-1B-it (Google, gated)": "google/gemma-3-1b-it",
+    "TinyLlama-1.1B-Chat (Open)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+}
+
 # --------- Indicator Phrases for Claim Extraction ---------
 indicator_phrases = [
     "found that", "findings suggest", "shows that", "showed that", "demonstrated", "demonstrates",
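
The new model_options mapping drives the UI dropdown: display labels map to Hugging Face model ids, and the "gated" labels mark checkpoints that require accepting a license and presenting an access token. A minimal sketch of how a label resolves to an id and to the token requirement; the resolve_model helper is hypothetical, not part of app.py, and its substring test mirrors the one get_summarizer uses below:

model_options = {
    "Llama-3.2-1B-Instruct (Meta, gated)": "meta-llama/Llama-3.2-1B-Instruct",
    "Gemma-3-1B-it (Google, gated)": "google/gemma-3-1b-it",
    "TinyLlama-1.1B-Chat (Open)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
}

def resolve_model(label):
    # Hypothetical helper: map a dropdown label to its model id and whether
    # it is expected to need an HF token (same test as get_summarizer below).
    model_id = model_options[label]
    needs_token = any(g in model_id for g in ["meta-llama", "gemma"])
    return model_id, needs_token

print(resolve_model("TinyLlama-1.1B-Chat (Open)"))
# -> ('TinyLlama/TinyLlama-1.1B-Chat-v1.0', False)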
@@ -48,16 +55,7 @@ indicator_phrases = [
 nli_tokenizer = AutoTokenizer.from_pretrained(NLI_MODEL_NAME)
 nli_model = AutoModelForSequenceClassification.from_pretrained(NLI_MODEL_NAME)
 sbert_model = SentenceTransformer(SBERT_MODEL_NAME)
-
-# --- Load fast Llama-3.2-1B-Instruct summarizer pipeline ---
-model_id = "meta-llama/Llama-3.2-1B-Instruct"
-pipe = pipeline(
-    "text-generation",
-    model=model_id,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto",
-    max_new_tokens=128,
-)
+pipe_cache = {}  # cache summarization pipelines
 
 def extract_claims_pattern(article_text):
     sentences = sent_tokenize(article_text)
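
This hunk replaces the eager, import-time Llama pipeline with an empty pipe_cache that is filled on demand by get_summarizer (next hunk), presumably so the Space can start without loading any gated weights. A minimal sketch of the load-once-then-reuse pattern, with a stand-in loader instead of a real transformers pipeline:

pipe_cache = {}

def get_or_load(model_id, loader):
    # Build the expensive object on first request only, then reuse it.
    if model_id not in pipe_cache:
        pipe_cache[model_id] = loader(model_id)
    return pipe_cache[model_id]

first = get_or_load("demo-model", lambda mid: object())
again = get_or_load("demo-model", lambda mid: object())
assert first is again  # the second call hits the cache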
@@ -130,11 +128,29 @@ def extract_evidence_nli(claim, title, abstract):
         })
     return evidence
 
-def summarize_evidence_llm(claim, evidence_list):
+def get_summarizer(model_choice):
+    model_id = model_options[model_choice]
+    if model_id in pipe_cache:
+        return pipe_cache[model_id]
+    kwargs = {
+        "model": model_id,
+        "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
+        "device_map": "auto",
+        "max_new_tokens": 128
+    }
+    # Add token for gated models (Gemma, Llama)
+    if any(gated in model_id for gated in ["meta-llama", "gemma"]):
+        hf_token = os.environ.get("HF_TOKEN", None)
+        if hf_token:
+            kwargs["token"] = hf_token
+        else:
+            raise RuntimeError(f"Model '{model_choice}' requires a Hugging Face access token. Please set 'HF_TOKEN' as a Space secret or environment variable.")
+    pipe_cache[model_id] = pipeline("text-generation", **kwargs)
+    return pipe_cache[model_id]
+
+def summarize_evidence_llm(claim, evidence_list, model_choice):
     support = [ev['sentence'] for ev in evidence_list if ev['label'] == 'ENTAILMENT']
     contradict = [ev['sentence'] for ev in evidence_list if ev['label'] == 'CONTRADICTION']
-
-    # Compose prompt for summarization.
     messages = [
         {"role": "system", "content": "You are a helpful biomedical assistant. Summarize scientific evidence in plain English for the general public."},
         {"role": "user", "content":
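
Note that get_summarizer reads os.environ, so app.py must import os somewhere above these hunks; the import section is not shown in this diff. The gating branch reduces to the following sketch (a hypothetical standalone helper with the same lookup-or-raise shape):

import os

def hf_token_or_error(model_choice):
    # Gated checkpoints (Meta Llama, Google Gemma) need an access token;
    # on a Space it is usually injected as the HF_TOKEN secret.
    token = os.environ.get("HF_TOKEN", None)
    if token is None:
        raise RuntimeError(
            f"Model '{model_choice}' requires a Hugging Face access token."
        )
    return token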
@@ -145,6 +161,7 @@ def summarize_evidence_llm(claim, evidence_list):
         }
     ]
     try:
+        pipe = get_summarizer(model_choice)
         outputs = pipe(
             messages,
             max_new_tokens=96,
@@ -152,7 +169,6 @@ def summarize_evidence_llm(claim, evidence_list):
             temperature=0.1,
         )
         out = outputs[0]["generated_text"]
-        # If the model returns all messages, just take the last message (often the answer).
         if isinstance(out, list) and "content" in out[-1]:
             return out[-1]["content"].strip()
         return out.strip()
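
The isinstance(out, list) branch exists because, when a text-generation pipeline is given chat messages, transformers versions with chat-template support return the whole conversation with the assistant's reply appended as the last message. A hedged sketch of that call shape using the app's open TinyLlama option:

from transformers import pipeline

pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
messages = [
    {"role": "system", "content": "You are a helpful biomedical assistant."},
    {"role": "user", "content": "Summarize: the trial found aspirin reduced stroke risk."},
]
outputs = pipe(messages, max_new_tokens=64)
out = outputs[0]["generated_text"]  # list of {"role": ..., "content": ...} dicts
print(out[-1]["content"])           # the assistant's generated summary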
@@ -171,7 +187,7 @@ def format_evidence_html(evidence_list):
     )
     return html
 
-def factcheck_app(article_url):
+def factcheck_app(article_url, model_choice):
     try:
         art = Article(article_url)
         art.download()
@@ -204,7 +220,7 @@ def factcheck_app(article_url):
         control_ev = extract_evidence_nli(claim, titles[idx_non_top], abstracts[idx_non_top])
         evidence_results.append({"title": f"(Control) {titles[idx_non_top]}", "evidence": control_ev})
         all_evidence_sentences = [ev for abs_res in evidence_results for ev in abs_res["evidence"]]
-        summary = summarize_evidence_llm(claim, all_evidence_sentences)
+        summary = summarize_evidence_llm(claim, all_evidence_sentences, model_choice)
         results_html += f"<hr><b>Claim:</b> {claim}<br><b>Layman summary:</b> {summary}<br>"
         for abs_res in evidence_results:
             results_html += f"<br><b>Abstract:</b> {abs_res['title']}<br>{format_evidence_html(abs_res['evidence'])}"
@@ -216,8 +232,9 @@ description = """
 This app extracts key scientific claims from a news article, finds the most relevant PubMed biomedical research papers, checks which sentences in those papers support or contradict each claim, and gives you a plain-English summary verdict.<br><br>
 <b>How to use it:</b><br>
 1. Paste the link to a biomedical news article.<br>
-2. Wait for the results.<br>
-3. For each claim, you will see:<br>
+2. Choose an AI summarizer model below. If you have no special access, use 'TinyLlama' (works for everyone).<br>
+3. Wait for the results.<br>
+4. For each claim, you will see:<br>
 - A plain summary of what research says.<br>
 - Color-coded evidence sentences (green=support, red=contradict, gray=neutral).<br>
 - The titles of the most relevant PubMed articles.<br><br>
@@ -226,11 +243,18 @@ This app extracts key scientific claims from a news article, finds the most rele
 
 iface = gr.Interface(
     fn=factcheck_app,
-    inputs=gr.Textbox(lines=2, label="Paste a news article URL"),
+    inputs=[
+        gr.Textbox(lines=2, label="Paste a news article URL"),
+        gr.Dropdown(
+            choices=list(model_options.keys()),
+            value="TinyLlama-1.1B-Chat (Open)",
+            label="Choose summarizer model"
+        )
+    ],
     outputs=[gr.HTML(label="Fact-Check Results (Summary & Evidence)"), gr.JSON(label="All Results (JSON)")],
     title="BioMedical News Fact-Checking & Research Evidence Finder",
     description=description,
-    examples=[["https://www.medicalnewstoday.com/articles/omicron-what-do-we-know-about-the-stealth-variant"]],
+    examples=[["https://www.medicalnewstoday.com/articles/omicron-what-do-we-know-about-the-stealth-variant", "TinyLlama-1.1B-Chat (Open)"]],
     allow_flagging="never"
 )
 
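
With two input components, Gradio passes their values to fn positionally, which is why factcheck_app gained a second parameter and why each examples row now carries two values, one per input. A self-contained sketch of that wiring, with a generic demo function rather than the app's:

import gradio as gr

def demo_fn(url, model_choice):
    return f"url={url}, model={model_choice}"

gr.Interface(
    fn=demo_fn,  # called as demo_fn(textbox_value, dropdown_value)
    inputs=[gr.Textbox(label="URL"), gr.Dropdown(choices=["a", "b"], value="a")],
    outputs=gr.Textbox(),
    examples=[["https://example.com", "a"]],  # one value per input component
).launch()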