Spaces:

pritamdeka
/

Biomedical-Fact-Checker

Sleeping

App Files Files Community

pritamdeka committed on Jul 4

Commit

2ff0c49

verified ·

1 Parent(s): d5d1153

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -22

app.py CHANGED Viewed

@@ -52,28 +52,23 @@ def get_keybert_query(text, top_n=10):
     return query
 # --- PubMed retrieval ---
-def retrieve_pubmed_abstracts_simple(text, n=PUBMED_N, fallback_headline=None):
-    query = get_keybert_query(text, top_n=10)
-    print("PubMed Query:", query)
     ncbi_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
-    for q in [query, fallback_headline, text]:
-        if not q:
-            continue
-        search_url = f"{ncbi_url}esearch.fcgi?db=pubmed&term={q}&retmax={n}&sort=relevance"
-        r = requests.get(search_url)
-        pmids = re.findall(r"<Id>(\d+)</Id>", r.text)
-        if pmids:
-            ids = ','.join(pmids)
-            fetch_url = f"{ncbi_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml&retmax={n}"
-            resp = requests.get(fetch_url)
-            titles = re.findall(r"<ArticleTitle>(.*?)</ArticleTitle>", resp.text, flags=re.DOTALL)
-            abstracts = re.findall(r"<AbstractText.*?>(.*?)</AbstractText>", resp.text, flags=re.DOTALL)
-            if not abstracts:
-                abstracts = [""] * len(titles)
-            titles = [re.sub(r"\s+", " ", t).strip() for t in titles]
-            abstracts = [re.sub(r"\s+", " ", a).strip() for a in abstracts]
-            return titles, abstracts
-    return [], []
 # --- Claim extraction ---
 indicator_phrases = [
@@ -189,7 +184,7 @@ def summarize_evidence_llm(claim, evidence_list, model_choice):
         pipe = get_summarizer(model_choice)
         outputs = pipe(
             messages,
-            max_new_tokens=96,
             do_sample=False,
             temperature=0.1,
         )

     return query
 # --- PubMed retrieval ---
+def retrieve_pubmed_abstracts_vintage(query, n=100):
     ncbi_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
+    search_url = f"{ncbi_url}esearch.fcgi?db=pubmed&term={query}&retmax={n}&sort=relevance&retmode=xml"
+    r = requests.get(search_url)
+    root = ET.fromstring(r.text)
+    pmids = [el.text for el in root.findall('.//Id')]
+    if not pmids:
+        return [], []
+    ids = ','.join(pmids)
+    fetch_url = f"{ncbi_url}efetch.fcgi?db=pubmed&id={ids}&rettype=abstract&retmode=xml&retmax={n}&sort=relevance"
+    resp = requests.get(fetch_url)
+    root2 = ET.fromstring(resp.text)
+    titles = [a.text for a in root2.findall('.//ArticleTitle')]
+    abstracts = [b.text for b in root2.findall('.//AbstractText')]
+    return titles, abstracts
+    #return [], []
 # --- Claim extraction ---
 indicator_phrases = [
         pipe = get_summarizer(model_choice)
         outputs = pipe(
             messages,
+            max_new_tokens=128,
             do_sample=False,
             temperature=0.1,
         )