pritamdeka committed on
Commit
2ff0c49
·
verified ·
1 Parent(s): d5d1153

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -22
app.py CHANGED
@@ -52,28 +52,23 @@ def get_keybert_query(text, top_n=10):
52
  return query
53
 
54
  # --- PubMed retrieval ---
55
- def retrieve_pubmed_abstracts_simple(text, n=PUBMED_N, fallback_headline=None):
56
- query = get_keybert_query(text, top_n=10)
57
- print("PubMed Query:", query)
58
  ncbi_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
59
- for q in [query, fallback_headline, text]:
60
- if not q:
61
- continue
62
- search_url = f"{ncbi_url}esearch.fcgi?db=pubmed&term={q}&retmax={n}&sort=relevance"
63
- r = requests.get(search_url)
64
- pmids = re.findall(r"<Id>(\d+)</Id>", r.text)
65
- if pmids:
66
- ids = ','.join(pmids)
67
- fetch_url = f"{ncbi_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml&retmax={n}"
68
- resp = requests.get(fetch_url)
69
- titles = re.findall(r"<ArticleTitle>(.*?)</ArticleTitle>", resp.text, flags=re.DOTALL)
70
- abstracts = re.findall(r"<AbstractText.*?>(.*?)</AbstractText>", resp.text, flags=re.DOTALL)
71
- if not abstracts:
72
- abstracts = [""] * len(titles)
73
- titles = [re.sub(r"\s+", " ", t).strip() for t in titles]
74
- abstracts = [re.sub(r"\s+", " ", a).strip() for a in abstracts]
75
- return titles, abstracts
76
- return [], []
77
 
78
  # --- Claim extraction ---
79
  indicator_phrases = [
@@ -189,7 +184,7 @@ def summarize_evidence_llm(claim, evidence_list, model_choice):
189
  pipe = get_summarizer(model_choice)
190
  outputs = pipe(
191
  messages,
192
- max_new_tokens=96,
193
  do_sample=False,
194
  temperature=0.1,
195
  )
 
52
  return query
53
 
54
  # --- PubMed retrieval ---
55
+ def retrieve_pubmed_abstracts_vintage(query, n=100):
 
 
56
  ncbi_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
57
+ search_url = f"{ncbi_url}esearch.fcgi?db=pubmed&term={query}&retmax={n}&sort=relevance&retmode=xml"
58
+ r = requests.get(search_url)
59
+ root = ET.fromstring(r.text)
60
+ pmids = [el.text for el in root.findall('.//Id')]
61
+ if not pmids:
62
+ return [], []
63
+ ids = ','.join(pmids)
64
+ fetch_url = f"{ncbi_url}efetch.fcgi?db=pubmed&id={ids}&rettype=abstract&retmode=xml&retmax={n}&sort=relevance"
65
+ resp = requests.get(fetch_url)
66
+ root2 = ET.fromstring(resp.text)
67
+ titles = [a.text for a in root2.findall('.//ArticleTitle')]
68
+ abstracts = [b.text for b in root2.findall('.//AbstractText')]
69
+ return titles, abstracts
70
+
71
+ #return [], []
 
 
 
72
 
73
  # --- Claim extraction ---
74
  indicator_phrases = [
 
184
  pipe = get_summarizer(model_choice)
185
  outputs = pipe(
186
  messages,
187
+ max_new_tokens=128,
188
  do_sample=False,
189
  temperature=0.1,
190
  )