Spaces:
Sleeping
Sleeping
Update genesis/api_clients/pubmed_api.py
Browse files
genesis/api_clients/pubmed_api.py
CHANGED
@@ -1,92 +1,88 @@
|
|
1 |
# genesis/api_clients/pubmed_api.py
|
|
|
2 |
import os
|
3 |
import requests
|
4 |
-
import
|
5 |
-
from
|
6 |
|
7 |
-
|
8 |
-
PUBMED_API_KEY = os.getenv("PUBMED_API_KEY") # Optional, set in Hugging Face / .env
|
9 |
|
10 |
-
|
11 |
-
# Core Helpers
|
12 |
-
# -------------------------
|
13 |
-
def pubmed_search(query: str, max_results: int = 10) -> List[str]:
|
14 |
"""
|
15 |
-
Search PubMed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
"""
|
|
|
|
|
|
|
|
|
17 |
params = {
|
18 |
"db": "pubmed",
|
19 |
"term": query,
|
20 |
"retmax": max_results,
|
21 |
-
"
|
22 |
}
|
23 |
-
|
24 |
-
|
25 |
-
root = ET.fromstring(r.text)
|
26 |
-
return [id_elem.text for id_elem in root.findall(".//Id")]
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
"id": ",".join(pubmed_ids),
|
38 |
-
"retmode": "xml",
|
39 |
-
"api_key": PUBMED_API_KEY
|
40 |
-
}
|
41 |
-
r = requests.get(f"{NCBI_BASE}/efetch.fcgi", params=params)
|
42 |
-
r.raise_for_status()
|
43 |
-
|
44 |
-
articles = []
|
45 |
-
root = ET.fromstring(r.text)
|
46 |
-
for article in root.findall(".//PubmedArticle"):
|
47 |
-
title_elem = article.find(".//ArticleTitle")
|
48 |
-
abstract_elem = article.find(".//Abstract/AbstractText")
|
49 |
-
pmid_elem = article.find(".//PMID")
|
50 |
-
authors = [
|
51 |
-
f"{a.find('LastName').text} {a.find('ForeName').text}"
|
52 |
-
for a in article.findall(".//Author")
|
53 |
-
if a.find("LastName") is not None and a.find("ForeName") is not None
|
54 |
-
]
|
55 |
-
|
56 |
-
articles.append({
|
57 |
-
"pmid": pmid_elem.text if pmid_elem is not None else "",
|
58 |
-
"title": title_elem.text if title_elem is not None else "",
|
59 |
-
"abstract": abstract_elem.text if abstract_elem is not None else "",
|
60 |
-
"authors": authors,
|
61 |
-
"url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid_elem.text}/" if pmid_elem is not None else ""
|
62 |
-
})
|
63 |
-
return articles
|
64 |
-
|
65 |
-
# -------------------------
|
66 |
-
# High-Level Search + Fetch
|
67 |
-
# -------------------------
|
68 |
-
def search_pubmed(query: str, max_results: int = 10) -> List[Dict]:
    """
    Run a PubMed search and return the structured results.

    The query and result limit are handed to ``pubmed_search`` to obtain IDs,
    and those IDs are passed straight to ``fetch_details``.
    """
    return fetch_details(pubmed_search(query, max_results))
|
74 |
|
75 |
-
#
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
91 |
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# genesis/api_clients/pubmed_api.py
|
2 |
+
|
3 |
import os
|
4 |
import requests
|
5 |
+
import html
|
6 |
+
from xml.etree import ElementTree as ET
|
7 |
|
8 |
+
# Base URL of the NCBI E-utilities service; endpoint names such as
# "esearch.fcgi" and "efetch.fcgi" are appended to it when building requests.
PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
|
|
|
9 |
|
10 |
+
def search_pubmed_literature(query: str, max_results: int = 10):
    """
    Search PubMed literature using the NCBI E-utilities API.

    Two requests are made: ``esearch.fcgi`` to resolve the query into PubMed
    IDs, then ``efetch.fcgi`` to download the matching article records.

    Args:
        query (str): Search query (e.g. "synthetic biology cancer therapy").
            ``None``, empty, or whitespace-only queries return ``[]`` without
            contacting the API.
        max_results (int): Maximum number of results to return.

    Returns:
        list[dict]: One dict per article with the keys 'title', 'authors'
        (comma-separated "ForeName LastName" string, or "N/A") and 'link'.
        An empty list is returned when nothing matches, or when the request
        or the XML parsing fails (the error is printed, not raised).
    """
    # Guard against None as well as blank strings so callers never crash here.
    if not query or not query.strip():
        return []

    api_key = os.getenv("PUBMED_API_KEY")  # optional

    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "xml",
    }
    if api_key:
        search_params["api_key"] = api_key

    try:
        # Step 1: Search for IDs
        search_res = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi", params=search_params, timeout=10
        )
        search_res.raise_for_status()
        # Parse the raw bytes so the XML parser honours the encoding declared
        # in the document instead of requests' guess for ``.text``.
        search_root = ET.fromstring(search_res.content)
        # Skip empty <Id/> elements; a None entry would break the join below.
        ids = [id_elem.text for id_elem in search_root.findall(".//Id") if id_elem.text]

        if not ids:
            return []

        # Step 2: Fetch article details
        fetch_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
        }
        if api_key:
            fetch_params["api_key"] = api_key

        fetch_res = requests.get(
            f"{PUBMED_BASE}/efetch.fcgi", params=fetch_params, timeout=10
        )
        fetch_res.raise_for_status()

        return _parse_pubmed_articles(ET.fromstring(fetch_res.content))

    except requests.exceptions.RequestException as e:
        print(f"[PubMed API Error] {e}")
        return []
    except ET.ParseError as e:
        print(f"[PubMed Parse Error] {e}")
        return []


def _pubmed_element_text(elem):
    """Return all text inside *elem* (including nested inline tags), or ""."""
    if elem is None:
        return ""
    # itertext() also collects text that sits inside child tags such as
    # <i> or <sub>, which ``elem.text`` alone would silently drop.
    return "".join(elem.itertext())


def _parse_pubmed_articles(root):
    """Turn an efetch XML tree into a list of title/authors/link dicts."""
    results = []
    for article in root.findall(".//PubmedArticle"):
        raw_title = _pubmed_element_text(article.find(".//ArticleTitle"))
        title = html.unescape(raw_title) if raw_title else "No title"

        authors = []
        for author in article.findall(".//Author"):
            last = _pubmed_element_text(author.find("LastName"))
            fore = _pubmed_element_text(author.find("ForeName"))
            # Only keep authors with both name parts actually filled in, so
            # an empty element never renders as the string "None".
            if last and fore:
                authors.append(f"{fore} {last}")

        pmid = _pubmed_element_text(article.find(".//PMID"))
        link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else ""

        results.append({
            "title": title,
            "authors": ", ".join(authors) if authors else "N/A",
            "link": link,
        })
    return results
|