# mgbam's picture
# Update genesis/api_clients/pubmed_api.py
# 587c291 verified
# genesis/api_clients/pubmed_api.py
import os
import requests
import html
from xml.etree import ElementTree as ET
# Base URL of the NCBI E-utilities service; endpoint script names
# (e.g. "esearch.fcgi", "efetch.fcgi") are appended to it.
PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
def search_pubmed_literature(query: str, max_results: int = 10):
    """
    Search PubMed literature using NCBI E-utilities API.

    Runs an ``esearch`` request to resolve the query to PubMed IDs, then an
    ``efetch`` request to load the matching records. If the
    ``PUBMED_API_KEY`` environment variable is set, it is sent with both
    requests.

    Args:
        query (str): Search query (e.g. "synthetic biology cancer therapy").
        max_results (int): Maximum number of results to return.

    Returns:
        list[dict]: One dict per article with the keys 'title', 'authors'
        (comma-separated names, or "N/A") and 'link' (PubMed URL, or "" when
        the record carries no PMID). An empty list is returned for a blank
        query, a non-positive ``max_results``, a search without matches, or
        when a request / XML parse error occurs (the error is printed, not
        raised).
    """
    # Guard against None as well as empty / whitespace-only queries.
    if not query or not query.strip():
        return []
    # A non-positive limit asks for no results, so skip the network round-trip.
    if max_results is not None and max_results <= 0:
        return []

    api_key = os.getenv("PUBMED_API_KEY")  # optional

    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "xml",
    }
    if api_key:
        search_params["api_key"] = api_key

    try:
        # Step 1: Search for IDs
        search_res = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi", params=search_params, timeout=10
        )
        search_res.raise_for_status()
        # Parse the raw bytes so the XML parser applies the encoding declared
        # in the document instead of relying on a guessed text decoding.
        search_root = ET.fromstring(search_res.content)
        # Skip empty <Id/> elements: a None entry would break the join below.
        ids = [elem.text.strip() for elem in search_root.findall(".//Id") if elem.text]
        if not ids:
            return []

        # Step 2: Fetch article details
        fetch_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
        }
        if api_key:
            fetch_params["api_key"] = api_key
        fetch_res = requests.get(
            f"{PUBMED_BASE}/efetch.fcgi", params=fetch_params, timeout=10
        )
        fetch_res.raise_for_status()
        return _parse_pubmed_articles(fetch_res.content)
    except requests.exceptions.RequestException as e:
        print(f"[PubMed API Error] {e}")
        return []
    except ET.ParseError as e:
        print(f"[PubMed Parse Error] {e}")
        return []


def _format_author(author):
    """Return a display name for one <Author> element, or None if it has none."""
    collective = (author.findtext("CollectiveName") or "").strip()
    if collective:
        return collective
    # findtext() yields None for a missing element and "" for an empty one;
    # normalise both so a missing part never renders as the string "None".
    fore = (author.findtext("ForeName") or "").strip()
    last = (author.findtext("LastName") or "").strip()
    name = " ".join(part for part in (fore, last) if part)
    return name or None


def _parse_pubmed_articles(xml_payload):
    """Convert an efetch XML document into a list of title/authors/link dicts."""
    results = []
    for article in ET.fromstring(xml_payload).findall(".//PubmedArticle"):
        title = ""
        title_elem = article.find(".//ArticleTitle")
        if title_elem is not None:
            # itertext() also collects text nested inside child elements of the
            # title; .text alone is None or truncated when such children exist.
            title = html.unescape("".join(title_elem.itertext())).strip()

        authors = [
            name
            for name in map(_format_author, article.findall(".//Author"))
            if name
        ]

        pmid = (article.findtext(".//PMID") or "").strip()

        results.append({
            "title": title or "No title",
            "authors": ", ".join(authors) if authors else "N/A",
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "",
        })
    return results