Spaces:
Sleeping
Sleeping
# genesis/api_clients/pubmed_api.py | |
import os | |
import requests | |
import html | |
from xml.etree import ElementTree as ET | |
PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" | |
def search_pubmed_literature(query: str, max_results: int = 10):
    """
    Search PubMed literature using the NCBI E-utilities API.

    Two requests are made: ``esearch`` to resolve the query into PubMed IDs,
    then ``efetch`` to download the records for those IDs. If the
    ``PUBMED_API_KEY`` environment variable is set, it is sent with both
    requests.

    Args:
        query (str): Search query (e.g. "synthetic biology cancer therapy").
        max_results (int): Maximum number of results to return.

    Returns:
        list[dict]: One dict per article with the keys 'title', 'authors'
        (comma-separated names, or "N/A" when none were found) and 'link'
        (PubMed URL, or "" when the record has no PMID). An empty list is
        returned for a blank query, a non-positive ``max_results``, no
        matches, or any HTTP / XML parsing failure (the failure is printed).
    """
    # Nothing to search for: skip the network round trip entirely.
    if not query or not query.strip():
        return []
    if isinstance(max_results, int) and max_results < 1:
        return []

    api_key = os.getenv("PUBMED_API_KEY")  # optional

    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "xml",
    }
    if api_key:
        search_params["api_key"] = api_key

    try:
        # Step 1: Search for IDs
        search_res = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi", params=search_params, timeout=10
        )
        search_res.raise_for_status()
        # Parse the raw bytes so the XML parser applies the document's own
        # encoding declaration instead of the charset requests guessed.
        search_root = ET.fromstring(search_res.content)
        ids = [
            id_elem.text.strip()
            for id_elem in search_root.findall(".//Id")
            if id_elem.text and id_elem.text.strip()
        ]
        if not ids:
            return []

        # Step 2: Fetch article details
        fetch_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
        }
        if api_key:
            fetch_params["api_key"] = api_key
        fetch_res = requests.get(
            f"{PUBMED_BASE}/efetch.fcgi", params=fetch_params, timeout=10
        )
        fetch_res.raise_for_status()
        fetch_root = ET.fromstring(fetch_res.content)
    except requests.exceptions.RequestException as e:
        print(f"[PubMed API Error] {e}")
        return []
    except ET.ParseError as e:
        print(f"[PubMed Parse Error] {e}")
        return []

    return [
        _pubmed_article_to_result(article)
        for article in fetch_root.findall(".//PubmedArticle")
    ]


def _pubmed_element_text(elem):
    """Return all text inside *elem* (nested tags included), or "" if absent."""
    if elem is None:
        return ""
    # itertext() also yields text that sits inside or after child elements;
    # .text alone is None or truncated when the element contains markup.
    return "".join(elem.itertext()).strip()


def _pubmed_author_name(author):
    """Build a display name for one <Author> element, or "" if it has none."""
    fore = _pubmed_element_text(author.find("ForeName"))
    last = _pubmed_element_text(author.find("LastName"))
    personal = " ".join(part for part in (fore, last) if part)
    # Group authors carry a CollectiveName instead of personal name parts.
    return personal or _pubmed_element_text(author.find("CollectiveName"))


def _pubmed_article_to_result(article):
    """Convert one <PubmedArticle> element into a title/authors/link dict."""
    title = html.unescape(_pubmed_element_text(article.find(".//ArticleTitle")))

    names = (_pubmed_author_name(a) for a in article.findall(".//Author"))
    authors = [name for name in names if name]

    pmid = _pubmed_element_text(article.find(".//PMID"))
    link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else ""

    return {
        "title": title or "No title",
        "authors": ", ".join(authors) if authors else "N/A",
        "link": link,
    }