Spaces:
Sleeping
Sleeping
# genesis/api_clients/pubmed_api.py
import requests | |
from xml.etree import ElementTree as ET | |
def search_pubmed(query: str, max_results: int = 10, timeout: float = 10.0):
    """
    Search PubMed using the NCBI E-utilities API.

    Two HTTP requests are made: an ESearch call to resolve the query into
    PubMed IDs, then an ESummary call to fetch the summary records for
    those IDs.

    Args:
        query: Search term sent to ESearch as ``term``.
        max_results: Maximum number of IDs requested (sent as ``retmax``).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of dicts with 'title', 'authors', 'pub_date', 'link'.
        'title', 'pub_date' and 'link' are ``None`` when the corresponding
        element is absent from the response; 'authors' is a list of
        strings (possibly empty). An empty list is returned when the query
        is blank, ``max_results`` is not positive, or no IDs match.

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.Timeout: If a request exceeds ``timeout``.
        xml.etree.ElementTree.ParseError: If the summary response is not
            well-formed XML.
    """
    # Nothing can match a blank query or a non-positive result limit, so
    # skip the network round trips entirely.
    if not query or not query.strip() or max_results <= 0:
        return []

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

    # Step 1: Search PubMed IDs
    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
    }
    # Without a timeout, requests would wait forever on a stalled connection.
    search_res = requests.get(base_url, params=search_params, timeout=timeout)
    search_res.raise_for_status()
    id_list = search_res.json().get("esearchresult", {}).get("idlist", [])
    if not id_list:
        return []

    # Step 2: Fetch summaries for IDs
    summary_params = {
        "db": "pubmed",
        "id": ",".join(id_list),
        "retmode": "xml",
    }
    summary_res = requests.get(summary_url, params=summary_params, timeout=timeout)
    summary_res.raise_for_status()
    # Parse the raw bytes so the XML parser applies the document's own
    # encoding declaration instead of the charset guessed for `.text`.
    root = ET.fromstring(summary_res.content)

    papers = []
    for docsum in root.findall(".//DocSum"):
        paper = {"title": None, "authors": [], "pub_date": None, "link": None}
        for item in docsum.findall("Item"):
            name = item.attrib.get("Name")
            if name == "Title":
                paper["title"] = item.text
            elif name == "PubDate":
                paper["pub_date"] = item.text
            elif name == "AuthorList":
                # Drop empty author entries so the list holds only strings.
                paper["authors"] = [
                    author.text for author in item.findall("Item") if author.text
                ]
        uid_elem = docsum.find("Id")
        # Only build a link when an ID is actually present; an empty <Id>
        # would otherwise yield a ".../None/" URL.
        if uid_elem is not None and uid_elem.text:
            paper["link"] = f"https://pubmed.ncbi.nlm.nih.gov/{uid_elem.text}/"
        papers.append(paper)
    return papers