mgbam's picture
Update genesis/api_clients/pubmed_api.py
b94ef99 verified
raw
history blame
1.83 kB
# genesis/api_clients/pubmed_api.py
import requests
from xml.etree import ElementTree as ET
def search_pubmed(query: str, max_results: int = 10, *, timeout: float = 10.0) -> list:
    """
    Search PubMed using the NCBI E-utilities API.

    Performs two HTTP requests: an ESearch call that resolves ``query`` to a
    list of PubMed IDs, then an ESummary call that fetches the document
    summaries for those IDs.

    Args:
        query: Search term passed to ESearch as ``term``.
        max_results: Maximum number of IDs requested (ESearch ``retmax``).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of dicts with 'title', 'authors', 'pub_date', 'link'.
        The list is empty when the query is blank, ``max_results`` is not
        positive, or the search yields no IDs.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status (via ``raise_for_status``).
        xml.etree.ElementTree.ParseError: If the summary response is not
            well-formed XML.
    """
    # A blank term or a non-positive limit cannot produce any results, so
    # skip the network round trips entirely.
    if not query or not query.strip() or max_results <= 0:
        return []

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

    # Step 1: Search PubMed IDs
    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
    }
    # Without an explicit timeout, requests waits forever on a stalled server.
    search_res = requests.get(base_url, params=search_params, timeout=timeout)
    search_res.raise_for_status()
    id_list = search_res.json().get("esearchresult", {}).get("idlist", [])
    if not id_list:
        return []

    # Step 2: Fetch summaries for IDs
    summary_params = {
        "db": "pubmed",
        "id": ",".join(id_list),
        "retmode": "xml",
    }
    summary_res = requests.get(summary_url, params=summary_params, timeout=timeout)
    summary_res.raise_for_status()
    # Parse the raw bytes so the XML parser applies the encoding declared in
    # the document rather than the charset requests guessed from the headers.
    root = ET.fromstring(summary_res.content)
    return [_parse_docsum(docsum) for docsum in root.findall(".//DocSum")]


def _parse_docsum(docsum) -> dict:
    """Convert one ESummary ``<DocSum>`` element into a paper dict."""
    paper = {"title": None, "authors": [], "pub_date": None, "link": None}
    for item in docsum.findall("Item"):
        name = item.attrib.get("Name")
        if name == "Title":
            paper["title"] = item.text
        elif name == "PubDate":
            paper["pub_date"] = item.text
        elif name == "AuthorList":
            # Authors are nested <Item> children of the AuthorList item.
            paper["authors"] = [author.text for author in item.findall("Item")]
    uid_elem = docsum.find("Id")
    # Only build a link when an ID is actually present; otherwise leave None
    # instead of emitting a ".../None/" URL.
    if uid_elem is not None and uid_elem.text:
        paper["link"] = f"https://pubmed.ncbi.nlm.nih.gov/{uid_elem.text}/"
    return paper