mgbam's picture
Update genesis/api_clients/pubmed_api.py
94b3916 verified
raw
history blame
2.22 kB
# genesis/api_clients/pubmed_api.py
import requests
from xml.etree import ElementTree as ET
# Base URL for the NCBI E-utilities endpoints used in this module
# (esearch.fcgi and efetch.fcgi are appended to it).
NCBI_EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
def search_pubmed(query: str, max_results: int = 10, api_key: str = None, *, timeout: float = 30.0):
    """
    Search PubMed for a given query and return a list of PMIDs.

    Args:
        query: Search expression, sent to ESearch as the ``term`` parameter.
        max_results: Maximum number of IDs to request (sent as ``retmax``).
        api_key: Optional NCBI API key; only sent when truthy.
        timeout: Seconds ``requests`` waits on the connection/read before
            giving up. Pass ``None`` to wait indefinitely.

    Returns:
        The ``esearchresult.idlist`` value of the JSON response, or an empty
        list when either key is missing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": max_results,
    }
    if api_key:
        params["api_key"] = api_key

    # Without an explicit timeout requests blocks forever on a stalled
    # connection, which would hang every caller of this client.
    response = requests.get(
        f"{NCBI_EUTILS_BASE}/esearch.fcgi",
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()

    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])
def fetch_pubmed_details(pmids: list, api_key: str = None, *, timeout: float = 30.0):
    """
    Fetch detailed article data for given PMIDs.

    Args:
        pmids: PubMed IDs to look up. Items may be strings or integers; each
            one is converted with ``str()`` before being sent.
        api_key: Optional NCBI API key; only sent when truthy.
        timeout: Seconds ``requests`` waits on the connection/read before
            giving up. Pass ``None`` to wait indefinitely.

    Returns:
        Whatever ``parse_pubmed_xml`` produces for the EFetch XML response,
        or an empty list when ``pmids`` is empty (no request is made).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    if not pmids:
        return []

    params = {
        "db": "pubmed",
        # str() each ID so integer PMIDs do not make str.join raise TypeError.
        "id": ",".join(str(pmid) for pmid in pmids),
        "retmode": "xml",
    }
    if api_key:
        params["api_key"] = api_key

    # Without an explicit timeout requests blocks forever on a stalled
    # connection, which would hang every caller of this client.
    response = requests.get(
        f"{NCBI_EUTILS_BASE}/efetch.fcgi",
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    return parse_pubmed_xml(response.text)
def parse_pubmed_xml(xml_text: str):
    """
    Parse PubMed XML into structured dicts.

    Args:
        xml_text: XML document containing zero or more ``PubmedArticle``
            elements.

    Returns:
        One dict per ``PubmedArticle`` with the keys ``title``, ``abstract``,
        ``journal`` and ``year``. A value is ``None`` when the corresponding
        element is missing or has no text.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_text`` is not well-formed.
    """
    root = ET.fromstring(xml_text)
    return [
        {
            "title": _element_text(article.find(".//ArticleTitle")),
            "abstract": _abstract_text(article),
            "journal": _element_text(article.find(".//Journal/Title")),
            "year": _publication_year(article),
        }
        for article in root.findall(".//PubmedArticle")
    ]


def _element_text(element):
    """Return all text inside *element*, including nested inline markup, or None."""
    if element is None:
        return None
    # Element.text stops at the first child tag (<i>, <sub>, ...), which
    # truncates titles and abstracts; itertext() walks the whole subtree.
    return "".join(element.itertext()) or None


def _abstract_text(article):
    """Join every section of the article's abstract with newlines, or return None."""
    # Structured abstracts hold several AbstractText children under Abstract.
    sections = article.findall(".//Abstract/AbstractText")
    if not sections:
        # Fall back to the first AbstractText anywhere in the article so the
        # result is never poorer than a plain first-match lookup.
        first = article.find(".//AbstractText")
        sections = [] if first is None else [first]
    texts = [_element_text(section) for section in sections]
    texts = [text for text in texts if text is not None]
    return "\n".join(texts) if texts else None


def _publication_year(article):
    """Return the publication year from PubDate/Year, else from PubDate/MedlineDate."""
    year = _element_text(article.find(".//PubDate/Year"))
    if year is not None:
        return year
    medline_date = _element_text(article.find(".//PubDate/MedlineDate"))
    if medline_date is None:
        return None
    # MedlineDate is free text such as "1998 Dec-1999 Jan"; take the first
    # four-digit token as the year.
    for token in medline_date.replace("-", " ").split():
        if len(token) == 4 and token.isdigit():
            return token
    return None
def search_and_fetch(query: str, max_results: int = 5, api_key: str = None):
    """
    Run a PubMed search and return the details of the matching articles.

    Args:
        query: Search expression handed to ``search_pubmed``.
        max_results: Maximum number of PMIDs to request from the search.
        api_key: Optional NCBI API key, forwarded to both requests.

    Returns:
        The result of ``fetch_pubmed_details`` for the PMIDs found.
    """
    # Step 1: resolve the query to PMIDs.
    matching_ids = search_pubmed(query, api_key=api_key, max_results=max_results)
    # Step 2: look up the article records for those PMIDs.
    article_details = fetch_pubmed_details(matching_ids, api_key=api_key)
    return article_details