mgbam's picture
Update genesis/api_clients/pubmed_api.py
026bd38 verified
raw
history blame
2.56 kB
# genesis/api_clients/pubmed_api.py
import os
import requests
from typing import List, Dict, Optional
from xml.etree import ElementTree as ET
# API key taken from the NCBI_API_KEY environment variable; it is optional
# (None when the variable is unset) but increases rate limits.
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")
# Common URL prefix for the E-utilities endpoints used in this module.
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
def search_pubmed(query: str, max_results: int = 10, timeout: float = 30.0) -> List[str]:
    """
    Search PubMed and return a list of PMIDs.

    Args:
        query: Search term sent to esearch as ``term``.
        max_results: Upper bound on returned IDs, sent as ``retmax``.
        timeout: Seconds handed to ``requests.get`` as its timeout so a
            stalled connection raises instead of blocking indefinitely.

    Returns:
        The text of every ``Id`` element in the esearch response. A blank
        query or a non-positive ``max_results`` yields ``[]`` without any
        network request.

    Raises:
        requests.HTTPError: If esearch answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    # Nothing to search for, or nothing requested: skip the round trip.
    if not query or not query.strip() or max_results <= 0:
        return []

    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
    }
    # Send api_key only when one is configured; an empty string would
    # otherwise be transmitted as a blank key.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    r = requests.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()

    # Parse the raw bytes so the XML parser applies the document's own
    # encoding declaration instead of requests' guessed text decoding.
    root = ET.fromstring(r.content)
    # Skip empty <Id/> elements so the result really is a list of strings.
    return [id_tag.text for id_tag in root.findall(".//Id") if id_tag.text]
def fetch_pubmed_details(pmids: List[str], timeout: float = 30.0) -> List[Dict]:
    """
    Fetch detailed information for a list of PMIDs.

    Args:
        pmids: PubMed identifiers to look up. Blank or ``None`` entries are
            ignored; if nothing usable remains, ``[]`` is returned without
            any network request.
        timeout: Seconds handed to ``requests.get`` as its timeout so a
            stalled connection raises instead of blocking indefinitely.

    Returns:
        One dict per ``PubmedArticle`` element in the efetch response, with
        the keys ``pmid``, ``title``, ``abstract``, ``journal``,
        ``pub_date``, ``doi``, ``authors`` and ``url``.

    Raises:
        requests.HTTPError: If efetch answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    if not pmids:
        return []
    # Drop unusable entries so they are not joined into the id parameter.
    ids = [str(p).strip() for p in pmids if p is not None and str(p).strip()]
    if not ids:
        return []

    params = {
        "db": "pubmed",
        "id": ",".join(ids),
        "retmode": "xml",
    }
    # Send api_key only when one is configured.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    r = requests.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()

    # Parse the raw bytes so the XML parser applies the document's own
    # encoding declaration instead of requests' guessed text decoding.
    root = ET.fromstring(r.content)
    return [_parse_pubmed_article(node) for node in root.findall(".//PubmedArticle")]


def _element_text(element: ET.Element) -> str:
    """Return all text inside *element*, including text nested in child tags."""
    # element.text stops at the first child tag (e.g. <i>, <sub>); itertext()
    # walks the whole subtree so inline markup does not truncate the value.
    return "".join(element.itertext())


def _parse_pubmed_article(article: ET.Element) -> Dict:
    """Build the result dict for a single ``PubmedArticle`` element."""
    title_node = article.find(".//ArticleTitle")
    title = _element_text(title_node) if title_node is not None else "No title"

    # Join every abstract section, skipping the ones that carry no text.
    sections = (_element_text(node) for node in article.findall(".//AbstractText"))
    abstract = " ".join(text for text in sections if text)

    # Scoped to Journal so a Title element elsewhere in the record cannot
    # be mistaken for the journal name.
    journal = article.findtext(".//Journal/Title", default="Unknown Journal")
    pub_date = article.findtext(".//PubDate/Year", default="Unknown Year")

    # Look only at the article's own identifier list: an unscoped
    # ".//ArticleId" search also matches identifiers nested under the
    # reference list and could return a cited paper's DOI.
    doi_node = article.find("PubmedData/ArticleIdList/ArticleId[@IdType='doi']")
    doi = doi_node.text if doi_node is not None else None

    authors = []
    for author in article.findall(".//Author"):
        last = author.findtext("LastName")
        fore = author.findtext("ForeName")
        # Only authors with both name parts are listed.
        if last and fore:
            authors.append(f"{fore} {last}")

    pmid = article.findtext(".//PMID")
    return {
        "pmid": pmid,
        "title": title,
        "abstract": abstract,
        "journal": journal,
        "pub_date": pub_date,
        "doi": doi,
        "authors": authors,
        "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
    }
def search_and_fetch(query: str, max_results: int = 10) -> List[Dict]:
    """
    Run a PubMed search and return the detail records for its hits.

    ``query`` and ``max_results`` are forwarded to ``search_pubmed``; the
    PMIDs it returns are handed straight to ``fetch_pubmed_details``, whose
    result is returned unchanged.
    """
    return fetch_pubmed_details(search_pubmed(query, max_results))