# genesis/api_clients/pubmed_api.py
"""Small client for the PubMed endpoints of the NCBI E-utilities API.

``search_pubmed`` wraps ``esearch.fcgi`` (query -> PMIDs),
``fetch_pubmed_details`` wraps ``efetch.fcgi`` (PMIDs -> article records) and
``search_and_fetch`` chains the two.
"""
import os
from typing import List, Dict, Optional
from xml.etree import ElementTree as ET

import requests

NCBI_API_KEY = os.getenv("NCBI_API_KEY")  # Optional but increases rate limits
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

# Seconds to wait for NCBI. `requests` applies no timeout unless one is given,
# so without this a stalled connection would block the caller indefinitely.
REQUEST_TIMEOUT = 30

# Above this many IDs the efetch request is sent as POST so the ID list does
# not have to fit in the URL.
_MAX_IDS_PER_GET = 200


def _with_api_key(params: Dict) -> Dict:
    """Return a copy of *params* with ``api_key`` added when one is configured."""
    merged = dict(params)
    # Skip both None and "" so an empty env var is not sent as a bogus key.
    if NCBI_API_KEY:
        merged["api_key"] = NCBI_API_KEY
    return merged


def _flatten_text(element: Optional[ET.Element]) -> str:
    """Return all text inside *element*, including text nested in child tags."""
    if element is None:
        return ""
    # `.text` stops at the first child tag (<i>, <sub>, ...); itertext() does not.
    return "".join(element.itertext()).strip()


def _find_doi(article: ET.Element) -> Optional[str]:
    """Return the DOI of *article* itself, or None when it has none."""
    # Only the article's own ID list: a recursive ``.//ArticleId`` search would
    # also pick up the IDs of cited papers listed under ReferenceList.
    for id_tag in article.findall("PubmedData/ArticleIdList/ArticleId"):
        if id_tag.get("IdType") == "doi" and id_tag.text:
            return id_tag.text.strip()
    # Fallback: the electronic location ID attached to the article.
    for location in article.findall("MedlineCitation/Article/ELocationID"):
        if location.get("EIdType") == "doi" and location.text:
            return location.text.strip()
    return None


def _publication_year(article: ET.Element) -> str:
    """Return the publication year of *article*, or "Unknown Year"."""
    year = article.findtext(".//PubDate/Year")
    if year:
        return year
    # Some records carry a free-form MedlineDate instead of a Year element;
    # use its leading four-digit year when it has one.
    prefix = (article.findtext(".//PubDate/MedlineDate") or "").strip()[:4]
    if len(prefix) == 4 and prefix.isdigit():
        return prefix
    return "Unknown Year"


def _author_names(article: ET.Element) -> List[str]:
    """Return display names for every author entry of *article*."""
    names = []
    for author in article.findall(".//Author"):
        last = author.findtext("LastName")
        fore = author.findtext("ForeName")
        if last and fore:
            names.append(f"{fore} {last}")
        elif last:
            # Author recorded with a last name only.
            names.append(last)
        else:
            # Group authorship is stored as a CollectiveName element.
            collective = author.findtext("CollectiveName")
            if collective:
                names.append(collective)
    return names


def _parse_article(article: ET.Element) -> Dict:
    """Convert one ``PubmedArticle`` element into a plain dict."""
    title_node = article.find(".//ArticleTitle")
    title = _flatten_text(title_node) if title_node is not None else "No title"

    sections = (_flatten_text(node) for node in article.findall(".//AbstractText"))
    abstract = " ".join(section for section in sections if section)

    pmid = article.findtext(".//PMID")
    return {
        "pmid": pmid,
        "title": title,
        "abstract": abstract,
        "journal": article.findtext(".//Journal/Title", default="Unknown Journal"),
        "pub_date": _publication_year(article),
        "doi": _find_doi(article),
        "authors": _author_names(article),
        "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
    }


def search_pubmed(query: str, max_results: int = 10) -> List[str]:
    """Search PubMed and return a list of PMIDs.

    Args:
        query: PubMed search term, sent as the ``term`` parameter.
        max_results: Upper bound on returned IDs, sent as ``retmax``.

    Returns:
        The text of every ``Id`` element in the response, in document order.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    params = _with_api_key({
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
    })
    r = requests.get(
        f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=REQUEST_TIMEOUT
    )
    r.raise_for_status()
    # Parse the raw bytes so the XML parser applies the document's own
    # declared encoding instead of a charset guessed from HTTP headers.
    root = ET.fromstring(r.content)
    return [id_tag.text for id_tag in root.findall(".//Id") if id_tag.text]


def fetch_pubmed_details(pmids: List[str]) -> List[Dict]:
    """Fetch detailed information for a list of PMIDs.

    Args:
        pmids: PubMed IDs to look up. Non-string IDs are converted with
            ``str``. An empty list returns ``[]`` without a network call.

    Returns:
        One dict per ``PubmedArticle`` element in the response, with keys
        ``pmid``, ``title``, ``abstract``, ``journal``, ``pub_date``, ``doi``,
        ``authors`` and ``url``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    if not pmids:
        return []

    params = _with_api_key({
        "db": "pubmed",
        "id": ",".join(str(pmid) for pmid in pmids),
        "retmode": "xml",
    })
    url = f"{NCBI_BASE}/efetch.fcgi"
    if len(pmids) > _MAX_IDS_PER_GET:
        # Long ID lists go in the request body rather than the query string.
        r = requests.post(url, data=params, timeout=REQUEST_TIMEOUT)
    else:
        r = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()

    # Bytes, not r.text: see search_pubmed for the encoding rationale.
    root = ET.fromstring(r.content)
    return [_parse_article(article) for article in root.findall(".//PubmedArticle")]


def search_and_fetch(query: str, max_results: int = 10) -> List[Dict]:
    """Convenience function: search and fetch results in one step.

    Args:
        query: PubMed search term.
        max_results: Upper bound on the number of articles returned.

    Returns:
        The article dicts produced by ``fetch_pubmed_details`` for the PMIDs
        that ``search_pubmed`` finds.
    """
    pmids = search_pubmed(query, max_results)
    return fetch_pubmed_details(pmids)