# genesis/api_clients/pubmed_api.py
import html
import os
from xml.etree import ElementTree as ET

import requests

PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

# Per-request timeout (seconds) applied to both the search and the fetch call.
_REQUEST_TIMEOUT = 10


def _element_text(elem):
    """Return all text contained in *elem*, or "" when *elem* is None.

    ``Element.text`` only holds the text that precedes the first child tag,
    so a value containing inline markup (``<i>``, ``<sub>``, ``<sup>``) would
    be truncated -- or be ``None`` when the value starts with such a tag.
    ``itertext()`` walks the nested text as well.
    """
    if elem is None:
        return ""
    return "".join(elem.itertext())


def _author_name(author):
    """Build a display name for one ``<Author>`` element ("" if it has none).

    Uses "ForeName LastName" when available, tolerates a missing or empty
    part (e.g. an author listed with a last name only), and falls back to
    ``<CollectiveName>`` when no personal name is present.
    """
    parts = (
        _element_text(author.find("ForeName")).strip(),
        _element_text(author.find("LastName")).strip(),
    )
    personal = " ".join(part for part in parts if part)
    if personal:
        return personal
    return _element_text(author.find("CollectiveName")).strip()


def _parse_article(article):
    """Convert one ``<PubmedArticle>`` element into a result dict."""
    raw_title = _element_text(article.find(".//ArticleTitle"))
    # html.unescape is kept from the original implementation; it decodes any
    # HTML entities left in the title text after XML parsing.
    title = html.unescape(raw_title) if raw_title.strip() else "No title"

    names = [_author_name(author) for author in article.findall(".//Author")]
    names = [name for name in names if name]

    # find() returns the first PMID in document order.
    pmid = _element_text(article.find(".//PMID")).strip()
    link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else ""

    return {
        "title": title,
        "authors": ", ".join(names) if names else "N/A",
        "link": link,
    }


def _get_xml(endpoint, params):
    """GET ``PUBMED_BASE/endpoint`` and return the parsed XML root element.

    Raises:
        requests.exceptions.RequestException: on network or HTTP errors.
        xml.etree.ElementTree.ParseError: if the body is not well-formed XML.
    """
    response = requests.get(
        f"{PUBMED_BASE}/{endpoint}", params=params, timeout=_REQUEST_TIMEOUT
    )
    response.raise_for_status()
    # Parse the raw bytes so the XML parser applies the encoding declared in
    # the document, instead of relying on the charset guessed for `.text`.
    return ET.fromstring(response.content)


def search_pubmed_literature(query: str, max_results: int = 10):
    """
    Search PubMed literature using NCBI E-utilities API.

    Performs two requests: ``esearch`` to obtain the matching PubMed IDs,
    then ``efetch`` to retrieve the details of those articles. If the
    ``PUBMED_API_KEY`` environment variable is set, it is sent with both
    requests.

    Args:
        query (str): Search query (e.g. "synthetic biology cancer therapy")
        max_results (int): Maximum number of results to return.

    Returns:
        list[dict]: Each dict has 'title', 'authors', 'link'. 'authors' is a
        comma-separated string ("N/A" when no author name is available) and
        'link' is the article's PubMed URL ("" when no PMID is present).
        An empty list is returned for a missing/blank query, when nothing
        matches, or when a request or XML parse error occurs (the error is
        printed rather than raised).
    """
    if not query or not query.strip():
        return []

    api_key = os.getenv("PUBMED_API_KEY")  # optional
    auth = {"api_key": api_key} if api_key else {}

    try:
        # Step 1: Search for IDs
        search_root = _get_xml(
            "esearch.fcgi",
            {
                "db": "pubmed",
                "term": query,
                "retmax": max_results,
                "retmode": "xml",
                **auth,
            },
        )
        # Skip empty <Id/> elements so the join below cannot hit None.
        ids = [elem.text for elem in search_root.findall(".//Id") if elem.text]
        if not ids:
            return []

        # Step 2: Fetch article details
        fetch_root = _get_xml(
            "efetch.fcgi",
            {
                "db": "pubmed",
                "id": ",".join(ids),
                "retmode": "xml",
                **auth,
            },
        )
    except requests.exceptions.RequestException as e:
        print(f"[PubMed API Error] {e}")
        return []
    except ET.ParseError as e:
        print(f"[PubMed Parse Error] {e}")
        return []

    return [
        _parse_article(article)
        for article in fetch_root.findall(".//PubmedArticle")
    ]