# genesis/api_clients/pubmed_api.py
"""Small client for the NCBI E-utilities PubMed endpoints (esearch / efetch)."""
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import Dict, List, Optional

import requests

PUBMED_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
PUBMED_FETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
NCBI_API_KEY = None  # Optional: Set in Hugging Face secrets for higher rate limits

# Seconds to wait for NCBI before giving up; without a timeout requests can
# block forever on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30

# E-utilities requires mindate and maxdate to be sent together, so a missing
# bound is replaced by one of these open-ended sentinels.
_OPEN_MIN_DATE = "1000/01/01"
_OPEN_MAX_DATE = "3000/12/31"


def _request_params(**extra) -> Dict:
    """Build the query parameters shared by every E-utilities call."""
    params = {"db": "pubmed", **extra}
    # Only send the key when one is configured; an empty value would be
    # transmitted as "api_key=".
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY
    return params


def _element_text(element: Optional[ET.Element]) -> str:
    """Return all text inside *element*, including text nested in inline markup."""
    if element is None:
        return ""
    return "".join(element.itertext()).strip()


def _format_pub_date(pub_date: Optional[ET.Element]) -> str:
    """Render a <PubDate> element as "Year[-Month[-Day]]", or "Unknown"."""
    if pub_date is None:
        return "Unknown"
    year = (pub_date.findtext("Year") or "").strip()
    if not year:
        # Some records carry only a free-text <MedlineDate>.
        return (pub_date.findtext("MedlineDate") or "").strip() or "Unknown"
    parts = [year]
    month = (pub_date.findtext("Month") or "").strip()
    if month:
        parts.append(month)
        day = (pub_date.findtext("Day") or "").strip()
        if day:
            parts.append(day)
    # Joining only the parts that exist avoids dangling hyphens like "2020--".
    return "-".join(parts)


def _parse_authors(article: ET.Element) -> List[str]:
    """Collect author display names ("ForeName LastName" or a collective name)."""
    authors = []
    for author in article.findall(".//Author"):
        last = (author.findtext("LastName") or "").strip()
        first = (author.findtext("ForeName") or "").strip()
        name = f"{first} {last}".strip()
        if not name:
            # Group authors have no personal name parts.
            name = (author.findtext("CollectiveName") or "").strip()
        if name:
            authors.append(name)
    return authors


def _parse_article(article: ET.Element) -> Dict:
    """Convert one <PubmedArticle> element into a plain metadata dict."""
    abstract_parts = [
        _element_text(section) for section in article.findall(".//AbstractText")
    ]
    abstract = " ".join(part for part in abstract_parts if part)
    pmid = (article.findtext(".//PMID") or "").strip()
    return {
        "title": _element_text(article.find(".//ArticleTitle")) or "No title",
        "abstract": abstract or "No abstract",
        "authors": _parse_authors(article),
        "journal": (article.findtext(".//Journal/Title") or "").strip()
        or "Unknown Journal",
        "publication_date": _format_pub_date(article.find(".//PubDate")),
        # Leave the link empty rather than emitting ".../None/" when no PMID.
        "pubmed_link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "",
    }


def search_pubmed(query: str, max_results: int = 20, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[str]:
    """Search PubMed and return the matching PubMed IDs.

    Args:
        query: PubMed search term.
        max_results: Maximum number of IDs to request (sent as ``retmax``).
        start_date: Optional lower publication-date bound (YYYY/MM/DD).
        end_date: Optional upper publication-date bound (YYYY/MM/DD).
            If only one bound is given, the other is left open-ended.

    Returns:
        The ``idlist`` from the esearch response, or an empty list if absent.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    params = _request_params(term=query, retmax=max_results, retmode="json")
    if start_date or end_date:
        params["mindate"] = start_date or _OPEN_MIN_DATE
        params["maxdate"] = end_date or _OPEN_MAX_DATE
        params["datetype"] = "pdat"

    response = requests.get(
        PUBMED_SEARCH_URL, params=params, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])


def fetch_pubmed_details(pmid_list: List[str]) -> List[Dict]:
    """Fetch metadata for a list of PubMed IDs.

    Args:
        pmid_list: PubMed IDs to look up. An empty list returns ``[]``
            without making a request.

    Returns:
        One dict per ``<PubmedArticle>`` in the response, with keys
        ``title``, ``abstract``, ``authors``, ``journal``,
        ``publication_date`` and ``pubmed_link``. Missing fields fall back
        to placeholder values instead of dropping the article.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    if not pmid_list:
        return []

    params = _request_params(id=",".join(pmid_list), retmode="xml")
    response = requests.get(
        PUBMED_FETCH_URL, params=params, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    response.raise_for_status()

    # Parse the raw bytes so the XML declaration's encoding is honoured rather
    # than the encoding requests guesses from the HTTP headers.
    root = ET.fromstring(response.content)
    return [_parse_article(article) for article in root.findall(".//PubmedArticle")]


def search_and_fetch_pubmed(query: str, max_results: int = 20, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[Dict]:
    """Search PubMed and fetch metadata for the hits in one call.

    Takes the same arguments as ``search_pubmed`` and returns the same list
    of dicts as ``fetch_pubmed_details``.
    """
    pmids = search_pubmed(query, max_results, start_date, end_date)
    return fetch_pubmed_details(pmids)