Spaces:
Sleeping
Sleeping
# genesis/api_clients/pubmed_api.py | |
import requests | |
import xml.etree.ElementTree as ET | |
from typing import List, Dict, Optional | |
from datetime import datetime | |
PUBMED_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" | |
PUBMED_FETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi" | |
NCBI_API_KEY = None # Optional: Set in Hugging Face secrets for higher rate limits | |
def search_pubmed(query: str, max_results: int = 20, start_date: Optional[str] = None, end_date: Optional[str] = None, timeout: float = 30.0) -> List[str]:
    """
    Search PubMed for a given query and return a list of PubMed IDs.

    Args:
        query: Entrez search term. A blank or whitespace-only query returns
            an empty list without contacting the server.
        max_results: Maximum number of IDs to request (sent as ``retmax``).
        start_date: Optional lower publication-date bound (YYYY/MM/DD format).
        end_date: Optional upper publication-date bound (YYYY/MM/DD format).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``esearchresult.idlist`` entries of the JSON response, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
        requests.RequestException: On connection failures or timeout.
    """
    # Nothing to search for: skip the round trip entirely.
    if not query or not query.strip():
        return []

    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
    }
    # Only send the key when one is configured, so an empty string is never
    # transmitted as a (bogus) credential.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # mindate and maxdate have to be supplied together, so when the caller
    # gives only one bound the other is filled with an open-ended sentinel
    # instead of silently discarding the filter.
    if start_date or end_date:
        params["mindate"] = start_date or "1000/01/01"
        params["maxdate"] = end_date or "3000/12/31"
        params["datetype"] = "pdat"

    # Without a timeout requests would wait indefinitely on a stalled socket.
    response = requests.get(PUBMED_SEARCH_URL, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])
def _element_text(element: Optional[ET.Element]) -> str:
    """Return all text inside *element* (nested inline markup included), whitespace-collapsed."""
    if element is None:
        return ""
    # itertext() also yields text inside child tags such as <i> or <sub>,
    # which a plain ``.text`` access would cut off.
    return " ".join("".join(element.itertext()).split())


def _format_pub_date(pub_date: Optional[ET.Element]) -> str:
    """Build a "Year-Month-Day" string from the parts that are present, else MedlineDate, else "Unknown"."""
    if pub_date is None:
        return "Unknown"
    year = (pub_date.findtext("Year") or "").strip()
    if year:
        parts = [year]
        month = (pub_date.findtext("Month") or "").strip()
        if month:
            parts.append(month)
            day = (pub_date.findtext("Day") or "").strip()
            if day:
                parts.append(day)
        # Joining only the available parts avoids results such as "2020--".
        return "-".join(parts)
    medline_date = (pub_date.findtext("MedlineDate") or "").strip()
    return medline_date or "Unknown"


def _author_names(article: ET.Element) -> List[str]:
    """Collect "ForeName LastName" (or CollectiveName) for every Author element under *article*."""
    names = []
    for author in article.findall(".//Author"):
        first = (author.findtext("ForeName") or "").strip()
        last = (author.findtext("LastName") or "").strip()
        name = f"{first} {last}".strip()
        if not name:
            # Group authors carry a CollectiveName instead of personal names.
            name = (author.findtext("CollectiveName") or "").strip()
        if name:
            names.append(name)
    return names


def _parse_pubmed_article(article: ET.Element) -> Dict:
    """Convert one PubmedArticle element into the result dictionary."""
    title = _element_text(article.find(".//ArticleTitle")) or "No title"
    abstract_parts = [_element_text(node) for node in article.findall(".//AbstractText")]
    abstract = " ".join(part for part in abstract_parts if part) or "No abstract"
    journal = (article.findtext(".//Journal/Title") or "").strip() or "Unknown Journal"
    pmid = (article.findtext("MedlineCitation/PMID") or article.findtext(".//PMID") or "").strip()
    return {
        "title": title,
        "abstract": abstract,
        "authors": _author_names(article),
        "journal": journal,
        "publication_date": _format_pub_date(article.find(".//PubDate")),
        # An empty link is returned rather than a URL ending in "/None/".
        "pubmed_link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "",
    }


def fetch_pubmed_details(pmid_list: List[str], timeout: float = 30.0) -> List[Dict]:
    """
    Fetch detailed metadata for a list of PubMed IDs.

    Args:
        pmid_list: PubMed IDs to look up. Blank entries are ignored; an
            empty list returns ``[]`` without contacting the server.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        One dictionary per ``PubmedArticle`` element in the response, with
        the keys ``title``, ``abstract``, ``authors``, ``journal``,
        ``publication_date`` and ``pubmed_link``. Missing fields fall back
        to "No title", "No abstract", an empty author list,
        "Unknown Journal", "Unknown" and an empty link respectively.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
        requests.RequestException: On connection failures or timeout.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    ids = [str(pmid).strip() for pmid in (pmid_list or [])]
    ids = [pmid for pmid in ids if pmid]
    if not ids:
        return []

    params = {
        "db": "pubmed",
        "id": ",".join(ids),
        "retmode": "xml",
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # Without a timeout requests would wait indefinitely on a stalled socket.
    response = requests.get(PUBMED_FETCH_URL, params=params, timeout=timeout)
    response.raise_for_status()
    # Parse the raw bytes so the XML declaration decides the encoding instead
    # of the charset requests guesses from the HTTP headers.
    root = ET.fromstring(response.content)

    # Every field lookup in the helpers tolerates missing elements, so no
    # article is silently dropped because one optional tag is absent.
    return [_parse_pubmed_article(article) for article in root.findall(".//PubmedArticle")]
def search_and_fetch_pubmed(query: str, max_results: int = 20, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[Dict]:
    """
    Run a PubMed search and return the detail records for the matching IDs.

    All four arguments are forwarded unchanged to ``search_pubmed``; the ID
    list it returns is handed to ``fetch_pubmed_details``, whose result is
    returned as-is.
    """
    return fetch_pubmed_details(
        search_pubmed(query, max_results, start_date, end_date)
    )