Spaces:
Sleeping
Sleeping
# genesis/api_clients/pubmed_api.py
import os | |
import requests | |
from typing import List, Dict, Optional | |
from xml.etree import ElementTree as ET | |
# Optional, but supplying a key increases NCBI's rate limits. An unset or
# empty environment variable is normalised to None so that a blank
# "api_key=" value is never put into a request.
NCBI_API_KEY = os.getenv("NCBI_API_KEY") or None
# Base URL that the E-utilities endpoint paths (esearch.fcgi, efetch.fcgi)
# are appended to.
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
def search_pubmed(query: str, max_results: int = 10, timeout: float = 30.0) -> List[str]:
    """
    Search PubMed and return a list of PMIDs.

    Args:
        query: Search expression, sent to ESearch as ``term``.
        max_results: Maximum number of PMIDs requested, sent as ``retmax``.
        timeout: Seconds ``requests`` waits on the server before giving up.

    Returns:
        The text of every non-empty ``<Id>`` element in the response, in
        document order. An empty or whitespace-only ``query`` returns ``[]``
        without any request being made.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (via ``raise_for_status``).
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    if not query or not query.strip():
        return []

    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
    }
    # Only send the key when one is configured; a blank value would otherwise
    # go out as "api_key=".
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # Without an explicit timeout a stalled connection would block forever.
    response = requests.get(
        f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout
    )
    response.raise_for_status()

    # Parse the raw bytes so the XML parser applies the document's own
    # encoding declaration rather than a charset guessed from HTTP headers.
    root = ET.fromstring(response.content)
    return [id_tag.text for id_tag in root.findall(".//Id") if id_tag.text]
def _element_text(element) -> str:
    """Return all text inside *element*, including text nested in child tags."""
    return "".join(element.itertext()).strip()


def _publication_year(article) -> str:
    """Return PubDate/Year, else a four-digit token from MedlineDate, else a placeholder."""
    year = article.findtext(".//PubDate/Year")
    if year:
        return year
    medline_date = article.findtext(".//PubDate/MedlineDate", default="")
    # Split on spaces and hyphens so a range such as "1998 Dec-1999 Jan"
    # still yields its leading year.
    for token in medline_date.replace("-", " ").split():
        if len(token) == 4 and token.isdigit():
            return token
    return "Unknown Year"


def _article_doi(article) -> Optional[str]:
    """Return the DOI of the article itself, or None if it has none."""
    # Only look at the ArticleIdList directly under PubmedData: ArticleId
    # elements nested elsewhere (e.g. inside a reference list) identify
    # other papers, not this one.
    for id_tag in article.findall("PubmedData/ArticleIdList/ArticleId"):
        if id_tag.attrib.get("IdType") == "doi" and id_tag.text:
            return id_tag.text.strip()
    for location in article.findall(".//ELocationID"):
        if location.attrib.get("EIdType") == "doi" and location.text:
            return location.text.strip()
    return None


def _author_names(article) -> List[str]:
    """Return display names for every Author element that carries a name."""
    names = []
    for author in article.findall(".//Author"):
        last = author.findtext("LastName")
        fore = author.findtext("ForeName")
        collective = author.findtext("CollectiveName")
        if last and fore:
            names.append(f"{fore} {last}")
        elif last:
            names.append(last)
        elif collective:
            names.append(collective)
    return names


def fetch_pubmed_details(pmids: List[str], timeout: float = 30.0) -> List[Dict]:
    """
    Fetch detailed information for a list of PMIDs.

    Args:
        pmids: PubMed identifiers; joined with commas into the ``id`` parameter.
        timeout: Seconds ``requests`` waits on the server before giving up.

    Returns:
        One dict per ``<PubmedArticle>`` in the response, with the keys
        ``pmid``, ``title``, ``abstract``, ``journal``, ``pub_date``, ``doi``,
        ``authors`` and ``url``. ``doi`` is ``None`` when the record has no
        DOI; ``authors`` is a list of name strings. An empty ``pmids``
        returns ``[]`` without any request being made.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (via ``raise_for_status``).
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    if not pmids:
        return []

    params = {
        "db": "pubmed",
        "id": ",".join(map(str, pmids)),
        "retmode": "xml",
    }
    # Only send the key when one is configured; a blank value would otherwise
    # go out as "api_key=".
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # Without an explicit timeout a stalled connection would block forever.
    response = requests.get(
        f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout
    )
    response.raise_for_status()

    # Parse the raw bytes so the XML parser applies the document's own
    # encoding declaration rather than a charset guessed from HTTP headers.
    root = ET.fromstring(response.content)

    articles = []
    for article in root.findall(".//PubmedArticle"):
        # itertext() keeps text that sits inside inline markup such as
        # <i> or <sub>; findtext()/.text would stop at the first child tag.
        title_element = article.find(".//ArticleTitle")
        title = _element_text(title_element) if title_element is not None else ""
        sections = (
            _element_text(section)
            for section in article.findall(".//AbstractText")
        )
        pmid = article.findtext(".//PMID")
        articles.append({
            "pmid": pmid,
            "title": title or "No title",
            "abstract": " ".join(text for text in sections if text),
            "journal": article.findtext(".//Title", default="Unknown Journal"),
            "pub_date": _publication_year(article),
            "doi": _article_doi(article),
            "authors": _author_names(article),
            "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
        })
    return articles
def search_and_fetch(query: str, max_results: int = 10) -> List[Dict]:
    """
    Run a PubMed search and return the detail records of its hits.

    Passes ``query`` and ``max_results`` to ``search_pubmed`` and hands the
    resulting PMIDs straight to ``fetch_pubmed_details``, returning whatever
    that call returns.
    """
    return fetch_pubmed_details(search_pubmed(query, max_results))