Spaces:
Sleeping
Sleeping
# genesis/api_clients/pubmed_api.py | |
import requests | |
from xml.etree import ElementTree as ET | |
# Root URL shared by every E-utilities call in this module; the endpoint name
# (e.g. "esearch.fcgi") is appended after a "/", so keep this free of a
# trailing slash.
NCBI_EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
def search_pubmed(query: str, max_results: int = 10, api_key: str = None,
                  timeout: float = 10.0):
    """
    Search PubMed for a given query and return a list of PMIDs.

    Sends a GET request to the ESearch endpoint under ``NCBI_EUTILS_BASE``
    asking for JSON, then reads the ID list out of the decoded response.

    Args:
        query: Search expression, sent as the ``term`` parameter.
        max_results: Maximum number of IDs to request (``retmax``).
        api_key: Optional NCBI API key; only sent when truthy.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout on its own, so without this a stalled connection would
            block the caller indefinitely.

    Returns:
        The list stored at ``esearchresult -> idlist`` in the JSON response,
        or an empty list when either key is missing.

    Raises:
        requests.HTTPError: If the response carries an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": max_results,
    }
    if api_key:
        params["api_key"] = api_key

    response = requests.get(
        f"{NCBI_EUTILS_BASE}/esearch.fcgi",
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()

    data = response.json()
    # Chained .get() calls so a response without results yields [] rather
    # than a KeyError.
    return data.get("esearchresult", {}).get("idlist", [])
def fetch_pubmed_details(pmids: list, api_key: str = None,
                         timeout: float = 10.0):
    """
    Fetch detailed article data for given PMIDs.

    Sends a GET request to the EFetch endpoint under ``NCBI_EUTILS_BASE``
    asking for XML and hands the response body to ``parse_pubmed_xml``.

    Args:
        pmids: PubMed IDs to look up. Items may be strings or integers; each
            one is converted with ``str()`` before being joined.
        api_key: Optional NCBI API key; only sent when truthy.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout on its own, so without this a stalled connection would
            block the caller indefinitely.

    Returns:
        Whatever ``parse_pubmed_xml`` returns for the response body, or an
        empty list (without any network call) when ``pmids`` is empty.

    Raises:
        requests.HTTPError: If the response carries an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    if not pmids:
        return []

    params = {
        "db": "pubmed",
        # str() each ID so integer PMIDs do not make str.join raise TypeError.
        "id": ",".join(str(pmid) for pmid in pmids),
        "retmode": "xml",
    }
    if api_key:
        params["api_key"] = api_key

    response = requests.get(
        f"{NCBI_EUTILS_BASE}/efetch.fcgi",
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    return parse_pubmed_xml(response.text)
def parse_pubmed_xml(xml_text: str):
    """
    Parse PubMed XML into structured dicts.

    Args:
        xml_text: XML document containing zero or more ``PubmedArticle``
            elements at any depth.

    Returns:
        One dict per ``PubmedArticle``, in document order, with the keys
        ``"title"``, ``"abstract"``, ``"journal"`` and ``"year"``. A value is
        ``None`` when the corresponding element is absent or empty.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_text`` is not well-formed.
    """
    root = ET.fromstring(xml_text)
    return [_article_to_dict(node) for node in root.findall(".//PubmedArticle")]


def _element_text(element):
    """Return all text inside ``element``, nested tags included, or None."""
    if element is None:
        return None
    # itertext() walks child elements too, so inline markup such as <i> or
    # <sub> no longer cuts the string off at the first nested tag.
    return "".join(element.itertext()) or None


def _abstract_text(article):
    """Return the article's abstract with all sections joined by newlines."""
    # Structured abstracts carry several AbstractText sections; collect all
    # of them from the main Abstract instead of only the first.
    sections = article.findall(".//Abstract/AbstractText")
    if not sections:
        # No main abstract: fall back to the first AbstractText found
        # anywhere in the article.
        first = article.find(".//AbstractText")
        sections = [] if first is None else [first]
    parts = [text for text in map(_element_text, sections) if text]
    return "\n".join(parts) or None


def _publication_year(article):
    """Return the publication year as a string, or None if unavailable."""
    year_el = article.find(".//PubDate/Year")
    if year_el is not None:
        return year_el.text
    # Some records give a free-form MedlineDate (e.g. "1998 Dec-1999 Jan")
    # in place of Year; use its leading four digits when present.
    medline_el = article.find(".//PubDate/MedlineDate")
    if medline_el is not None and medline_el.text:
        prefix = medline_el.text.strip()[:4]
        if len(prefix) == 4 and prefix.isdigit():
            return prefix
    return None


def _article_to_dict(article):
    """Build the result dict for a single PubmedArticle element."""
    journal_el = article.find(".//Journal/Title")
    return {
        "title": _element_text(article.find(".//ArticleTitle")),
        "abstract": _abstract_text(article),
        "journal": journal_el.text if journal_el is not None else None,
        "year": _publication_year(article),
    }
def search_and_fetch(query: str, max_results: int = 5, api_key: str = None):
    """
    Run a PubMed search and return details for the hits in a single call.

    The query goes to ``search_pubmed`` (bounded by ``max_results``); the IDs
    it returns are passed straight to ``fetch_pubmed_details``. The same
    ``api_key`` is forwarded to both steps.
    """
    return fetch_pubmed_details(
        search_pubmed(query, max_results=max_results, api_key=api_key),
        api_key=api_key,
    )