Spaces:
Sleeping
Sleeping
File size: 3,358 Bytes
1411075 ec0d077 9fbaf8f ec0d077 1411075 9fbaf8f 94b3916 9fbaf8f b94ef99 9fbaf8f b94ef99 94b3916 1411075 ec0d077 9fbaf8f 1411075 9fbaf8f 94b3916 b94ef99 94b3916 9fbaf8f 94b3916 9fbaf8f 94b3916 9fbaf8f b94ef99 1411075 94b3916 1411075 9fbaf8f ec0d077 9fbaf8f 1411075 9fbaf8f 94b3916 9fbaf8f 94b3916 ec0d077 9fbaf8f 94b3916 9fbaf8f 94b3916 9fbaf8f ec0d077 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# genesis/api_clients/pubmed_api.py
import requests
import xml.etree.ElementTree as ET
from typing import List, Dict, Optional
from datetime import datetime
# NCBI E-utilities endpoints used by this module:
# - ESearch is queried by search_pubmed() to obtain PubMed IDs for a search term.
# - EFetch is queried by fetch_pubmed_details() to obtain article records as XML.
PUBMED_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
PUBMED_FETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
# Value sent as the "api_key" request parameter by both request functions.
# It is hard-coded to None here; nothing in this module reads it from the
# environment, so a deployment that wants a key must assign it explicitly.
NCBI_API_KEY = None # Optional: Set in Hugging Face secrets for higher rate limits
def search_pubmed(query: str, max_results: int = 20, start_date: Optional[str] = None, end_date: Optional[str] = None, timeout: float = 30.0) -> List[str]:
    """
    Search PubMed for a given query and return a list of PubMed IDs.

    Args:
        query: Search expression sent to ESearch as ``term``.
        max_results: Maximum number of IDs to request (``retmax``).
        start_date: Optional lower bound on publication date (YYYY/MM/DD).
        end_date: Optional upper bound on publication date (YYYY/MM/DD).
        timeout: Seconds to wait for the HTTP request before giving up.

    Either date bound may be supplied on its own. ESearch only honours
    ``mindate``/``maxdate`` when both are present, so the missing side is
    filled with an open-ended bound instead of silently dropping the filter.

    Returns:
        The PubMed IDs as strings, or an empty list when the response
        contains no ID list.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
    }
    # Only send the key when one is configured; an empty value would be
    # transmitted as a bogus "api_key=" parameter.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    if start_date or end_date:
        # Far-past / far-future sentinels make a one-sided range open-ended.
        params["mindate"] = start_date or "1000/01/01"
        params["maxdate"] = end_date or "3000/12/31"
        params["datetype"] = "pdat"

    # Without a timeout a stalled connection would block the caller forever.
    r = requests.get(PUBMED_SEARCH_URL, params=params, timeout=timeout)
    r.raise_for_status()
    data = r.json()
    return data.get("esearchresult", {}).get("idlist", [])
def _element_text(element: Optional[ET.Element]) -> str:
    """Return all text inside *element*, including text nested in inline markup."""
    if element is None:
        return ""
    # .text alone stops at the first child tag (<i>, <sub>, <sup>, ...), which
    # truncates or drops titles and abstracts that contain formatting.
    return "".join(element.itertext()).strip()


def _format_pub_date(pub_date: Optional[ET.Element]) -> str:
    """Render a PubDate element as 'Year[-Month[-Day]]', or 'Unknown'."""
    if pub_date is None:
        return "Unknown"
    year = (pub_date.findtext("Year", "") or "").strip()
    if not year:
        # Some records carry only a free-text MedlineDate (e.g. "2019 Nov-Dec").
        medline_date = (pub_date.findtext("MedlineDate", "") or "").strip()
        return medline_date or "Unknown"
    parts = [year]
    # Append month, then day, stopping at the first missing component so the
    # result never contains dangling separators such as "2020--".
    for tag in ("Month", "Day"):
        value = (pub_date.findtext(tag, "") or "").strip()
        if not value:
            break
        parts.append(value)
    return "-".join(parts)


def _parse_pubmed_article(article: ET.Element) -> Dict:
    """Extract the metadata dictionary for a single PubmedArticle element."""
    title = _element_text(article.find(".//ArticleTitle")) or "No title"

    sections = (_element_text(node) for node in article.findall(".//AbstractText"))
    abstract = " ".join(section for section in sections if section) or "No abstract"

    authors = []
    for author in article.findall(".//Author"):
        last = author.findtext("LastName", "") or ""
        first = author.findtext("ForeName", "") or ""
        if last or first:
            authors.append(f"{first} {last}".strip())
            continue
        # Group/consortium authors have no personal name, only CollectiveName.
        collective = (author.findtext("CollectiveName", "") or "").strip()
        if collective:
            authors.append(collective)

    journal = article.findtext(".//Journal/Title", "") or "Unknown Journal"
    pmid = (article.findtext(".//PMID", "") or "").strip()

    return {
        "title": title,
        "abstract": abstract,
        "authors": authors,
        "journal": journal,
        "publication_date": _format_pub_date(article.find(".//PubDate")),
        # An empty link is easier for callers to detect than ".../None/".
        "pubmed_link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "",
    }


def fetch_pubmed_details(pmid_list: List[str], timeout: float = 30.0) -> List[Dict]:
    """
    Fetch detailed metadata for a list of PubMed IDs.

    Args:
        pmid_list: PubMed IDs to look up. An empty list short-circuits
            without making a request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        One dictionary per PubmedArticle in the response, with the keys
        ``title``, ``abstract``, ``authors``, ``journal``,
        ``publication_date`` and ``pubmed_link``. Missing fields fall back
        to placeholders ("No title", "No abstract", "Unknown Journal",
        "Unknown", empty link) rather than causing the record to be dropped.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection failures or timeouts.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    if not pmid_list:
        return []
    params = {
        "db": "pubmed",
        "id": ",".join(str(pmid) for pmid in pmid_list),
        "retmode": "xml",
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    r = requests.get(PUBMED_FETCH_URL, params=params, timeout=timeout)
    r.raise_for_status()
    # Parse the raw bytes so the XML declaration decides the encoding; r.text
    # may be decoded with a wrong guessed charset before the parser sees it.
    root = ET.fromstring(r.content)
    # Every field extractor is None-safe, so no blanket try/except is needed
    # and incomplete records are kept with placeholder values.
    return [_parse_pubmed_article(article) for article in root.findall(".//PubmedArticle")]
def search_and_fetch_pubmed(query: str, max_results: int = 20, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[Dict]:
    """
    Run a PubMed search and return the detail records for its hits.

    Convenience wrapper: the IDs produced by ``search_pubmed`` for the given
    query and optional date bounds are handed straight to
    ``fetch_pubmed_details``, whose result is returned unchanged.
    """
    return fetch_pubmed_details(
        search_pubmed(query, max_results, start_date, end_date)
    )
|