Spaces:
Sleeping
Sleeping
# genesis/literature.py | |
import os | |
import requests | |
from typing import List, Dict | |
# NCBI E-utilities credentials, read once from the environment at import
# time; each is None when its environment variable is not set.
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")
NCBI_EMAIL = os.environ.get("NCBI_EMAIL")
def search_pubmed(query: str, max_results: int = 5, sort: str = "relevance") -> List[Dict]:
    """
    Search PubMed for research articles via the NCBI E-utilities.

    Args:
        query: PubMed search term.
        max_results: Maximum number of articles to return.
        sort: "relevance" for best-match ordering; any other value
            (e.g. "pub date" or "most recent") sorts by publication date.

    Returns:
        List of dicts with the keys title, authors, journal, year, pmid and
        url, in the order the search returned them. An empty list is returned
        when the query is blank, max_results is not positive, nothing
        matches, or any step of the lookup fails.
    """
    # Nothing to look up: skip the network round-trips entirely.
    if not query or not query.strip() or max_results <= 0:
        return []

    try:
        # Sent with both requests. `requests` omits parameters whose value is
        # None, so unset environment variables simply drop out of the URL.
        credentials = {"api_key": NCBI_API_KEY, "email": NCBI_EMAIL}

        # Step 1: Search PubMed for IDs.
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        search_params = {
            "db": "pubmed",
            "term": query,
            "retmax": max_results,
            # Use the documented "pub_date" key: a literal "+" would be
            # percent-encoded by requests and no longer mean "pub date".
            "sort": "relevance" if sort == "relevance" else "pub_date",
            "retmode": "json",
            **credentials,
        }
        search_resp = requests.get(search_url, params=search_params, timeout=10)
        search_resp.raise_for_status()
        ids = search_resp.json().get("esearchresult", {}).get("idlist", [])
        if not ids:
            return []

        # Step 2: Fetch summaries for those IDs.
        summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
        summary_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "json",
            **credentials,
        }
        summary_resp = requests.get(summary_url, params=summary_params, timeout=10)
        summary_resp.raise_for_status()
        summaries = summary_resp.json().get("result", {})

        papers = []
        for pmid in ids:
            paper = summaries.get(pmid, {})
            # Skip author entries without a name instead of failing the
            # whole search on one malformed record.
            authors = [a.get("name") for a in paper.get("authors", []) if a.get("name")]
            # pubdate looks like "2023 Jan 15"; keep only the leading year
            # when there is one, otherwise pass the raw value through.
            pubdate = paper.get("pubdate") or "N/A"
            year = pubdate[:4] if pubdate[:4].isdigit() else pubdate
            papers.append({
                "title": paper.get("title", "N/A"),
                "authors": ", ".join(authors) if authors else "N/A",
                "journal": paper.get("fulljournalname", "N/A"),
                "year": year,
                "pmid": pmid,
                "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            })
        return papers
    except Exception as e:
        # Deliberate best-effort boundary: literature lookup must never
        # crash the caller, so report the failure and return no results.
        print(f"[PubMed] Failed: {e}")
        return []
def format_literature_markdown(papers: List[Dict]) -> str:
    """
    Format a literature list as a Markdown table.

    Args:
        papers: Dicts carrying the keys title, url, authors, journal and year.

    Returns:
        A four-column Markdown table (Title, Authors, Journal, Year) with one
        row per paper and the title linked to the paper's url, or the string
        "No relevant papers found." when papers is empty.

    Raises:
        KeyError: If a paper is missing one of the required keys.
    """
    if not papers:
        return "No relevant papers found."

    lines = ["| Title | Authors | Journal | Year |\n|-------|---------|---------|------|\n"]
    for p in papers:
        title = _escape_table_cell(p["title"])
        authors = _escape_table_cell(p["authors"])
        journal = _escape_table_cell(p["journal"])
        year = _escape_table_cell(p["year"])
        lines.append(f"| [{title}]({p['url']}) | {authors} | {journal} | {year} |\n")
    return "".join(lines)


def _escape_table_cell(value) -> str:
    """Make a value safe for one Markdown table cell (no raw pipes or line breaks)."""
    text = str(value)
    # A line break would end the table row early; a bare pipe would start a new column.
    text = text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
    return text.replace("|", "\\|")