# Synthetic_Biology / genesis / literature.py
# mgbam's picture
# Create literature.py
# 469e58f verified
# raw
# history blame
# 2.56 kB
# genesis/literature.py
import os
import requests
from typing import List, Dict
# NCBI E-utilities credentials, looked up once in the process environment at
# import time. Each value is None when its environment variable is not set.
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")
NCBI_EMAIL = os.environ.get("NCBI_EMAIL")
def _year_from_pubdate(pubdate) -> str:
    """Return the leading 4-digit year of an ESummary ``pubdate`` string.

    Falls back to the raw (stripped) value when it does not start with a
    year, and to "N/A" when it is empty.
    """
    text = str(pubdate or "").strip()
    lead = text[:4]
    if len(lead) == 4 and lead.isdigit():
        return lead
    return text or "N/A"


def search_pubmed(query: str, max_results: int = 5, sort: str = "relevance") -> List[Dict]:
    """
    Search PubMed for research articles.

    Runs an ESearch request to collect matching PMIDs, then an ESummary
    request to fetch their metadata. This is a best-effort lookup: any
    failure is printed and an empty list is returned instead of raising.

    Args:
        query: PubMed search term. A blank query returns [] without any
            network request.
        max_results: Maximum number of articles to return. Values <= 0
            return [] without any network request.
        sort: "relevance" for best-match ordering; any other value
            (e.g. "pub date" or "most recent") sorts by publication date.

    Returns:
        List of dicts with keys "title", "authors", "journal", "year",
        "pmid" and "url". Fields missing from the summary are "N/A".
    """
    try:
        # Nothing to search for: skip the round trip entirely.
        if not query or not str(query).strip() or max_results <= 0:
            return []

        # requests percent-encodes parameter values itself, so the value must
        # be the plain sort key. A literal "pub+date" would be transmitted as
        # "pub%2Bdate", which is not a sort order ESearch knows.
        is_relevance = str(sort).strip().lower() == "relevance"
        sort_key = "relevance" if is_relevance else "pub_date"

        # requests omits parameters whose value is None, so unset credentials
        # simply do not appear in the query string.
        credentials = {"api_key": NCBI_API_KEY, "email": NCBI_EMAIL}

        # Step 1: Search PubMed for IDs
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        search_params = {
            "db": "pubmed",
            "term": query,
            "retmax": max_results,
            "sort": sort_key,
            "retmode": "json",
            **credentials,
        }
        r = requests.get(search_url, params=search_params, timeout=10)
        r.raise_for_status()
        ids = r.json().get("esearchresult", {}).get("idlist", [])
        if not ids:
            return []

        # Step 2: Fetch summaries for IDs (send the same credentials so this
        # call is not held to the lower unauthenticated rate limit).
        summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
        summary_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "json",
            **credentials,
        }
        s = requests.get(summary_url, params=summary_params, timeout=10)
        s.raise_for_status()
        summaries = s.json().get("result", {})

        papers = []
        for pmid in ids:
            paper = summaries.get(pmid)
            if not isinstance(paper, dict):
                paper = {}
            # Tolerate malformed author entries instead of failing the
            # whole search on a single missing "name".
            authors = [
                a["name"]
                for a in paper.get("authors") or []
                if isinstance(a, dict) and a.get("name")
            ]
            papers.append({
                "title": paper.get("title") or "N/A",
                "authors": ", ".join(authors) if authors else "N/A",
                "journal": paper.get("fulljournalname") or "N/A",
                # pubdate looks like "2021 Mar 15"; callers expect a year.
                "year": _year_from_pubdate(paper.get("pubdate")),
                "pmid": pmid,
                "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            })
        return papers
    except Exception as e:
        # Deliberately broad: literature lookup is optional for callers, so
        # network, HTTP and parsing errors all degrade to "no results".
        print(f"[PubMed] Failed: {e}")
        return []
def _md_cell(value) -> str:
    """Render *value* as text that is safe inside a single markdown table cell."""
    text = str(value)
    # A table row must stay on one line, and an unescaped pipe would be read
    # as a column separator.
    text = text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
    return text.replace("|", "\\|")


def format_literature_markdown(papers: List[Dict]) -> str:
    """Format literature list into markdown table.

    Args:
        papers: Dicts with "title", "url", "authors", "journal" and "year"
            keys. Missing text fields are rendered as "N/A"; a paper without
            a "url" gets a plain (unlinked) title.

    Returns:
        A markdown table with one row per paper (each line newline
        terminated), or "No relevant papers found." when *papers* is empty.
    """
    if not papers:
        return "No relevant papers found."

    lines = [
        "| Title | Authors | Journal | Year |",
        "|-------|---------|---------|------|",
    ]
    for p in papers:
        title = _md_cell(p.get("title", "N/A"))
        url = p.get("url")
        linked_title = f"[{title}]({url})" if url else title
        authors = _md_cell(p.get("authors", "N/A"))
        journal = _md_cell(p.get("journal", "N/A"))
        year = _md_cell(p.get("year", "N/A"))
        lines.append(f"| {linked_title} | {authors} | {journal} | {year} |")
    # Join once instead of growing a string with += on every row.
    return "\n".join(lines) + "\n"