Spaces:
Running
Running
# genesis/api_clients/ncbi_api.py | |
import os | |
import requests | |
from typing import List, Dict | |
from datetime import datetime | |
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" | |
NCBI_API_KEY = os.getenv("NCBI_API_KEY") # Optional — speeds up requests | |
session = requests.Session() | |
# ------------------------- | |
# Generic NCBI Search | |
# ------------------------- | |
def ncbi_search(db: str, term: str, retmax: int = 10, *, timeout: float = 30.0) -> List[str]:
    """
    Search an NCBI database and return a list of IDs.

    Issues an ESearch request through the shared module-level session and
    reads ``esearchresult.idlist`` from the JSON response.

    Args:
        db: Name of the Entrez database to query (e.g. "pubmed", "gene").
        term: Search expression sent as the ``term`` parameter.
        retmax: Value sent as ``retmax`` (upper bound on returned IDs).
        timeout: Timeout in seconds handed to ``requests`` for this call.

    Returns:
        The IDs found in the response, or an empty list when the response
        has no ``esearchresult``/``idlist`` entry.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = {
        "db": db,
        "term": term,
        "retmode": "json",
        "retmax": retmax,
    }
    # The API key is optional; only send it when one is configured.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY
    # requests never times out by default, so a stalled connection would
    # otherwise block the caller indefinitely.
    r = session.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()
    return r.json().get("esearchresult", {}).get("idlist", [])
# ------------------------- | |
# Generic NCBI Fetch | |
# ------------------------- | |
def ncbi_fetch(
    db: str,
    ids: List[str],
    rettype: str = "abstract",
    retmode: str = "text",
    *,
    timeout: float = 30.0,
) -> str:
    """
    Fetch detailed records from an NCBI database.

    Issues an EFetch request through the shared module-level session and
    returns the raw response body.

    Args:
        db: Name of the Entrez database to fetch from.
        ids: Record IDs; they are joined with commas into the ``id`` parameter.
        rettype: Value sent as ``rettype`` (e.g. "abstract", "fasta").
        retmode: Value sent as ``retmode`` (e.g. "text").
        timeout: Timeout in seconds handed to ``requests`` for this call.

    Returns:
        The response body as text, or an empty string when ``ids`` is empty.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # An empty ID list cannot match any record, so skip the round trip
    # instead of sending a request with an empty ``id`` parameter.
    if not ids:
        return ""
    params = {
        "db": db,
        "id": ",".join(ids),
        "rettype": rettype,
        "retmode": retmode,
    }
    # The API key is optional; only send it when one is configured.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY
    # requests never times out by default, so a stalled connection would
    # otherwise block the caller indefinitely.
    r = session.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()
    return r.text
# ------------------------- | |
# PubMed Literature Search | |
# ------------------------- | |
def search_pubmed(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search PubMed for biomedical literature.

    Looks up matching PubMed IDs with ``ncbi_search`` and then requests
    their summaries from ESummary in a single call.

    Args:
        term: Search expression forwarded to the ID search.
        retmax: Maximum number of articles to look up.

    Returns:
        One dict per ID, in search order, with the keys ``title``,
        ``authors`` (list of author names), ``pubdate``, ``journal``,
        ``uid`` and ``link``. Fields missing from the summary are ``None``
        (``authors`` is an empty list). Returns an empty list when the
        search yields no IDs.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the summary request gets no response in time.
    """
    ids = ncbi_search("pubmed", term, retmax)
    if not ids:
        return []
    params = {
        "db": "pubmed",
        "id": ",".join(ids),
        "retmode": "json",
    }
    # The API key is optional; only send it when one is configured.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the caller.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()
    # The summary payload is looked up by ID under the "result" key.
    records = r.json().get("result", {})
    papers = []
    for pid in ids:
        rec = records.get(pid, {})
        papers.append({
            "title": rec.get("title"),
            # Skip author entries without a "name" instead of raising KeyError.
            "authors": [a["name"] for a in rec.get("authors", []) if "name" in a],
            "pubdate": rec.get("pubdate"),
            "journal": rec.get("fulljournalname"),
            "uid": pid,
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{pid}/",
        })
    return papers
# ------------------------- | |
# Gene Search | |
# ------------------------- | |
def search_genes(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Gene database for gene information.

    Looks up matching Gene IDs with ``ncbi_search`` and then requests their
    summaries from ESummary in a single call.

    Args:
        term: Search expression forwarded to the ID search.
        retmax: Maximum number of genes to look up.

    Returns:
        One dict per ID, in search order, with the keys ``symbol``,
        ``description``, ``organism`` (scientific name), ``uid`` and
        ``link``. Fields missing from the summary are ``None``. Returns an
        empty list when the search yields no IDs.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the summary request gets no response in time.
    """
    ids = ncbi_search("gene", term, retmax)
    if not ids:
        return []
    params = {
        "db": "gene",
        "id": ",".join(ids),
        "retmode": "json",
    }
    # The API key is optional; only send it when one is configured.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the caller.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()
    # The summary payload is looked up by ID under the "result" key.
    records = r.json().get("result", {})
    genes = []
    for gid in ids:
        rec = records.get(gid, {})
        # ``or {}`` also covers an "organism" key that is present but null,
        # which would otherwise raise AttributeError on the nested lookup.
        organism = rec.get("organism") or {}
        genes.append({
            "symbol": rec.get("name"),
            "description": rec.get("description"),
            "organism": organism.get("scientificname"),
            "uid": gid,
            "link": f"https://www.ncbi.nlm.nih.gov/gene/{gid}",
        })
    return genes
# ------------------------- | |
# Protein Search | |
# ------------------------- | |
def _split_fasta_records(fasta_text: str) -> List[str]:
    """Split multi-record FASTA text into one string per ``>`` header."""
    records: List[List[str]] = []
    for line in fasta_text.splitlines():
        if line.startswith(">"):
            # A header line opens a new record.
            records.append([line])
        elif records and line.strip():
            # Sequence line of the current record; blank separator lines
            # and any text before the first header are dropped.
            records[-1].append(line)
    return ["\n".join(lines) for lines in records]


def search_proteins(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Protein database for protein sequences.

    Looks up matching Protein IDs with ``ncbi_search``, fetches their FASTA
    text with one ``ncbi_fetch`` call, and attaches to each ID its own
    FASTA record rather than the whole multi-record response.

    Args:
        term: Search expression forwarded to the ID search.
        retmax: Maximum number of proteins to look up.

    Returns:
        One dict per ID, in search order, with the keys ``id`` and
        ``fasta``. Returns an empty list when the search yields no IDs.
    """
    ids = ncbi_search("protein", term, retmax)
    if not ids:
        return []
    fasta_data = ncbi_fetch("protein", ids, rettype="fasta", retmode="text")
    records = _split_fasta_records(fasta_data)
    if len(records) != len(ids):
        # The response cannot be paired one-to-one with the IDs, so keep
        # the previous behaviour of attaching the full text to every ID.
        return [{"id": pid, "fasta": fasta_data} for pid in ids]
    # Assumes the fetch returns records in the order the IDs were
    # requested — TODO confirm against the E-utilities documentation.
    return [{"id": pid, "fasta": record} for pid, record in zip(ids, records)]
# ------------------------- | |
# Structure Search | |
# ------------------------- | |
def fetch_ncbi_structure(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Structure database and return structure metadata.

    Looks up matching Structure IDs with ``ncbi_search`` and then requests
    their summaries from ESummary in a single call.

    Args:
        term: Search expression forwarded to the ID search.
        retmax: Maximum number of structures to look up.

    Returns:
        One dict per ID, in search order, with the keys ``structure_id``,
        ``title``, ``organism``, ``release_date`` and ``link``. Fields
        missing from the summary are ``None``. Returns an empty list when
        the search yields no IDs.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the summary request gets no response in time.
    """
    ids = ncbi_search("structure", term, retmax)
    if not ids:
        return []
    params = {
        "db": "structure",
        "id": ",".join(ids),
        "retmode": "json",
    }
    # The API key is optional; only send it when one is configured.
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the caller.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()
    # The summary payload is looked up by ID under the "result" key.
    records = r.json().get("result", {})
    structures = []
    for sid in ids:
        rec = records.get(sid, {})
        # NOTE(review): confirm that the structure summary really exposes
        # "title", "organism" and "releasedate"; if it uses other field
        # names these three values will always be None.
        structures.append({
            "structure_id": sid,
            "title": rec.get("title"),
            "organism": rec.get("organism"),
            "release_date": rec.get("releasedate"),
            "link": f"https://www.ncbi.nlm.nih.gov/structure/{sid}",
        })
    return structures
# ------------------------- | |
# Build Cross-Database Profile | |
# ------------------------- | |
def ncbi_cross_profile(term: str) -> Dict:
    """
    Given a term, pull literature, genes, proteins, and structures for unified output.

    Runs the four database searches one after another with fixed result
    limits (5 papers, 5 genes, 2 proteins, 3 structures). An exception
    raised by any of them propagates to the caller.

    Args:
        term: Search expression passed unchanged to every search.

    Returns:
        A dict with the keys ``term``, ``timestamp`` (naive UTC time in ISO
        format, taken before the searches run), ``literature``, ``genes``,
        ``proteins`` and ``structures``.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop tzinfo so the string keeps the same naive format.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    return {
        "term": term,
        "timestamp": timestamp,
        "literature": search_pubmed(term, retmax=5),
        "genes": search_genes(term, retmax=5),
        "proteins": search_proteins(term, retmax=2),
        "structures": fetch_ncbi_structure(term, retmax=3),
    }