Spaces:
Sleeping
Sleeping
# genesis/api_clients/ncbi_api.py
import os | |
import requests | |
from typing import List, Dict, Optional | |
# Optional NCBI API key (allows higher request limits), read once at import
# time. Surrounding whitespace -- e.g. a trailing newline pasted into a secrets
# form -- is stripped, and an unset or blank variable is normalized to None so
# a malformed key is never sent along with requests.
NCBI_API_KEY: Optional[str] = (os.getenv("NCBI_API_KEY") or "").strip() or None

# Base URL shared by every E-utilities endpoint used in this module.
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
def _add_api_key(params: Dict) -> Dict:
    """Add the configured NCBI API key to a request parameter dict.

    The dictionary is modified in place and the very same object is handed
    back, so the call can wrap a dict literal. When no key is configured the
    parameters are returned untouched.
    """
    if not NCBI_API_KEY:
        return params
    params.update(api_key=NCBI_API_KEY)
    return params
# -------------------------
# SEARCH FUNCTIONS
# -------------------------
def search_ncbi(
    db: str,
    term: str,
    max_results: int = 10,
    timeout: float = 30.0,
) -> List[str]:
    """Search an NCBI database and return a list of matching IDs.

    Args:
        db: Database to query, e.g. ``gene``, ``protein``, ``pubmed`` or
            ``taxonomy``.
        term: Query string, sent to ESearch as ``term``.
        max_results: Maximum number of IDs to request (sent as ``retmax``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``esearchresult.idlist`` entries of the JSON response, or an
        empty list when the response does not contain that field.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = _add_api_key({
        "db": db,
        "term": term,
        "retmax": max_results,
        "retmode": "json",
    })
    response = requests.get(
        f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout
    )
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])
def fetch_ncbi_summary(
    db: str,
    ids: List[str],
    timeout: float = 30.0,
) -> List[Dict]:
    """Fetch summaries for a list of IDs from NCBI.

    Args:
        db: Database the IDs belong to.
        ids: Record IDs; they are comma-joined into a single ``id`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        One summary dict per entry of the response's ``result`` object,
        excluding the ``uids`` index entry. An empty ``ids`` list yields an
        empty list without contacting the server.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Nothing to look up: skip the round trip instead of sending ``id=""``.
    if not ids:
        return []

    params = _add_api_key({
        "db": db,
        "id": ",".join(ids),
        "retmode": "json",
    })
    response = requests.get(
        f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=timeout
    )
    response.raise_for_status()
    result = response.json().get("result", {})
    # "uids" is the list of returned IDs, not a record, so it is skipped.
    return [summary for uid, summary in result.items() if uid != "uids"]
def fetch_ncbi_details(
    db: str,
    ids: List[str],
    rettype: Optional[str] = None,
    retmode: str = "text",
    timeout: float = 30.0,
) -> str:
    """Fetch full records for a list of IDs via EFetch.

    Args:
        db: Database the IDs belong to.
        ids: Record IDs; they are comma-joined into a single ``id`` parameter.
        rettype: Optional EFetch ``rettype`` (record format). When omitted the
            parameter is not sent at all, which matches the previous
            behaviour of this function; the format is then whatever the
            server chooses for ``db``.
        retmode: EFetch ``retmode``; defaults to ``"text"`` as before.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    query = {
        "db": db,
        "id": ",".join(ids),
        "retmode": retmode,
    }
    # Only send rettype when the caller asked for a specific format, so the
    # default request is identical to the one this function always made.
    if rettype is not None:
        query["rettype"] = rettype

    response = requests.get(
        f"{NCBI_BASE}/efetch.fcgi",
        params=_add_api_key(query),
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text
# -------------------------
# GENE + PATHWAY HELPERS
# -------------------------
def search_gene(term: str, max_results: int = 10) -> List[Dict]:
    """Look up genes matching ``term``.

    Runs a search against the ``gene`` database, then fetches the summaries
    of the hits. Each returned dict carries the ``uid``, ``name`` and
    ``description`` fields of one summary (``None`` for a field the summary
    lacks). An empty list is returned when the search finds nothing.
    """
    gene_ids = search_ncbi("gene", term, max_results)
    if not gene_ids:
        return []

    genes = []
    for record in fetch_ncbi_summary("gene", gene_ids):
        genes.append({
            "uid": record.get("uid"),
            "name": record.get("name"),
            "description": record.get("description"),
        })
    return genes
def get_protein_from_gene(gene_id: str, timeout: float = 30.0) -> List[Dict]:
    """Get protein products linked to a given gene ID.

    Asks ELink for ``protein`` records linked from the ``gene`` record, then
    fetches the summaries of the linked proteins.

    Args:
        gene_id: ID of the gene record.
        timeout: Seconds to wait for the ELink response before giving up
            (applies to the link lookup performed here).

    Returns:
        The protein summaries, or an empty list when no links are returned.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the link lookup does not answer within
            ``timeout`` seconds.
    """
    link_params = _add_api_key({
        "dbfrom": "gene",
        "db": "protein",
        "id": gene_id,
        "retmode": "json",
    })
    response = requests.get(
        f"{NCBI_BASE}/elink.fcgi", params=link_params, timeout=timeout
    )
    response.raise_for_status()
    data = response.json()

    # Collect the linked IDs from every link set. dict.fromkeys drops IDs that
    # appear more than once while keeping first-seen order, so the follow-up
    # request is not padded with repeats. IDs are passed through str() so they
    # can always be comma-joined (NOTE(review): they are presumably already
    # strings in the JSON response -- confirm).
    protein_ids = list(dict.fromkeys(
        str(link_id)
        for linkset in data.get("linksets", [])
        for linksetdb in linkset.get("linksetdbs", [])
        for link_id in linksetdb.get("links", [])
    ))
    if not protein_ids:
        return []
    return fetch_ncbi_summary("protein", protein_ids)
def search_taxonomy(term: str) -> List[Dict]:
    """Search the taxonomy database for species/strain info.

    Requests at most five matching IDs and returns their summaries. When the
    search finds nothing an empty list is returned straight away, mirroring
    ``search_gene``, instead of requesting summaries for an empty ID list.
    """
    ids = search_ncbi("taxonomy", term, max_results=5)
    if not ids:
        return []
    return fetch_ncbi_summary("taxonomy", ids)