# genesis/api_clients/ncbi_api.py
"""Small client for the NCBI E-utilities HTTP endpoints.

Wraps esearch, esummary, efetch and elink with plain functions that return
parsed JSON fragments (or raw text for efetch). Every request carries the
optional API key and a timeout; HTTP error statuses are raised as
``requests.HTTPError``.
"""

import os
import requests
from typing import List, Dict, Optional

NCBI_API_KEY: Optional[str] = os.getenv("NCBI_API_KEY")  # Optional, for higher request limits
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

# Seconds to wait on connect/read before giving up. Without a timeout,
# requests blocks indefinitely when the server stops responding.
NCBI_TIMEOUT = 30


def _add_api_key(params: Dict) -> Dict:
    """Attach the API key to ``params`` if one is configured.

    The dict is modified in place and also returned, so the call can be
    used inline around a dict literal.
    """
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY
    return params


def _request(endpoint: str, params: Dict) -> requests.Response:
    """GET ``{NCBI_BASE}/{endpoint}`` and return the successful response.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.Timeout: no response within ``NCBI_TIMEOUT`` seconds.
    """
    response = requests.get(
        f"{NCBI_BASE}/{endpoint}",
        params=_add_api_key(params),
        timeout=NCBI_TIMEOUT,
    )
    response.raise_for_status()
    return response


def _join_ids(ids: List[str]) -> str:
    """Build the comma-separated ``id`` parameter; tolerates non-string IDs."""
    return ",".join(str(uid) for uid in ids)


# -------------------------
# SEARCH FUNCTIONS
# -------------------------

def search_ncbi(db: str, term: str, max_results: int = 10) -> List[str]:
    """Search an NCBI database and return a list of IDs.

    Args:
        db: Database name, e.g. ``gene``, ``protein``, ``pubmed``, ``taxonomy``.
        term: Query string sent as the ``term`` parameter.
        max_results: Sent as ``retmax``.

    Returns:
        The ``esearchresult.idlist`` entry of the JSON response, or an empty
        list when that key is absent.
    """
    data = _request("esearch.fcgi", {
        "db": db,
        "term": term,
        "retmax": max_results,
        "retmode": "json",
    }).json()
    return data.get("esearchresult", {}).get("idlist", [])


def fetch_ncbi_summary(db: str, ids: List[str]) -> List[Dict]:
    """Fetch summaries for a list of IDs from NCBI.

    Args:
        db: Database name.
        ids: Record IDs to summarise.

    Returns:
        One entry per key of the response's ``result`` object, skipping the
        ``uids`` index key. Empty when ``ids`` is empty.
    """
    if not ids:
        # Nothing to look up; avoid sending a request with an empty id list.
        return []
    data = _request("esummary.fcgi", {
        "db": db,
        "id": _join_ids(ids),
        "retmode": "json",
    }).json()
    return [
        summary
        for uid, summary in data.get("result", {}).items()
        if uid != "uids"
    ]


def fetch_ncbi_details(db: str, ids: List[str]) -> str:
    """Fetch the full records for IDs as text.

    The request uses ``retmode=text`` and sends no ``rettype``, so the
    record format is whatever the endpoint returns for ``db`` by default.

    Returns:
        The raw response body.
    """
    return _request("efetch.fcgi", {
        "db": db,
        "id": _join_ids(ids),
        "retmode": "text",
    }).text


# -------------------------
# GENE + PATHWAY HELPERS
# -------------------------

def search_gene(term: str, max_results: int = 10) -> List[Dict]:
    """Search for genes and return gene IDs + names.

    Returns:
        One dict per hit with the keys ``uid``, ``name`` and ``description``
        (each ``None`` when missing from the summary); empty when the search
        finds nothing.
    """
    ids = search_ncbi("gene", term, max_results)
    if not ids:
        return []
    return [
        {
            "uid": s.get("uid"),
            "name": s.get("name"),
            "description": s.get("description"),
        }
        for s in fetch_ncbi_summary("gene", ids)
    ]


def get_protein_from_gene(gene_id: str) -> List[Dict]:
    """Get protein products from a given gene ID.

    Follows gene -> protein links via elink, then returns the protein
    summaries for every linked ID. Empty when there are no links.
    """
    data = _request("elink.fcgi", {
        "dbfrom": "gene",
        "db": "protein",
        "id": gene_id,
        "retmode": "json",
    }).json()

    protein_ids = []
    for linkset in data.get("linksets", []):
        for link in linkset.get("linksetdbs", []):
            protein_ids.extend(link.get("links", []))

    # The same ID can appear in more than one link set; drop repeats while
    # keeping first-seen order so the summary request stays short.
    protein_ids = list(dict.fromkeys(protein_ids))
    if not protein_ids:
        return []
    return fetch_ncbi_summary("protein", protein_ids)


def search_taxonomy(term: str) -> List[Dict]:
    """Search taxonomy database for species/strain info.

    Returns summaries for up to 5 matching taxonomy IDs; empty when the
    search finds nothing.
    """
    ids = search_ncbi("taxonomy", term, max_results=5)
    if not ids:
        return []
    return fetch_ncbi_summary("taxonomy", ids)