Spaces:
Sleeping
Sleeping
File size: 3,137 Bytes
9bee6a4 117cd77 9bee6a4 ee653fa 9bee6a4 ee653fa 7227edd 9bee6a4 ee653fa 7227edd 117cd77 ee653fa ef76eaa ee653fa ef76eaa 7227edd ee653fa 7227edd 117cd77 ee653fa 117cd77 ee653fa 117cd77 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 117cd77 ee653fa 117cd77 ee653fa 117cd77 ee653fa 117cd77 ee653fa 117cd77 ee653fa 117cd77 ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa 7227edd ee653fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# genesis/api_clients/ncbi_api.py
import os
import requests
from typing import List, Dict, Optional
# Root URL that every E-utilities endpoint in this module is appended to.
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
# Optional key taken from the environment (None when unset); used for
# higher request limits.
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")
def _add_api_key(params: Dict) -> Dict:
    """Add the configured API key to ``params`` (in place) and return it."""
    if not NCBI_API_KEY:
        # No key configured: hand the parameters back untouched.
        return params
    params.update(api_key=NCBI_API_KEY)
    return params
# -------------------------
# SEARCH FUNCTIONS
# -------------------------
def search_ncbi(
    db: str,
    term: str,
    max_results: int = 10,
    *,
    timeout: float = 30.0,
) -> List[str]:
    """
    Search an NCBI database and return a list of IDs.

    db examples: gene, protein, pubmed, taxonomy

    Args:
        db: Name of the database to search.
        term: Query expression sent to ESearch.
        max_results: Maximum number of IDs requested (sent as ``retmax``).
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The ``idlist`` found under ``esearchresult`` in the JSON response,
        or an empty list when it is absent or ``term`` is blank.

    Raises:
        requests.HTTPError: If the response has an error status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # A blank query has nothing to match; skip the network round trip.
    if not term or not term.strip():
        return []
    params = _add_api_key({
        "db": db,
        "term": term,
        "retmax": max_results,
        "retmode": "json",
    })
    # requests never times out by default, so a stalled connection would
    # block the caller indefinitely without an explicit limit.
    r = requests.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()
    data = r.json()
    return data.get("esearchresult", {}).get("idlist", [])
def fetch_ncbi_summary(
    db: str,
    ids: List[str],
    *,
    timeout: float = 30.0,
) -> List[Dict]:
    """
    Fetch summaries for a list of IDs from NCBI.

    Args:
        db: Name of the database the IDs belong to.
        ids: Record IDs to summarise; joined with commas into one request.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        One summary entry per key of the response's ``result`` object,
        excluding the ``uids`` entry. Empty when ``ids`` is empty.

    Raises:
        requests.HTTPError: If the response has an error status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Nothing to look up: do not send a request with an empty "id" value.
    if not ids:
        return []
    params = _add_api_key({
        "db": db,
        # str() lets callers pass numeric IDs without breaking the join.
        "id": ",".join(str(uid) for uid in ids),
        "retmode": "json",
    })
    # requests never times out by default; bound the wait explicitly.
    r = requests.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=timeout)
    r.raise_for_status()
    result = r.json().get("result", {})
    # "uids" is not a record itself, so it is left out of the summaries.
    return [summary for uid, summary in result.items() if uid != "uids"]
def fetch_ncbi_details(
    db: str,
    ids: List[str],
    *,
    rettype: Optional[str] = None,
    timeout: float = 30.0,
) -> str:
    """
    Fetch full XML/FASTA/GenBank record for IDs.

    Args:
        db: Name of the database the IDs belong to.
        ids: Record IDs to fetch; joined with commas into one request.
        rettype: Optional EFetch ``rettype`` value selecting the record
            format. When omitted, no ``rettype`` is sent and the format is
            whatever EFetch defaults to for ``db``.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the response has an error status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    query = {
        "db": db,
        # str() lets callers pass numeric IDs without breaking the join.
        "id": ",".join(str(uid) for uid in ids),
        "retmode": "text",
    }
    if rettype is not None:
        query["rettype"] = rettype
    params = _add_api_key(query)
    # requests never times out by default; bound the wait explicitly.
    r = requests.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()
    return r.text
# -------------------------
# GENE, PROTEIN + TAXONOMY HELPERS
# -------------------------
def search_gene(term: str, max_results: int = 10) -> List[Dict]:
    """
    Look up genes matching ``term`` and return their uid, name and description.
    """
    gene_ids = search_ncbi("gene", term, max_results)
    if not gene_ids:
        return []

    wanted_fields = ("uid", "name", "description")
    genes = []
    for record in fetch_ncbi_summary("gene", gene_ids):
        # Fields missing from a summary come back as None.
        genes.append({field: record.get(field) for field in wanted_fields})
    return genes
def get_protein_from_gene(gene_id: str, *, timeout: float = 30.0) -> List[Dict]:
    """
    Get protein products from a given gene ID.

    Args:
        gene_id: ID of the gene whose linked protein records are wanted.
        timeout: Seconds to wait for the link-lookup HTTP response.

    Returns:
        Summaries of the linked protein records, or an empty list when the
        link response contains no links.

    Raises:
        requests.HTTPError: If a response has an error status code.
        requests.Timeout: If the link lookup gets no response within
            ``timeout`` seconds.
    """
    link_params = _add_api_key({
        "dbfrom": "gene",
        "db": "protein",
        "id": gene_id,
        "retmode": "json",
    })
    # requests never times out by default; bound the wait explicitly.
    r = requests.get(f"{NCBI_BASE}/elink.fcgi", params=link_params, timeout=timeout)
    r.raise_for_status()
    data = r.json()

    # A dict is used as an ordered set: an ID may be listed in more than one
    # link group, and repeating it only lengthens the follow-up request.
    unique_ids: Dict[str, None] = {}
    for linkset in data.get("linksets", []):
        for link_group in linkset.get("linksetdbs", []):
            for link in link_group.get("links", []):
                # Normalise to str so the IDs can be comma-joined downstream.
                unique_ids[str(link)] = None

    if not unique_ids:
        return []
    return fetch_ncbi_summary("protein", list(unique_ids))
def search_taxonomy(term: str) -> List[Dict]:
    """
    Search taxonomy database for species/strain info.

    Args:
        term: Query expression for the taxonomy search (at most 5 hits
            are requested).

    Returns:
        Summaries for the matching taxonomy records, or an empty list when
        the search finds nothing.
    """
    ids = search_ncbi("taxonomy", term, max_results=5)
    # Same guard as search_gene: no hits means there is nothing to summarise,
    # so skip the summary request instead of sending an empty ID list.
    if not ids:
        return []
    return fetch_ncbi_summary("taxonomy", ids)
|