File size: 3,137 Bytes
9bee6a4
117cd77
9bee6a4
ee653fa
9bee6a4
ee653fa
7227edd
9bee6a4
ee653fa
 
7227edd
 
 
117cd77
ee653fa
 
 
 
ef76eaa
ee653fa
 
ef76eaa
7227edd
ee653fa
 
7227edd
 
 
 
 
 
 
117cd77
ee653fa
117cd77
ee653fa
117cd77
7227edd
ee653fa
 
 
7227edd
ee653fa
7227edd
ee653fa
 
 
 
 
 
7227edd
ee653fa
7227edd
ee653fa
7227edd
 
ee653fa
 
 
7227edd
 
 
ee653fa
117cd77
ee653fa
 
 
 
117cd77
ee653fa
117cd77
ee653fa
 
 
 
 
117cd77
ee653fa
117cd77
ee653fa
117cd77
ee653fa
 
7227edd
ee653fa
 
7227edd
ee653fa
7227edd
ee653fa
7227edd
ee653fa
 
 
 
 
 
 
 
7227edd
ee653fa
7227edd
ee653fa
7227edd
ee653fa
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# genesis/api_clients/ncbi_api.py
import os
import requests
from typing import List, Dict, Optional

# API key read once, at import time, from the NCBI_API_KEY environment
# variable; it is None when the variable is unset, in which case requests are
# sent without an ``api_key`` parameter.
NCBI_API_KEY = os.getenv("NCBI_API_KEY")  # Optional, for higher request limits
# Base URL that each E-utilities endpoint name (esearch.fcgi, esummary.fcgi,
# efetch.fcgi, elink.fcgi) is appended to.
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

def _add_api_key(params: Dict) -> Dict:
    """Insert the module-level API key into ``params`` when one is configured.

    The dict is updated in place and the same object is returned, so the call
    can wrap a dict literal directly.
    """
    if not NCBI_API_KEY:
        return params
    params["api_key"] = NCBI_API_KEY
    return params

# -------------------------
# SEARCH + FETCH FUNCTIONS
# -------------------------
def search_ncbi(
    db: str, term: str, max_results: int = 10, *, timeout: float = 30.0
) -> List[str]:
    """
    Search an NCBI database and return a list of IDs.

    Args:
        db: Entrez database name, e.g. gene, protein, pubmed, taxonomy.
        term: Query string, sent as the ESearch ``term`` parameter.
        max_results: Upper bound on returned IDs, sent as ``retmax``.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.

    Returns:
        The ``esearchresult.idlist`` entries of the JSON response, or an
        empty list when either key is missing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = _add_api_key({
        "db": db,
        "term": term,
        "retmax": max_results,
        "retmode": "json",
    })
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    r = requests.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()
    return r.json().get("esearchresult", {}).get("idlist", [])

def fetch_ncbi_summary(
    db: str, ids: List[str], *, timeout: float = 30.0
) -> List[Dict]:
    """
    Fetch summaries for a list of IDs from NCBI.

    Args:
        db: Entrez database name the IDs belong to.
        ids: Record IDs; they are joined with commas into the ``id`` parameter.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.

    Returns:
        One summary per entry of the response's ``result`` object, in the
        order the response lists them. An empty ``ids`` yields an empty list
        without contacting the server.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    if not ids:
        # Nothing to look up; avoid sending a request with an empty ``id=``.
        return []
    params = _add_api_key({
        "db": db,
        # str() keeps the join working if a caller passes numeric IDs.
        "id": ",".join(map(str, ids)),
        "retmode": "json",
    })
    r = requests.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=timeout)
    r.raise_for_status()
    result = r.json().get("result", {})
    # The "uids" entry sits alongside the per-ID records but is not itself a
    # summary record, so it is skipped.
    return [summary for uid, summary in result.items() if uid != "uids"]

def fetch_ncbi_details(db: str, ids: List[str], *, timeout: float = 30.0) -> str:
    """
    Fetch the full records for a list of IDs as text.

    Only ``retmode=text`` is sent (no ``rettype``), so the record format is
    whatever the server picks for ``db`` in that mode.

    Args:
        db: Entrez database name the IDs belong to.
        ids: Record IDs; they are joined with commas into the ``id`` parameter.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.

    Returns:
        The response body as text. An empty ``ids`` yields an empty string
        without contacting the server.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    if not ids:
        # Nothing to fetch; avoid sending a request with an empty ``id=``.
        return ""
    params = _add_api_key({
        "db": db,
        # str() keeps the join working if a caller passes numeric IDs.
        "id": ",".join(map(str, ids)),
        "retmode": "text",
    })
    r = requests.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()
    return r.text

# -------------------------
# GENE, PROTEIN + TAXONOMY HELPERS
# -------------------------
def search_gene(term: str, max_results: int = 10) -> List[Dict]:
    """
    Look up genes matching ``term`` and return a trimmed record per hit.

    Each returned dict carries the ``uid``, ``name`` and ``description``
    fields of the gene summary (``None`` for a field the summary lacks).
    No summary request is made when the search finds nothing.
    """
    wanted_fields = ("uid", "name", "description")
    gene_ids = search_ncbi("gene", term, max_results)
    if gene_ids:
        records = fetch_ncbi_summary("gene", gene_ids)
    else:
        records = []
    return [
        {field: record.get(field) for field in wanted_fields}
        for record in records
    ]

def get_protein_from_gene(gene_id: str) -> List[Dict]:
    """
    Get protein products from a given gene ID.

    Asks ELink for the protein records linked to ``gene_id`` and then
    returns their summaries via ``fetch_ncbi_summary``.

    Args:
        gene_id: ID of the record in the ``gene`` database.

    Returns:
        Protein summaries, or an empty list when no links are reported.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the link request gets no response in time.
    """
    link_params = _add_api_key({
        "dbfrom": "gene",
        "db": "protein",
        "id": gene_id,
        "retmode": "json",
    })
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    r = requests.get(f"{NCBI_BASE}/elink.fcgi", params=link_params, timeout=30.0)
    r.raise_for_status()
    data = r.json()

    # The same protein ID can be listed under more than one linksetdb; a dict
    # drops the repeats while keeping first-seen order, which keeps the
    # follow-up summary request as short as possible.
    unique_ids: Dict[str, None] = {}
    for linkset in data.get("linksets", []):
        for link in linkset.get("linksetdbs", []):
            unique_ids.update(dict.fromkeys(link.get("links", [])))
    protein_ids = list(unique_ids)

    if not protein_ids:
        return []
    return fetch_ncbi_summary("protein", protein_ids)

def search_taxonomy(term: str) -> List[Dict]:
    """
    Search taxonomy database for species/strain info.

    Args:
        term: Query string passed to the taxonomy search.

    Returns:
        Summaries for up to five matching taxonomy records, or an empty list
        when the search finds nothing.
    """
    ids = search_ncbi("taxonomy", term, max_results=5)
    if not ids:
        # Mirror search_gene: skip the summary request when there are no hits
        # instead of asking the server about an empty ID list.
        return []
    return fetch_ncbi_summary("taxonomy", ids)