File size: 5,274 Bytes
9bee6a4
117cd77
9bee6a4
eea1b53
800d67f
9bee6a4
eea1b53
800d67f
 
 
9bee6a4
9b0c279
800d67f
9b0c279
800d67f
9b0c279
800d67f
9b0c279
eea1b53
800d67f
 
 
 
eea1b53
800d67f
 
eea1b53
800d67f
eea1b53
 
800d67f
eea1b53
 
800d67f
eea1b53
800d67f
9b0c279
800d67f
9b0c279
eea1b53
800d67f
 
 
 
eea1b53
800d67f
 
 
 
eea1b53
117cd77
800d67f
eea1b53
800d67f
 
 
 
117cd77
800d67f
117cd77
800d67f
 
9b0c279
eea1b53
 
800d67f
 
 
eea1b53
800d67f
 
 
 
eea1b53
 
800d67f
 
 
 
 
 
 
 
 
 
 
eea1b53
800d67f
117cd77
ee653fa
800d67f
ee653fa
800d67f
117cd77
800d67f
117cd77
800d67f
 
 
 
eea1b53
800d67f
 
 
eea1b53
800d67f
 
 
 
eea1b53
 
800d67f
 
 
 
 
 
 
 
 
 
 
 
117cd77
800d67f
 
 
 
117cd77
800d67f
117cd77
800d67f
 
eea1b53
 
800d67f
 
 
eea1b53
39e5a96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eea1b53
800d67f
eea1b53
800d67f
7227edd
39e5a96
7227edd
eea1b53
800d67f
 
 
 
39e5a96
 
eea1b53
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# genesis/api_clients/ncbi_api.py
import os
import requests
from typing import List, Dict
from datetime import datetime

# Base URL that every E-utilities endpoint in this module is appended to.
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
# Read once at import time; when set, it is sent as the `api_key` parameter.
NCBI_API_KEY = os.getenv("NCBI_API_KEY")  # Optional — speeds up requests

# Single HTTP session reused by the request helpers in this module.
session = requests.Session()

# -------------------------
# Generic NCBI Search
# -------------------------
def ncbi_search(db: str, term: str, retmax: int = 10, timeout: float = 30.0) -> List[str]:
    """
    Search an NCBI database and return a list of IDs.

    Args:
        db: Database name sent as the ``db`` query parameter (e.g. "pubmed").
        term: Query string sent as the ``term`` query parameter.
        retmax: Maximum number of IDs requested from the service.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``esearchresult.idlist`` entries of the JSON response, or an
        empty list when the term is blank or the response has no ID list.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # A blank query cannot match anything, so skip the network round-trip.
    if not term or not term.strip():
        return []

    params = {
        "db": db,
        "term": term,
        "retmode": "json",
        "retmax": retmax,
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # Without an explicit timeout, requests waits forever on a stalled server.
    r = session.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()

    return r.json().get("esearchresult", {}).get("idlist", [])

# -------------------------
# Generic NCBI Fetch
# -------------------------
def ncbi_fetch(
    db: str,
    ids: List[str],
    rettype: str = "abstract",
    retmode: str = "text",
    timeout: float = 30.0,
) -> str:
    """
    Fetch detailed records from an NCBI database.

    Args:
        db: Database name sent as the ``db`` query parameter.
        ids: Record IDs; they are joined with commas into the ``id`` parameter.
        rettype: Value sent as the ``rettype`` query parameter.
        retmode: Value sent as the ``retmode`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The raw response body as text, or an empty string when ``ids`` is
        empty (no request is made in that case).

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Nothing to fetch: avoid sending a request with an empty `id` parameter.
    if not ids:
        return ""

    params = {
        "db": db,
        # str() lets callers pass integer IDs as well as strings.
        "id": ",".join(str(i) for i in ids),
        "rettype": rettype,
        "retmode": retmode,
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # Without an explicit timeout, requests waits forever on a stalled server.
    r = session.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()

    return r.text

# -------------------------
# PubMed Literature Search
# -------------------------
def search_pubmed(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search PubMed for biomedical literature.

    Looks up matching IDs with ``ncbi_search`` and then requests their
    summaries from the ``esummary.fcgi`` endpoint.

    Args:
        term: Query string.
        retmax: Maximum number of IDs requested from the search step.

    Returns:
        One dict per ID, in search order, with the keys ``title``,
        ``authors``, ``pubdate``, ``journal``, ``uid`` and ``link``. Fields
        missing from the summary are ``None`` (``authors`` is an empty list).
        Returns an empty list for a blank term or when nothing matches.

    Raises:
        requests.HTTPError: If the summary request returns an error status.
        requests.Timeout: If the summary request exceeds the timeout.
    """
    # A blank query cannot match anything, so skip the network round-trips.
    if not term or not term.strip():
        return []

    ids = ncbi_search("pubmed", term, retmax)
    if not ids:
        return []

    params = {
        "db": "pubmed",
        "id": ",".join(ids),
        "retmode": "json",
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # Without an explicit timeout, requests waits forever on a stalled server.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()

    records = r.json().get("result", {})
    papers = []
    for pid in ids:
        rec = records.get(pid, {})
        papers.append({
            "title": rec.get("title"),
            # Skip author entries without a name instead of raising KeyError.
            "authors": [a["name"] for a in (rec.get("authors") or []) if "name" in a],
            "pubdate": rec.get("pubdate"),
            "journal": rec.get("fulljournalname"),
            "uid": pid,
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{pid}/",
        })
    return papers

# -------------------------
# Gene Search
# -------------------------
def search_genes(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Gene database for gene information.

    Looks up matching IDs with ``ncbi_search`` and then requests their
    summaries from the ``esummary.fcgi`` endpoint.

    Args:
        term: Query string.
        retmax: Maximum number of IDs requested from the search step.

    Returns:
        One dict per ID, in search order, with the keys ``symbol``,
        ``description``, ``organism``, ``uid`` and ``link``. Fields missing
        from the summary are ``None``. Returns an empty list for a blank
        term or when nothing matches.

    Raises:
        requests.HTTPError: If the summary request returns an error status.
        requests.Timeout: If the summary request exceeds the timeout.
    """
    # A blank query cannot match anything, so skip the network round-trips.
    if not term or not term.strip():
        return []

    ids = ncbi_search("gene", term, retmax)
    if not ids:
        return []

    params = {
        "db": "gene",
        "id": ",".join(ids),
        "retmode": "json",
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # Without an explicit timeout, requests waits forever on a stalled server.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()

    records = r.json().get("result", {})
    genes = []
    for gid in ids:
        rec = records.get(gid, {})
        # `or {}` also covers an "organism" key that is present but null.
        organism = rec.get("organism") or {}
        genes.append({
            "symbol": rec.get("name"),
            "description": rec.get("description"),
            "organism": organism.get("scientificname"),
            "uid": gid,
            "link": f"https://www.ncbi.nlm.nih.gov/gene/{gid}",
        })
    return genes

# -------------------------
# Protein Search
# -------------------------
def _split_fasta_records(fasta_text: str) -> List[str]:
    """Split concatenated FASTA text into one newline-terminated string per record."""
    records: List[str] = []
    current: List[str] = []
    for line in fasta_text.splitlines():
        if line.startswith(">"):
            # A header line starts a new record; flush the previous one.
            if current:
                records.append("\n".join(current) + "\n")
            current = [line]
        elif line.strip() and current:
            current.append(line)
    if current:
        records.append("\n".join(current) + "\n")
    return records


def search_proteins(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Protein database for protein sequences.

    Looks up matching IDs with ``ncbi_search`` and downloads their sequences
    as FASTA text with ``ncbi_fetch``.

    Args:
        term: Query string.
        retmax: Maximum number of IDs requested from the search step.

    Returns:
        One dict per ID with the keys ``id`` and ``fasta``. When the fetched
        text contains exactly one FASTA record per ID, each entry carries its
        own record; otherwise every entry carries the full fetched text.
        Returns an empty list for a blank term or when nothing matches.
    """
    # A blank query cannot match anything, so skip the network round-trips.
    if not term or not term.strip():
        return []

    ids = ncbi_search("protein", term, retmax)
    if not ids:
        return []

    fasta_data = ncbi_fetch("protein", ids, rettype="fasta", retmode="text")
    records = _split_fasta_records(fasta_data)

    if len(records) != len(ids):
        # Records cannot be paired with IDs reliably; keep the combined text
        # on every entry rather than guess.
        return [{"id": pid, "fasta": fasta_data} for pid in ids]

    # NOTE(review): pairing by position assumes the service returns records
    # in the same order as the requested IDs — confirm against EFetch docs.
    return [{"id": pid, "fasta": rec} for pid, rec in zip(ids, records)]

# -------------------------
# Structure Search
# -------------------------
def fetch_ncbi_structure(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Structure database and return structure metadata.

    Looks up matching IDs with ``ncbi_search`` and then requests their
    summaries from the ``esummary.fcgi`` endpoint.

    Args:
        term: Query string.
        retmax: Maximum number of IDs requested from the search step.

    Returns:
        One dict per ID, in search order, with the keys ``structure_id``,
        ``title``, ``organism``, ``release_date`` and ``link``. Fields missing
        from the summary are ``None``. Returns an empty list for a blank
        term or when nothing matches.

    Raises:
        requests.HTTPError: If the summary request returns an error status.
        requests.Timeout: If the summary request exceeds the timeout.
    """
    # A blank query cannot match anything, so skip the network round-trips.
    if not term or not term.strip():
        return []

    ids = ncbi_search("structure", term, retmax)
    if not ids:
        return []

    params = {
        "db": "structure",
        "id": ",".join(ids),
        "retmode": "json",
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # Without an explicit timeout, requests waits forever on a stalled server.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()

    records = r.json().get("result", {})
    structures = []
    for sid in ids:
        rec = records.get(sid, {})
        structures.append({
            "structure_id": sid,
            "title": rec.get("title"),
            "organism": rec.get("organism"),
            "release_date": rec.get("releasedate"),
            "link": f"https://www.ncbi.nlm.nih.gov/structure/{sid}",
        })
    return structures

# -------------------------
# Build Cross-Database Profile
# -------------------------
def ncbi_cross_profile(term: str) -> Dict:
    """
    Given a term, pull literature, genes, proteins, and structures for unified output.

    Args:
        term: Query string passed unchanged to each of the four searches.

    Returns:
        A dict with the keys ``term``, ``timestamp`` (current UTC time as an
        ISO-8601 string without a UTC offset), ``literature`` (up to 5
        results), ``genes`` (up to 5), ``proteins`` (up to 2) and
        ``structures`` (up to 3).
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop tzinfo so the emitted string keeps its previous format.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        "term": term,
        "timestamp": now_utc.isoformat(),
        "literature": search_pubmed(term, retmax=5),
        "genes": search_genes(term, retmax=5),
        "proteins": search_proteins(term, retmax=2),
        "structures": fetch_ncbi_structure(term, retmax=3),
    }