File size: 3,582 Bytes
f65e3d6
 
 
 
 
 
 
 
 
 
 
 
 
7a35270
f65e3d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a35270
f65e3d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a35270
 
 
f65e3d6
 
 
 
 
 
 
 
 
 
 
7a35270
f65e3d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python3
"""MedGenesis – NCBI E‑utilities helper (async, cached).

Supports:
• `search_gene(term)`        → quick gene symbol/name hits via ESearch + ESummary
• `get_mesh_definition(term)`→ first MeSH definition string via ESummary

New features
~~~~~~~~~~~~
* Central `_request()` with exponential‑backoff retry (2×/4×).
* 12‑hour LRU caches for both public helpers (API quota‑friendly).
* Respects optional `BIO_KEY` env to boost rate limits.
* Handles single‑item edge cases (ESummary returns dict not list).
"""
from __future__ import annotations

import os, asyncio, httpx, xmltodict
from functools import lru_cache
from typing import List, Dict, Any

# Optional NCBI API key, read once at import time from the BIO_KEY environment
# variable; when set, _request() sends it as the `api_key` query parameter.
_API_KEY = os.getenv("BIO_KEY")  # optional but raises quota if set
# Base URL to which each E-utilities endpoint name (e.g. "esearch.fcgi") is appended.
_BASE    = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
# Timeout passed to httpx.AsyncClient (presumably seconds — confirm against httpx docs).
_TIMEOUT = 15

# ---------------------------------------------------------------------
# Internal request helper with retry
# ---------------------------------------------------------------------
async def _request(endpoint: str, params: Dict[str, Any], *, retries: int = 3) -> httpx.Response:
    """GET ``_BASE + endpoint`` and return the first HTTP 200 response.

    Args:
        endpoint: E-utilities script name, e.g. ``"esearch.fcgi"``.
        params:   Query parameters. The mapping is copied, never mutated.
        retries:  Maximum number of attempts; values below 1 are treated as 1.

    Returns:
        The ``httpx.Response`` of the first attempt that answered 200, or the
        last response if it carried another non-error status.

    Raises:
        httpx.HTTPStatusError: if the last attempt returned an error status.
    """
    # Work on a copy so the API key is not written into the caller's dict.
    query = dict(params)
    if _API_KEY:
        query["api_key"] = _API_KEY

    attempts = max(1, retries)  # guarantee at least one request is made
    url = f"{_BASE}{endpoint}"
    delay = 2

    # One client for all attempts so the connection can be reused on retry.
    async with httpx.AsyncClient(timeout=_TIMEOUT) as cli:
        for attempt in range(attempts):
            resp = await cli.get(url, params=query)
            if resp.status_code == 200:
                return resp
            # Back off only when another attempt follows; sleeping before
            # raising would just delay the error.
            if attempt < attempts - 1:
                await asyncio.sleep(delay)
                delay *= 2

    resp.raise_for_status()
    # Non-200 but non-error status: hand the response back instead of None.
    return resp


# ---------------------------------------------------------------------
# Gene search (ESearch → ESummary) – cached 12 h
# ---------------------------------------------------------------------
# Cache for search_gene(). functools.lru_cache cannot be used on a coroutine
# function: it would store the coroutine object itself, and awaiting that
# object a second time raises RuntimeError.
_GENE_CACHE_TTL = 12 * 60 * 60  # seconds an entry stays valid (12 h)
_GENE_CACHE_MAX = 512           # maximum number of cached queries
_gene_cache: Dict[Any, Any] = {}  # (term, retmax) -> (stored_at, results)


async def search_gene(term: str, *, retmax: int = 5) -> List[Dict]:
    """Return list of gene summary dicts for *term* (Entrez Gene db).

    Runs ESearch to obtain up to *retmax* gene UIDs, then ESummary on those
    UIDs. Results are cached per ``(term, retmax)`` for 12 hours; failed
    requests raise and are not cached.

    Args:
        term:   Entrez query string (gene symbol, name, ...).
        retmax: Maximum number of hits requested from ESearch.

    Returns:
        One summary dict per hit, or ``[]`` when ESearch finds nothing. The
        list is a fresh copy, so callers may mutate it freely.
    """
    import time  # local import keeps this block self-contained

    key = (term, retmax)
    entry = _gene_cache.get(key)
    if entry is not None:
        stored_at, cached_hits = entry
        if time.monotonic() - stored_at < _GENE_CACHE_TTL:
            # Re-insert to mark the entry as most recently used.
            _gene_cache[key] = _gene_cache.pop(key)
            return list(cached_hits)
        del _gene_cache[key]  # expired

    es_params = {
        "db": "gene",
        "term": term,
        "retmode": "json",
        "retmax": retmax,
    }
    es_resp = await _request("esearch.fcgi", es_params)
    ids = es_resp.json().get("esearchresult", {}).get("idlist", [])

    hits: List[Dict] = []
    if ids:
        sum_params = {"db": "gene", "id": ",".join(ids), "retmode": "json"}
        sum_resp = await _request("esummary.fcgi", sum_params)
        data = sum_resp.json().get("result", {})
        # "uids" is the index list that sits alongside the per-UID records.
        hits = [v for k, v in data.items() if k != "uids"]

    # Dicts keep insertion order, so the first key is the least recently used.
    while len(_gene_cache) >= _GENE_CACHE_MAX:
        _gene_cache.pop(next(iter(_gene_cache)))
    _gene_cache[key] = (time.monotonic(), hits)
    return list(hits)


# ---------------------------------------------------------------------
# MeSH definition – cached 12 h
# ---------------------------------------------------------------------
# Cache for get_mesh_definition(). functools.lru_cache cannot be used on a
# coroutine function: it would store the coroutine object itself, and awaiting
# that object a second time raises RuntimeError.
_MESH_CACHE_TTL = 12 * 60 * 60  # seconds an entry stays valid (12 h)
_MESH_CACHE_MAX = 512           # maximum number of cached terms
_mesh_cache: Dict[str, Any] = {}  # term -> (stored_at, definition)


async def get_mesh_definition(term: str) -> str:
    """Return first MeSH definition string for *term* or ''.

    ESummary is addressed by UID, not by search term, so the term is first
    resolved to a MeSH UID with ESearch and that UID is then summarised.
    Results are cached per term for 12 hours; failed requests raise and are
    not cached.

    Args:
        term: MeSH search term, e.g. ``"glioblastoma"``.

    Returns:
        The record's scope note when present, otherwise its first entry term,
        otherwise ``""`` (also when the term has no MeSH hit).
    """
    import time  # local import keeps this block self-contained

    entry = _mesh_cache.get(term)
    if entry is not None:
        stored_at, cached_text = entry
        if time.monotonic() - stored_at < _MESH_CACHE_TTL:
            # Re-insert to mark the entry as most recently used.
            _mesh_cache[term] = _mesh_cache.pop(term)
            return cached_text
        del _mesh_cache[term]  # expired

    search_params = {
        "db": "mesh",
        "term": term,
        "retmode": "json",
        "retmax": 1,
    }
    search_resp = await _request("esearch.fcgi", search_params)
    ids = search_resp.json().get("esearchresult", {}).get("idlist", [])

    definition = ""
    if ids:
        sum_params = {"db": "mesh", "id": ids[0], "retmode": "json"}
        sum_resp = await _request("esummary.fcgi", sum_params)
        result = sum_resp.json().get("result", {})
        record = result.get(ids[0])
        if not isinstance(record, dict):
            # Fall back to the first per-UID record, skipping the "uids" index.
            records = [v for k, v in result.items()
                       if k != "uids" and isinstance(v, dict)]
            record = records[0] if records else {}
        # NOTE(review): "ds_scopenote" is assumed to carry the MeSH scope note
        # (the definition text) — confirm against a live ESummary response.
        definition = record.get("ds_scopenote") or ""
        if not definition:
            # `or [""]` also covers an empty list, which would break `[0]`.
            terms = record.get("ds_meshterms") or [""]
            definition = terms[0]

    # Dicts keep insertion order, so the first key is the least recently used.
    while len(_mesh_cache) >= _MESH_CACHE_MAX:
        _mesh_cache.pop(next(iter(_mesh_cache)))
    _mesh_cache[term] = (time.monotonic(), definition)
    return definition


# ---------------------------------------------------------------------
# CLI demo
# ---------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        """Run one gene search and one MeSH lookup and print a short summary."""
        genes = await search_gene("TP53", retmax=3)
        # .get() keeps the demo alive if a record lacks a 'name' field.
        first = genes[0].get("name", "?") if genes else "None"
        # Separator added: count and name used to run together ("3TP53").
        print(f"Gene hits: {len(genes)} – first: {first}")
        mesh = await get_mesh_definition("glioblastoma")
        # Show the ellipsis only when the text really was cut.
        suffix = " …" if len(mesh) > 80 else ""
        print(f"MeSH def: {mesh[:80]}{suffix}")
    asyncio.run(_demo())