"""MedGenesis – NCBI E-utilities helper (async, cached).

Supports:

• `search_gene(term)`         → quick gene symbol/name hits via ESearch + ESummary
• `get_mesh_definition(term)` → first MeSH definition string via ESummary

New features
~~~~~~~~~~~~
* Central `_request()` with exponential-backoff retry (the wait doubles after
  every failed attempt: 2 s, 4 s, …).
* LRU caches (intended lifetime: 12 hours) for both public helpers
  (API quota-friendly).
* Respects the optional `BIO_KEY` environment variable to boost rate limits.
* Handles single-item edge cases (ESummary returns dict not list).
"""
|
from __future__ import annotations

import asyncio
import os
import time
from functools import lru_cache
from typing import Any, Dict, List

import httpx
import xmltodict
|
|
|
# Optional NCBI API key, read once at import time from the ``BIO_KEY``
# environment variable; ``None`` when the variable is not set.
_API_KEY = os.environ.get("BIO_KEY")

# Root URL that every E-utilities endpoint name is appended to.
_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

# Timeout handed to ``httpx.AsyncClient`` for each request.
_TIMEOUT = 15
|
|
|
|
|
|
|
|
|
async def _request(endpoint: str, params: Dict[str, Any], *, retries: int = 3) -> httpx.Response:
    """GET an E-utilities endpoint, retrying with exponential back-off.

    Args:
        endpoint: Script name appended to ``_BASE`` (e.g. ``"esearch.fcgi"``).
        params: Query parameters. The mapping is copied, never modified; the
            API key from ``_API_KEY`` is added to the copy when one is set.
        retries: Total number of attempts. Values below 1 are treated as 1 so
            that at least one request is always sent.

    Returns:
        The first response with status 200. If the final attempt yields a
        non-200 status that ``raise_for_status()`` does not reject, that
        response is returned.

    Raises:
        httpx.HTTPStatusError: The final attempt returned an error status.
        httpx.TransportError: The final attempt failed at the transport level
            (connection error, timeout, ...).
    """
    # Copy first: the caller's dict must not silently acquire an ``api_key``.
    query = dict(params)
    if _API_KEY:
        query["api_key"] = _API_KEY

    url = f"{_BASE}{endpoint}"
    attempts = max(1, retries)
    delay = 2
    response = None
    last_error = None

    # One client for all attempts so the connection can be reused on retry.
    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        for attempt in range(attempts):
            if attempt:
                # Back off only *between* attempts; there is no point in
                # sleeping after the last one has already failed.
                await asyncio.sleep(delay)
                delay *= 2
            try:
                response = await client.get(url, params=query)
            except httpx.TransportError as exc:
                # Transient network failures deserve a retry just like a
                # non-200 status does.
                response = None
                last_error = exc
                continue
            if response.status_code == 200:
                return response

    if response is None:
        # The final attempt never produced a response: surface its error.
        raise last_error
    response.raise_for_status()
    return response
|
|
|
|
|
|
|
|
|
|
|
# Result cache for ``search_gene``: (term, retmax) -> (expiry, hits).
# ``functools.lru_cache`` must not wrap a coroutine function: it stores the
# coroutine object itself, and awaiting that same object on a second call
# raises ``RuntimeError: cannot reuse already awaited coroutine``.
_GENE_CACHE: Dict[tuple, tuple] = {}
_GENE_CACHE_TTL = 12 * 60 * 60  # seconds (12 hours)
_GENE_CACHE_MAX = 512


async def _search_gene_uncached(term: str, retmax: int) -> List[Dict]:
    """Run ESearch + ESummary against the Entrez Gene db, bypassing the cache."""
    es_resp = await _request(
        "esearch.fcgi",
        {"db": "gene", "term": term, "retmode": "json", "retmax": retmax},
    )
    ids = es_resp.json().get("esearchresult", {}).get("idlist", [])
    if not ids:
        return []

    sum_resp = await _request(
        "esummary.fcgi",
        {"db": "gene", "id": ",".join(ids), "retmode": "json"},
    )
    data = sum_resp.json().get("result", {})
    # "uids" is the index list that sits next to the per-gene records.
    return [v for k, v in data.items() if k != "uids"]


async def search_gene(term: str, *, retmax: int = 5) -> List[Dict]:
    """Return list of gene summary dicts for *term* (Entrez Gene db).

    Results are cached per ``(term, retmax)`` for 12 hours; at most 512
    entries are kept and the least recently used one is evicted first.
    Failed lookups (exceptions) are not cached.

    Args:
        term: Entrez search term, e.g. a gene symbol.
        retmax: Maximum number of hits requested from ESearch.

    Returns:
        A new list of ESummary record dicts; empty when nothing matched.
        The dicts themselves are shared with the cache, so treat them as
        read-only.
    """
    key = (term, retmax)
    # Popping drops an expired entry and lets a live one be re-inserted at
    # the end of the dict, which marks it as most recently used.
    entry = _GENE_CACHE.pop(key, None)
    if entry is not None and entry[0] > time.monotonic():
        _GENE_CACHE[key] = entry
        return list(entry[1])

    hits = await _search_gene_uncached(term, retmax)

    # Dicts keep insertion order, so the first key is the least recently used.
    while len(_GENE_CACHE) >= _GENE_CACHE_MAX:
        del _GENE_CACHE[next(iter(_GENE_CACHE))]
    _GENE_CACHE[key] = (time.monotonic() + _GENE_CACHE_TTL, hits)
    return list(hits)


# Keep the ``cache_clear()`` hook that the former ``lru_cache`` wrapper exposed.
search_gene.cache_clear = _GENE_CACHE.clear
|
|
|
|
|
|
|
|
|
|
|
# Result cache for ``get_mesh_definition``: term -> (expiry, definition).
# ``functools.lru_cache`` must not wrap a coroutine function: it stores the
# coroutine object itself, and awaiting that same object on a second call
# raises ``RuntimeError: cannot reuse already awaited coroutine``.
_MESH_CACHE: Dict[str, tuple] = {}
_MESH_CACHE_TTL = 12 * 60 * 60  # seconds (12 hours)
_MESH_CACHE_MAX = 512


async def _get_mesh_definition_uncached(term: str) -> str:
    """Resolve *term* to a MeSH UID, then read its summary; bypasses the cache."""
    # ESummary only accepts UIDs, so the free-text term has to go through
    # ESearch first.
    search = await _request(
        "esearch.fcgi",
        {"db": "mesh", "term": term, "retmode": "json", "retmax": 1},
    )
    ids = search.json().get("esearchresult", {}).get("idlist", [])
    if not ids:
        return ""

    summary = await _request(
        "esummary.fcgi",
        {"db": "mesh", "id": ids[0], "retmode": "json"},
    )
    data = summary.json().get("result", {})
    recs = [v for k, v in data.items() if k != "uids" and isinstance(v, dict)]
    if not recs:
        return ""

    record = recs[0]
    # NOTE(review): the MeSH docsum is expected to carry the definition in
    # ``ds_scopenote`` - confirm against a live response. When it is absent
    # we fall back to the first entry term, which is what this function
    # used to read.
    scope_note = record.get("ds_scopenote")
    if scope_note:
        return scope_note
    terms = record.get("ds_meshterms") or [""]  # also guards an empty list
    return terms[0]


async def get_mesh_definition(term: str) -> str:
    """Return first MeSH definition string for *term* or ''.

    Results are cached per term for 12 hours; at most 512 entries are kept
    and the least recently used one is evicted first. Failed lookups
    (exceptions) are not cached.

    Args:
        term: Free-text term to look up in the MeSH database.

    Returns:
        The definition text, or ``""`` when the term has no match.
    """
    # Popping drops an expired entry and lets a live one be re-inserted at
    # the end of the dict, which marks it as most recently used.
    entry = _MESH_CACHE.pop(term, None)
    if entry is not None and entry[0] > time.monotonic():
        _MESH_CACHE[term] = entry
        return entry[1]

    definition = await _get_mesh_definition_uncached(term)

    # Dicts keep insertion order, so the first key is the least recently used.
    while len(_MESH_CACHE) >= _MESH_CACHE_MAX:
        del _MESH_CACHE[next(iter(_MESH_CACHE))]
    _MESH_CACHE[term] = (time.monotonic() + _MESH_CACHE_TTL, definition)
    return definition


# Keep the ``cache_clear()`` hook that the former ``lru_cache`` wrapper exposed.
get_mesh_definition.cache_clear = _MESH_CACHE.clear
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":

    async def _main() -> None:
        """Call both public helpers once and print a short summary of each."""
        hits = await search_gene("TP53", retmax=3)
        top_name = hits[0]['name'] if hits else 'None'
        print(f"Gene hits: {len(hits)} – {top_name}")

        definition = await get_mesh_definition("glioblastoma")
        # Only the first 80 characters are shown.
        print("MeSH def:", definition[:80], "…")

    asyncio.run(_main())
|
|