|
|
|
"""MedGenesis – MyGene.info async helper (timeout‑safe). |
|
|
|
Provides `fetch_gene_info(query)` → first hit dict or `{}`. |
|
Adds: |
|
* 3‑step back‑off retry (2 s → 4 s → 8 s) on time‑outs/429/5xx. |
|
* 10 s client timeout. |
|
* 24 h LRU cache (128 queries). |
|
* Returns empty dict on any failure so orchestrator can’t crash. |
|
""" |
|
from __future__ import annotations

import asyncio
import time
from functools import lru_cache
from typing import Dict

import httpx
|
|
|
_BASE = "https://mygene.info/v3" |
|
_TIMEOUT = 10 |
|
_MAX_RETRIES = 3 |
|
|
|
async def _query_mygene(params: Dict) -> Dict:
    """Query the MyGene.info ``/query`` endpoint and return the first hit.

    Makes up to ``_MAX_RETRIES`` attempts. A time-out or one of the retryable
    status codes (429, 500, 502, 503, 504) leads to another attempt after an
    exponentially growing pause (2 s, then 4 s, ...). Any other failure,
    including a non-retryable HTTP status, ends the call immediately.

    Args:
        params: Query-string parameters forwarded to the endpoint.

    Returns:
        The first element of the response's ``hits`` list, or ``{}`` when the
        list is empty or the request ultimately fails. Never raises.
    """
    retryable_statuses = {429, 500, 502, 503, 504}
    delay = 2
    for attempt in range(_MAX_RETRIES):
        try:
            async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
                resp = await client.get(f"{_BASE}/query", params=params)
            if resp.status_code == 200:
                hits = resp.json().get("hits", [])
                return hits[0] if hits else {}
            if resp.status_code not in retryable_statuses:
                # e.g. 400/404: sending the identical request again cannot
                # succeed, so give up instead of re-requesting in a tight loop.
                return {}
        except httpx.TimeoutException:
            # Base class of the connect/read/write/pool time-outs; all of them
            # are transient, so fall through to the back-off below.
            pass
        except Exception:
            # Anything else (transport error, malformed JSON, ...) is treated
            # as final so that callers never see an exception.
            return {}
        # Pause only when another attempt will actually follow.
        if attempt < _MAX_RETRIES - 1:
            await asyncio.sleep(delay)
            delay *= 2
    return {}
|
|
|
# NOTE: functools.lru_cache cannot be used on a coroutine function: it would
# cache the coroutine object itself, and awaiting that object a second time
# raises RuntimeError. The awaited results are cached explicitly instead.
_CACHE_TTL = 24 * 60 * 60  # seconds a cached hit stays valid (24 h)
_CACHE_MAX = 128  # maximum number of cached queries
_GENE_CACHE: Dict[str, tuple] = {}  # query -> (expiry timestamp, hit dict)


async def fetch_gene_info(query: str) -> Dict:
    """Return MyGene.info top hit or empty dict (never raises).

    Successful (non-empty) hits are kept in an in-memory cache of at most
    ``_CACHE_MAX`` queries for ``_CACHE_TTL`` seconds; the least recently used
    entry is evicted first. Empty results are not cached, so a transient
    failure is retried on the next call.

    Args:
        query: Search string sent as the ``q`` parameter.

    Returns:
        The top hit as a dict, or ``{}`` if nothing was found or the lookup
        failed. A cached dict is shared between callers and should be treated
        as read-only.
    """
    entry = _GENE_CACHE.pop(query, None)
    if entry is not None:
        expires_at, cached_hit = entry
        if time.monotonic() < expires_at:
            # Re-insert at the end: dicts keep insertion order, so the first
            # key is always the least recently used one.
            _GENE_CACHE[query] = entry
            return cached_hit

    params = {
        "q": query,
        "fields": "symbol,name,summary,alias,entrezgene,clinvar,location,go",
        "size": 1,
    }
    hit = await _query_mygene(params)

    if hit:
        # A concurrent call may have stored this query while we were awaiting.
        _GENE_CACHE.pop(query, None)
        while len(_GENE_CACHE) >= _CACHE_MAX:
            _GENE_CACHE.pop(next(iter(_GENE_CACHE)))
        _GENE_CACHE[query] = (time.monotonic() + _CACHE_TTL, hit)
    return hit


# Keep the ``cache_clear()`` hook that the previous lru_cache wrapper exposed.
fetch_gene_info.cache_clear = _GENE_CACHE.clear
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: look up one well-known gene symbol and pretty-print
    # whatever comes back (an empty dict if the lookup failed).
    import asyncio
    import json

    async def _demo():
        """Fetch the top hit for TP53 and print it as indented JSON."""
        top_hit = await fetch_gene_info("TP53")
        print(json.dumps(top_hit, indent=2))

    asyncio.run(_demo())
|
|