#!/usr/bin/env python3 """MedGenesis – minimal **Wikidata** lookup helper (async). Features ~~~~~~~~ * `simple_search(term)` – return first matching entity dict `{id, label, description}`. * `fetch_entity(qid)` – return full entity data (`claims`, `labels`, etc.). * Uses public Wikidata REST API (no key). 15‑second timeout with `httpx`. * Least‑recently‑used cache (128) to avoid repeated hits when the same concept appears across multiple papers. """ from __future__ import annotations import httpx, asyncio from functools import lru_cache from typing import Dict, Optional _API = "https://www.wikidata.org/w/api.php" _TIMEOUT = 15 _HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"} # --------------------------------------------------------------------- # Public helpers # --------------------------------------------------------------------- @lru_cache(maxsize=128) async def simple_search(term: str) -> Optional[Dict]: """Return top search hit for *term* or None.""" params = { "action": "wbsearchentities", "search": term, "language": "en", "format": "json", "limit": 1, } async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client: resp = await client.get(_API, params=params) resp.raise_for_status() hits = resp.json().get("search", []) return hits[0] if hits else None @lru_cache(maxsize=128) async def fetch_entity(qid: str) -> Dict: """Fetch full entity JSON for a Wikidata Q‑ID (e.g. `Q12136`).""" params = { "action": "wbgetentities", "ids": qid, "format": "json", "languages": "en", } async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client: resp = await client.get(_API, params=params) resp.raise_for_status() return resp.json().get("entities", {}).get(qid, {}) # --------------------------------------------------------------------- # Demo / manual test # --------------------------------------------------------------------- if __name__ == "__main__": async def _demo(): hit = await simple_search("glioblastoma") print("Top hit:", hit) if hit: full = await fetch_entity(hit["id"]) print("Labels:", full.get("labels", {}).get("en", {})) asyncio.run(_demo())