|
|
|
"""MedGenesis – minimal **Wikidata** lookup helper (async).

Features
~~~~~~~~
* `simple_search(term)` – return first matching entity dict
  `{id, label, description}`, or `None` when nothing matches.
* `fetch_entity(qid)` – return full entity data (`claims`, `labels`, etc.).
* Uses the public Wikidata MediaWiki Action API (`w/api.php`, no key).
  15‑second timeout with `httpx`.
* Least‑recently‑used cache (128 entries per function) to avoid repeated hits
  when the same concept appears across multiple papers.
"""
|
from __future__ import annotations |
|
|
|
import httpx, asyncio |
|
from functools import lru_cache |
|
from typing import Dict, Optional |
|
|
|
_API = "https://www.wikidata.org/w/api.php" |
|
_TIMEOUT = 15 |
|
_HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"} |
|
|
|
|
|
|
|
|
|
|
|
# Result cache for `simple_search`, keyed by search term.
# `functools.lru_cache` cannot be used on a coroutine function: it would store
# the coroutine object itself, and awaiting it a second time raises
# "RuntimeError: cannot reuse already awaited coroutine".  A plain dict keeps
# insertion order, so the first key is always the least recently used one.
_SEARCH_CACHE_SIZE = 128
_SEARCH_CACHE: Dict[str, Optional[Dict]] = {}


async def simple_search(term: str) -> Optional[Dict]:
    """Return the top Wikidata search hit for *term*, or None.

    Results (including "no hit") are memoised per term in a bounded
    least-recently-used cache, so repeated lookups of the same term do not
    hit the network again. Failed requests are never cached.

    Args:
        term: Free-text search string (searched in English).

    Returns:
        The first entry of the API's ``search`` list, or ``None`` when the
        term is blank or nothing matches. The returned dict is the cached
        object itself; treat it as read-only.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        httpx.HTTPError: On transport problems such as timeouts.
    """
    # A blank term can never match anything; skip the round trip.
    if not term or not term.strip():
        return None

    if term in _SEARCH_CACHE:
        # Pop and re-insert to mark the entry as most recently used.
        cached = _SEARCH_CACHE.pop(term)
        _SEARCH_CACHE[term] = cached
        return cached

    params = {
        "action": "wbsearchentities",
        "search": term,
        "language": "en",
        "format": "json",
        "limit": 1,
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        hits = resp.json().get("search", [])
    result = hits[0] if hits else None

    _SEARCH_CACHE[term] = result
    if len(_SEARCH_CACHE) > _SEARCH_CACHE_SIZE:
        # Evict the least recently used entry (first key in insertion order).
        _SEARCH_CACHE.pop(next(iter(_SEARCH_CACHE)))
    return result


# Kept for callers that used the `cache_clear()` hook of the former
# `lru_cache` wrapper.
simple_search.cache_clear = _SEARCH_CACHE.clear  # type: ignore[attr-defined]
|
|
|
|
|
# Result cache for `fetch_entity`, keyed by entity ID.
# `functools.lru_cache` cannot be used on a coroutine function: it would store
# the coroutine object itself, and awaiting it a second time raises
# "RuntimeError: cannot reuse already awaited coroutine".  A plain dict keeps
# insertion order, so the first key is always the least recently used one.
_ENTITY_CACHE_SIZE = 128
_ENTITY_CACHE: Dict[str, Dict] = {}


async def fetch_entity(qid: str) -> Dict:
    """Fetch full entity JSON for a Wikidata Q‑ID (e.g. `Q12136`).

    Results are memoised per ID in a bounded least-recently-used cache, so
    repeated lookups of the same entity do not hit the network again.
    Failed requests are never cached.

    Args:
        qid: Entity ID to look up; it is also the key used to pick the
            entity out of the response's ``entities`` mapping.

    Returns:
        The entity dict stored under *qid* in the response, or ``{}`` when
        *qid* is empty or the response has no entry under that key. The
        returned dict is the cached object itself; treat it as read-only.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        httpx.HTTPError: On transport problems such as timeouts.
    """
    # Nothing to look up; skip the round trip.
    if not qid:
        return {}

    if qid in _ENTITY_CACHE:
        # Pop and re-insert to mark the entry as most recently used.
        cached = _ENTITY_CACHE.pop(qid)
        _ENTITY_CACHE[qid] = cached
        return cached

    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "languages": "en",
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        entity = resp.json().get("entities", {}).get(qid, {})

    _ENTITY_CACHE[qid] = entity
    if len(_ENTITY_CACHE) > _ENTITY_CACHE_SIZE:
        # Evict the least recently used entry (first key in insertion order).
        _ENTITY_CACHE.pop(next(iter(_ENTITY_CACHE)))
    return entity


# Kept for callers that used the `cache_clear()` hook of the former
# `lru_cache` wrapper.
fetch_entity.cache_clear = _ENTITY_CACHE.clear  # type: ignore[attr-defined]
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":

    async def _demo():
        """Search for a sample term, then print its top hit and English label."""
        top = await simple_search("glioblastoma")
        print("Top hit:", top)
        # Nothing found: there is no entity ID to follow up on.
        if not top:
            return
        entity = await fetch_entity(top["id"])
        english_label = entity.get("labels", {}).get("en", {})
        print("Labels:", english_label)

    asyncio.run(_demo())
|
|