MCP_Res / mcp /wikidata.py
mgbam's picture
Update mcp/wikidata.py
cf5709a verified
#!/usr/bin/env python3
"""MedGenesis – minimal **Wikidata** lookup helper (async).
Features
~~~~~~~~
* `simple_search(term)` – return first matching entity dict `{id, label, description}`.
* `fetch_entity(qid)` – return full entity data (`claims`, `labels`, etc.).
* Uses the public Wikidata MediaWiki Action API (`/w/api.php`, no key). 15‑second timeout with `httpx`.
* Least‑recently‑used cache (128) to avoid repeated hits when the same
concept appears across multiple papers.
"""
from __future__ import annotations
import httpx, asyncio
from functools import lru_cache
from typing import Dict, Optional
# Endpoint that every request in this module is sent to.
_API = "https://www.wikidata.org/w/api.php"
# Passed as `timeout=` to each `httpx.AsyncClient` (seconds).
_TIMEOUT = 15
# Default headers attached to each `httpx.AsyncClient`; identifies this tool
# to the Wikidata servers via the User-Agent string.
_HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"}
# ---------------------------------------------------------------------
# Public helpers
# ---------------------------------------------------------------------
# Bounded LRU store for `simple_search`, keyed by search term.  A plain dict
# preserves insertion order, so its first key is always the least recently
# used one.  `functools.lru_cache` must not wrap an `async def`: it would
# memoise the one-shot coroutine object instead of its result, and awaiting
# that object a second time raises
# `RuntimeError: cannot reuse already awaited coroutine`.
_SEARCH_CACHE_MAX = 128
_SEARCH_CACHE: Dict[str, Optional[Dict]] = {}


async def simple_search(term: str) -> Optional[Dict]:
    """Return top search hit for *term* or None.

    Results (including "no hit" -> ``None``) are memoised in a
    least-recently-used cache of at most 128 terms, so repeated look-ups of
    the same term do not hit the network again.  Failed requests are not
    cached; the exception propagates and the next call retries.

    Args:
        term: Free-text label to search for (English).

    Returns:
        The first entry of the API's ``search`` list, or ``None`` when the
        list is empty.  The returned dict is the cached object itself, so
        callers should treat it as read-only.

    Raises:
        httpx.HTTPError: If the request fails or the server answers with an
            error status.
    """
    try:
        cached = _SEARCH_CACHE.pop(term)
    except KeyError:
        pass
    else:
        # Re-insert so this term becomes the most recently used entry.
        _SEARCH_CACHE[term] = cached
        return cached

    params = {
        "action": "wbsearchentities",
        "search": term,
        "language": "en",
        "format": "json",
        "limit": 1,
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        hits = resp.json().get("search", [])

    top_hit = hits[0] if hits else None
    _SEARCH_CACHE[term] = top_hit
    if len(_SEARCH_CACHE) > _SEARCH_CACHE_MAX:
        # Evict the least recently used term (first key in insertion order).
        del _SEARCH_CACHE[next(iter(_SEARCH_CACHE))]
    return top_hit
# Bounded LRU store for `fetch_entity`, keyed by Q-ID.  A plain dict preserves
# insertion order, so its first key is always the least recently used one.
# `functools.lru_cache` must not wrap an `async def`: it would memoise the
# one-shot coroutine object instead of its result, and awaiting that object a
# second time raises `RuntimeError: cannot reuse already awaited coroutine`.
_ENTITY_CACHE_MAX = 128
_ENTITY_CACHE: Dict[str, Dict] = {}


async def fetch_entity(qid: str) -> Dict:
    """Fetch full entity JSON for a Wikidata Q‑ID (e.g. `Q12136`).

    Results are memoised in a least-recently-used cache of at most 128
    Q-IDs, so repeated look-ups of the same entity do not hit the network
    again.  Failed requests are not cached; the exception propagates and the
    next call retries.

    Args:
        qid: Entity identifier, used both as the ``ids`` request parameter
            and as the key looked up in the response's ``entities`` mapping.

    Returns:
        The entity dict stored under *qid* in the response, or ``{}`` when
        the response has no such entry.  The returned dict is the cached
        object itself, so callers should treat it as read-only.

    Raises:
        httpx.HTTPError: If the request fails or the server answers with an
            error status.
    """
    try:
        cached = _ENTITY_CACHE.pop(qid)
    except KeyError:
        pass
    else:
        # Re-insert so this Q-ID becomes the most recently used entry.
        _ENTITY_CACHE[qid] = cached
        return cached

    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "languages": "en",
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        entity = resp.json().get("entities", {}).get(qid, {})

    _ENTITY_CACHE[qid] = entity
    if len(_ENTITY_CACHE) > _ENTITY_CACHE_MAX:
        # Evict the least recently used Q-ID (first key in insertion order).
        del _ENTITY_CACHE[next(iter(_ENTITY_CACHE))]
    return entity
# ---------------------------------------------------------------------
# Demo / manual test
# ---------------------------------------------------------------------
if __name__ == "__main__":

    async def _demo():
        """Search for a sample term, then print the top hit and its English label."""
        top = await simple_search("glioblastoma")
        print("Top hit:", top)
        if not top:
            # Nothing matched, so there is no entity to fetch.
            return
        entity = await fetch_entity(top["id"])
        english_label = entity.get("labels", {}).get("en", {})
        print("Labels:", english_label)

    asyncio.run(_demo())