Update mcp/wikidata.py
Browse files- mcp/wikidata.py +56 -11
mcp/wikidata.py
CHANGED
|
@@ -1,23 +1,68 @@
|
|
| 1 |
-
|
| 2 |
-
"""
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
"""
|
|
|
|
| 5 |
|
| 6 |
-
import httpx
|
|
|
|
| 7 |
from typing import Dict, Optional
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
| 11 |
async def simple_search(term: str) -> Optional[Dict]:
|
|
|
|
| 12 |
params = {
|
| 13 |
"action": "wbsearchentities",
|
| 14 |
"search": term,
|
| 15 |
"language": "en",
|
| 16 |
"format": "json",
|
| 17 |
-
"limit": 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
}
|
| 19 |
-
async with httpx.AsyncClient(timeout=
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""MedGenesis – minimal **Wikidata** lookup helper (async).
|
| 3 |
+
|
| 4 |
+
Features
|
| 5 |
+
~~~~~~~~
|
| 6 |
+
* `simple_search(term)` – return first matching entity dict `{id, label, description}`.
|
| 7 |
+
* `fetch_entity(qid)` – return full entity data (`claims`, `labels`, etc.).
|
| 8 |
+
* Uses the public Wikidata MediaWiki Action API (`w/api.php`, no key). 15‑second timeout with `httpx`.
|
| 9 |
+
* Least‑recently‑used cache (128) to avoid repeated hits when the same
|
| 10 |
+
concept appears across multiple papers.
|
| 11 |
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
|
| 14 |
+
import httpx, asyncio
|
| 15 |
+
from functools import lru_cache
|
| 16 |
from typing import Dict, Optional
|
| 17 |
|
| 18 |
+
_API = "https://www.wikidata.org/w/api.php"
|
| 19 |
+
_TIMEOUT = 15
|
| 20 |
+
_HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"}
|
| 21 |
+
|
| 22 |
+
# ---------------------------------------------------------------------
|
| 23 |
+
# Public helpers
|
| 24 |
+
# ---------------------------------------------------------------------
|
| 25 |
|
| 26 |
+
# Completed search results keyed by term, least recently used first.
# `functools.lru_cache` must not decorate a coroutine function: it would store
# the coroutine object itself, and awaiting that object a second time raises
# "RuntimeError: cannot reuse already awaited coroutine".
_SEARCH_CACHE_MAX = 128
_SEARCH_CACHE: Dict[str, Optional[Dict]] = {}


async def simple_search(term: str) -> Optional[Dict]:
    """Return the top Wikidata search hit for *term*, or None if there is none.

    Results (including ``None`` for "no hit") are memoised per *term* in a
    bounded least-recently-used cache, so repeated lookups of the same term
    do not hit the network again. Failed requests are never cached.

    Args:
        term: Text to search for; the query is issued with ``language=en``.

    Returns:
        The first element of the response's ``search`` list, or ``None`` when
        that list is empty or missing.

    Raises:
        httpx.HTTPStatusError: If the API responds with an error status.
    """
    if term in _SEARCH_CACHE:
        # Re-insert so this term becomes the most recently used entry
        # (dicts preserve insertion order).
        cached = _SEARCH_CACHE.pop(term)
        _SEARCH_CACHE[term] = cached
        return cached

    params = {
        "action": "wbsearchentities",
        "search": term,
        "language": "en",
        "format": "json",
        "limit": 1,
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        hits = resp.json().get("search", [])
    result = hits[0] if hits else None

    # A concurrent call may have stored the same term while we were awaiting;
    # drop it first so the size check below stays accurate.
    _SEARCH_CACHE.pop(term, None)
    if len(_SEARCH_CACHE) >= _SEARCH_CACHE_MAX:
        # The first key in iteration order is the least recently used one.
        _SEARCH_CACHE.pop(next(iter(_SEARCH_CACHE)))
    _SEARCH_CACHE[term] = result
    return result


# Keep the `cache_clear()` hook that the previous `lru_cache` wrapper exposed.
simple_search.cache_clear = _SEARCH_CACHE.clear
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# Completed entity payloads keyed by Q-ID, least recently used first.
# `functools.lru_cache` must not decorate a coroutine function: it would store
# the coroutine object itself, and awaiting that object a second time raises
# "RuntimeError: cannot reuse already awaited coroutine".
_ENTITY_CACHE_MAX = 128
_ENTITY_CACHE: Dict[str, Dict] = {}


async def fetch_entity(qid: str) -> Dict:
    """Fetch full entity JSON for a Wikidata Q‑ID (e.g. `Q12136`).

    Results are memoised per *qid* in a bounded least-recently-used cache,
    so repeated lookups of the same entity do not hit the network again.
    Failed requests are never cached.

    Args:
        qid: Entity identifier passed as the ``ids`` request parameter.

    Returns:
        The value stored under *qid* in the response's ``entities`` mapping,
        or an empty dict when either key is missing.

    Raises:
        httpx.HTTPStatusError: If the API responds with an error status.
    """
    if qid in _ENTITY_CACHE:
        # Re-insert so this Q-ID becomes the most recently used entry
        # (dicts preserve insertion order).
        cached = _ENTITY_CACHE.pop(qid)
        _ENTITY_CACHE[qid] = cached
        return cached

    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "languages": "en",
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        entity = resp.json().get("entities", {}).get(qid, {})

    # A concurrent call may have stored the same Q-ID while we were awaiting;
    # drop it first so the size check below stays accurate.
    _ENTITY_CACHE.pop(qid, None)
    if len(_ENTITY_CACHE) >= _ENTITY_CACHE_MAX:
        # The first key in iteration order is the least recently used one.
        _ENTITY_CACHE.pop(next(iter(_ENTITY_CACHE)))
    _ENTITY_CACHE[qid] = entity
    return entity


# Keep the `cache_clear()` hook that the previous `lru_cache` wrapper exposed.
fetch_entity.cache_clear = _ENTITY_CACHE.clear
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# ---------------------------------------------------------------------
|
| 59 |
+
# Demo / manual test
|
| 60 |
+
# ---------------------------------------------------------------------
|
| 61 |
+
if __name__ == "__main__":
    async def _demo():
        """Search for a sample term, then print the top hit and its English label."""
        top = await simple_search("glioblastoma")
        print("Top hit:", top)
        if not top:
            # Nothing matched, so there is no entity to fetch.
            return
        entity = await fetch_entity(top["id"])
        labels = entity.get("labels", {})
        print("Labels:", labels.get("en", {}))

    asyncio.run(_demo())
|