Update mcp/wikidata.py
Browse files- mcp/wikidata.py +56 -11
mcp/wikidata.py
CHANGED
@@ -1,23 +1,68 @@
|
|
1 |
-
|
2 |
-
"""
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
"""
|
|
|
5 |
|
6 |
-
import httpx
|
|
|
7 |
from typing import Dict, Optional
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
|
|
11 |
async def simple_search(term: str) -> Optional[Dict]:
|
|
|
12 |
params = {
|
13 |
"action": "wbsearchentities",
|
14 |
"search": term,
|
15 |
"language": "en",
|
16 |
"format": "json",
|
17 |
-
"limit": 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
}
|
19 |
-
async with httpx.AsyncClient(timeout=
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""MedGenesis – minimal **Wikidata** lookup helper (async).
|
3 |
+
|
4 |
+
Features
|
5 |
+
~~~~~~~~
|
6 |
+
* `simple_search(term)` – return first matching entity dict `{id, label, description}`.
|
7 |
+
* `fetch_entity(qid)` – return full entity data (`claims`, `labels`, etc.).
|
8 |
+
* Uses the public Wikidata MediaWiki Action API (`/w/api.php`, no key). 15‑second timeout with `httpx`.
|
9 |
+
* Least‑recently‑used cache (128) to avoid repeated hits when the same
|
10 |
+
concept appears across multiple papers.
|
11 |
"""
|
12 |
+
from __future__ import annotations
|
13 |
|
14 |
+
import httpx, asyncio
|
15 |
+
from functools import lru_cache
|
16 |
from typing import Dict, Optional
|
17 |
|
18 |
+
# Endpoint URL that every request in this module is sent to.
_API = "https://www.wikidata.org/w/api.php"
|
19 |
+
# Timeout handed to each `httpx.AsyncClient` (the module docstring states 15 seconds).
_TIMEOUT = 15
|
20 |
+
# Default headers set on each `httpx.AsyncClient`; the User-Agent names this tool.
_HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"}
|
21 |
+
|
22 |
+
# ---------------------------------------------------------------------
|
23 |
+
# Public helpers
|
24 |
+
# ---------------------------------------------------------------------
|
25 |
|
26 |
+
# Bounded LRU store for `simple_search`, keyed by search term. A plain dict
# preserves insertion order, so its first key is the least recently used one.
# `functools.lru_cache` cannot be used on an `async def`: it would cache the
# coroutine object, which raises RuntimeError when awaited a second time.
_SEARCH_CACHE: Dict[str, Optional[Dict]] = {}
_SEARCH_CACHE_MAX = 128


async def simple_search(term: str) -> Optional[Dict]:
    """Return the top search hit for *term*, or None when nothing matches.

    Sends a `wbsearchentities` request (English, limit 1) to `_API` and
    returns the first element of the response's `search` list. Successful
    results, including "no hit" (None), are kept in an in-process LRU cache
    of up to 128 terms; failed requests are never cached.

    Args:
        term: Text to search for.

    Returns:
        The first hit dict as returned by the API, or None if the `search`
        list is empty or missing.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx/5xx
            (via `raise_for_status`). Other httpx errors propagate unchanged.
    """
    if term in _SEARCH_CACHE:
        # Pop and re-insert so the entry becomes the most recently used.
        cached = _SEARCH_CACHE.pop(term)
        _SEARCH_CACHE[term] = cached
        return cached

    params = {
        "action": "wbsearchentities",
        "search": term,
        "language": "en",
        "format": "json",
        "limit": 1,
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        hits = resp.json().get("search", [])

    hit = hits[0] if hits else None

    # A concurrent call may have stored the same term while we awaited;
    # drop it first so the fresh entry lands at the most-recent end.
    _SEARCH_CACHE.pop(term, None)
    _SEARCH_CACHE[term] = hit
    if len(_SEARCH_CACHE) > _SEARCH_CACHE_MAX:
        # Evict the least recently used entry (oldest key in the dict).
        del _SEARCH_CACHE[next(iter(_SEARCH_CACHE))]
    return hit


# Keep the `cache_clear()` hook that the former `lru_cache` wrapper exposed.
simple_search.cache_clear = _SEARCH_CACHE.clear  # type: ignore[attr-defined]
|
41 |
+
|
42 |
+
|
43 |
+
# Bounded LRU store for `fetch_entity`, keyed by Q-ID. A plain dict preserves
# insertion order, so its first key is the least recently used one.
# `functools.lru_cache` cannot be used on an `async def`: it would cache the
# coroutine object, which raises RuntimeError when awaited a second time.
_ENTITY_CACHE: Dict[str, Dict] = {}
_ENTITY_CACHE_MAX = 128


async def fetch_entity(qid: str) -> Dict:
    """Fetch full entity JSON for a Wikidata Q‑ID (e.g. `Q12136`).

    Sends a `wbgetentities` request (languages limited to English) to `_API`
    and returns the object stored under *qid* in the response's `entities`
    mapping. Successful results are kept in an in-process LRU cache of up to
    128 ids; failed requests are never cached.

    Args:
        qid: Entity identifier to look up.

    Returns:
        The entity dict, or an empty dict when the response has no
        `entities` entry for *qid*.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx/5xx
            (via `raise_for_status`). Other httpx errors propagate unchanged.
    """
    if qid in _ENTITY_CACHE:
        # Pop and re-insert so the entry becomes the most recently used.
        cached = _ENTITY_CACHE.pop(qid)
        _ENTITY_CACHE[qid] = cached
        return cached

    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "languages": "en",
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        entity = resp.json().get("entities", {}).get(qid, {})

    # A concurrent call may have stored the same id while we awaited;
    # drop it first so the fresh entry lands at the most-recent end.
    _ENTITY_CACHE.pop(qid, None)
    _ENTITY_CACHE[qid] = entity
    if len(_ENTITY_CACHE) > _ENTITY_CACHE_MAX:
        # Evict the least recently used entry (oldest key in the dict).
        del _ENTITY_CACHE[next(iter(_ENTITY_CACHE))]
    return entity


# Keep the `cache_clear()` hook that the former `lru_cache` wrapper exposed.
fetch_entity.cache_clear = _ENTITY_CACHE.clear  # type: ignore[attr-defined]
|
56 |
+
|
57 |
+
|
58 |
+
# ---------------------------------------------------------------------
|
59 |
+
# Demo / manual test
|
60 |
+
# ---------------------------------------------------------------------
|
61 |
+
if __name__ == "__main__":

    async def _demo():
        """Manual smoke test: search one term, then fetch the matching entity."""
        top = await simple_search("glioblastoma")
        print("Top hit:", top)
        if not top:
            # Nothing matched, so there is no entity id to look up.
            return
        entity = await fetch_entity(top["id"])
        english_label = entity.get("labels", {}).get("en", {})
        print("Labels:", english_label)

    asyncio.run(_demo())
|