mgbam committed on
Commit
cf5709a
·
verified ·
1 Parent(s): f30f44c

Update mcp/wikidata.py

Browse files
Files changed (1) hide show
  1. mcp/wikidata.py +56 -11
mcp/wikidata.py CHANGED
@@ -1,23 +1,68 @@
1
- # mcp/wikidata.py
2
- """
3
- Minimal Wikidata entity lookup for biomedical concepts.
 
 
 
 
 
 
 
4
  """
 
5
 
6
- import httpx
 
7
  from typing import Dict, Optional
8
 
9
- API = "https://www.wikidata.org/w/api.php"
 
 
 
 
 
 
10
 
 
async def simple_search(term: str) -> Optional[Dict]:
    """Look up *term* on Wikidata and return the best match.

    Sends a ``wbsearchentities`` request (English, JSON, one result) to the
    endpoint stored in ``API`` and returns the first element of the
    response's ``"search"`` list, or ``None`` when that list is empty.

    Raises:
        httpx.HTTPStatusError: if the response has an error status code.
        KeyError: if the response JSON has no ``"search"`` key.
    """
    query = {
        "action": "wbsearchentities",
        "search": term,
        "language": "en",
        "format": "json",
        "limit": 1,
    }
    async with httpx.AsyncClient(timeout=15) as http:
        response = await http.get(API, params=query)
        response.raise_for_status()
        matches = response.json()["search"]

    if not matches:
        return None
    return matches[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """MedGenesis – minimal **Wikidata** lookup helper (async).
3
+
4
+ Features
5
+ ~~~~~~~~
6
+ * `simple_search(term)` – return first matching entity dict `{id, label, description}`.
7
+ * `fetch_entity(qid)` – return full entity data (`claims`, `labels`, etc.).
8
+ * Uses the public Wikidata MediaWiki Action API (`w/api.php`, no key). 15‑second timeout with `httpx`.
9
+ * Least‑recently‑used cache (128) to avoid repeated hits when the same
10
+ concept appears across multiple papers.
11
  """
12
+ from __future__ import annotations
13
 
14
+ import httpx, asyncio
15
+ from functools import lru_cache
16
  from typing import Dict, Optional
17
 
18
+ _API = "https://www.wikidata.org/w/api.php"
19
+ _TIMEOUT = 15
20
+ _HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"}
21
+
22
+ # ---------------------------------------------------------------------
23
+ # Public helpers
24
+ # ---------------------------------------------------------------------
25
 
26
# Result cache for ``simple_search``.  ``functools.lru_cache`` cannot be used
# on an ``async def``: it would memoise the coroutine object itself, and a
# coroutine can only be awaited once, so the second lookup of the same term
# would raise ``RuntimeError``.  A plain dict keeps insertion order, which is
# used here as recency order (oldest entry first).
_SEARCH_CACHE_MAX = 128
_SEARCH_CACHE: Dict[str, Optional[Dict]] = {}


async def simple_search(term: str) -> Optional[Dict]:
    """Return the top Wikidata search hit for *term*, or ``None``.

    Sends a ``wbsearchentities`` request (English, JSON, one result) and
    returns the first element of the response's ``"search"`` list.  Successful
    lookups, including "no hit" (``None``), are kept in a least-recently-used
    cache of up to 128 terms; failed requests are not cached.

    The returned dict is the cached object itself, so callers should treat it
    as read-only.

    Raises:
        httpx.HTTPStatusError: if the response has an error status code.
    """
    if term in _SEARCH_CACHE:
        # Pop and re-insert so this term becomes the most recently used.
        cached = _SEARCH_CACHE.pop(term)
        _SEARCH_CACHE[term] = cached
        return cached

    params = {
        "action": "wbsearchentities",
        "search": term,
        "language": "en",
        "format": "json",
        "limit": 1,
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        hits = resp.json().get("search", [])
    result = hits[0] if hits else None

    # A concurrent call may have stored the same term while we were awaiting;
    # drop it first so the fresh entry lands at the "most recent" end.
    _SEARCH_CACHE.pop(term, None)
    _SEARCH_CACHE[term] = result
    while len(_SEARCH_CACHE) > _SEARCH_CACHE_MAX:
        # The first key in iteration order is the least recently used one.
        del _SEARCH_CACHE[next(iter(_SEARCH_CACHE))]
    return result


# Keep the ``cache_clear()`` hook that the former ``lru_cache`` wrapper exposed.
simple_search.cache_clear = _SEARCH_CACHE.clear
41
+
42
+
43
# Result cache for ``fetch_entity``.  ``functools.lru_cache`` cannot be used
# on an ``async def``: it would memoise the coroutine object itself, and a
# coroutine can only be awaited once, so fetching the same Q-ID twice would
# raise ``RuntimeError``.  Dict insertion order serves as recency order
# (oldest entry first).
_ENTITY_CACHE_MAX = 128
_ENTITY_CACHE: Dict[str, Dict] = {}


async def fetch_entity(qid: str) -> Dict:
    """Fetch full entity JSON for a Wikidata Q-ID (e.g. ``Q12136``).

    Sends a ``wbgetentities`` request restricted to English and returns the
    value stored under *qid* in the response's ``"entities"`` mapping, or an
    empty dict when either key is absent.  Successful lookups are kept in a
    least-recently-used cache of up to 128 IDs; failed requests are not
    cached.

    The returned dict is the cached object itself, so callers should treat it
    as read-only.

    Raises:
        httpx.HTTPStatusError: if the response has an error status code.
    """
    if qid in _ENTITY_CACHE:
        # Pop and re-insert so this ID becomes the most recently used.
        cached = _ENTITY_CACHE.pop(qid)
        _ENTITY_CACHE[qid] = cached
        return cached

    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "languages": "en",
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        entity = resp.json().get("entities", {}).get(qid, {})

    # A concurrent call may have stored the same ID while we were awaiting;
    # drop it first so the fresh entry lands at the "most recent" end.
    _ENTITY_CACHE.pop(qid, None)
    _ENTITY_CACHE[qid] = entity
    while len(_ENTITY_CACHE) > _ENTITY_CACHE_MAX:
        # The first key in iteration order is the least recently used one.
        del _ENTITY_CACHE[next(iter(_ENTITY_CACHE))]
    return entity


# Keep the ``cache_clear()`` hook that the former ``lru_cache`` wrapper exposed.
fetch_entity.cache_clear = _ENTITY_CACHE.clear
56
+
57
+
58
+ # ---------------------------------------------------------------------
59
+ # Demo / manual test
60
+ # ---------------------------------------------------------------------
61
if __name__ == "__main__":

    async def _demo():
        """Search for "glioblastoma" and print the hit and its English label."""
        top = await simple_search("glioblastoma")
        print("Top hit:", top)
        if not top:
            # Nothing matched, so there is no entity to fetch.
            return
        entity = await fetch_entity(top["id"])
        labels = entity.get("labels", {})
        print("Labels:", labels.get("en", {}))

    asyncio.run(_demo())