File size: 2,377 Bytes
cf5709a
 
 
 
 
 
 
 
 
 
1acc892
cf5709a
1acc892
cf5709a
 
1acc892
 
cf5709a
 
 
 
 
 
 
1acc892
cf5709a
1acc892
cf5709a
1acc892
 
 
 
 
cf5709a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1acc892
cf5709a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
"""MedGenesis – minimal **Wikidata** lookup helper (async).

Features
~~~~~~~~
* `simple_search(term)` – return first matching entity dict `{id, label, description}`.
* `fetch_entity(qid)`  – return full entity data (`claims`, `labels`, etc.).
* Uses the public Wikidata Action API (`/w/api.php`, no key) via `httpx`
  with a 15‑second timeout.
* Least‑recently‑used cache (128) to avoid repeated hits when the same
  concept appears across multiple papers.
"""
from __future__ import annotations

import httpx, asyncio
from functools import lru_cache
from typing import Dict, Optional

_API = "https://www.wikidata.org/w/api.php"
_TIMEOUT = 15
_HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"}

# ---------------------------------------------------------------------
# Public helpers
# ---------------------------------------------------------------------

# Completed search results keyed by term, kept in least-recently-used order.
# A plain dict preserves insertion order, so the first key is always the
# oldest entry.  ``functools.lru_cache`` cannot be used on a coroutine
# function: it would memoise the coroutine *object*, and awaiting that same
# object a second time raises ``RuntimeError``.
_SIMPLE_SEARCH_CACHE: Dict[str, Optional[Dict]] = {}
_SIMPLE_SEARCH_CACHE_MAX = 128


async def simple_search(term: str) -> Optional[Dict]:
    """Return the top Wikidata search hit for *term*, or ``None``.

    Sends a ``wbsearchentities`` request (English, limit 1) and returns the
    first element of the ``"search"`` list in the JSON response.  Results,
    including ``None`` for "no match", are memoised for up to 128 distinct
    terms; the least recently used term is evicted first.  Failed requests
    are never cached, so a later call retries.

    The cached dict is handed out as-is on every hit, so callers should
    treat it as read-only.

    Raises:
        httpx.HTTPStatusError: the API answered with a 4xx/5xx status.
        Other ``httpx`` errors (timeouts, connection failures) propagate
        unchanged.
    """
    cache = _SIMPLE_SEARCH_CACHE
    if term in cache:
        # Re-insert to mark the term as most recently used.
        hit = cache.pop(term)
        cache[term] = hit
        return hit

    params = {
        "action": "wbsearchentities",
        "search": term,
        "language": "en",
        "format": "json",
        "limit": 1,
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        hits = resp.json().get("search", [])
    hit = hits[0] if hits else None

    cache[term] = hit
    if len(cache) > _SIMPLE_SEARCH_CACHE_MAX:
        # Drop the oldest (least recently used) entry.
        del cache[next(iter(cache))]
    return hit


# Keep the ``cache_clear()`` hook that the previous lru_cache wrapper exposed.
simple_search.cache_clear = _SIMPLE_SEARCH_CACHE.clear


# Completed entity lookups keyed by Q-ID, kept in least-recently-used order
# (a plain dict preserves insertion order, so the first key is the oldest).
# ``functools.lru_cache`` cannot be used on a coroutine function: it would
# memoise the coroutine *object*, and awaiting that same object a second
# time raises ``RuntimeError``.
_FETCH_ENTITY_CACHE: Dict[str, Dict] = {}
_FETCH_ENTITY_CACHE_MAX = 128


async def fetch_entity(qid: str) -> Dict:
    """Fetch the entity JSON for a Wikidata Q-ID (e.g. ``Q12136``).

    Sends a ``wbgetentities`` request restricted to English and returns
    ``response["entities"][qid]``, or an empty dict when the response has no
    entry under exactly that key.  Results are memoised for up to 128
    distinct IDs; the least recently used ID is evicted first.  Failed
    requests are never cached, so a later call retries.

    The cached dict is handed out as-is on every hit, so callers should
    treat it as read-only.

    Raises:
        httpx.HTTPStatusError: the API answered with a 4xx/5xx status.
        Other ``httpx`` errors (timeouts, connection failures) propagate
        unchanged.
    """
    cache = _FETCH_ENTITY_CACHE
    if qid in cache:
        # Re-insert to mark the ID as most recently used.
        entity = cache.pop(qid)
        cache[qid] = entity
        return entity

    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "languages": "en",
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        entity = resp.json().get("entities", {}).get(qid, {})

    cache[qid] = entity
    if len(cache) > _FETCH_ENTITY_CACHE_MAX:
        # Drop the oldest (least recently used) entry.
        del cache[next(iter(cache))]
    return entity


# Keep the ``cache_clear()`` hook that the previous lru_cache wrapper exposed.
fetch_entity.cache_clear = _FETCH_ENTITY_CACHE.clear


# ---------------------------------------------------------------------
# Demo / manual test
# ---------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        """Search for a sample term, then print the top hit's English label."""
        top = await simple_search("glioblastoma")
        print("Top hit:", top)
        if not top:
            # Nothing matched, so there is no entity to fetch.
            return
        entity = await fetch_entity(top["id"])
        english_label = entity.get("labels", {}).get("en", {})
        print("Labels:", english_label)

    asyncio.run(_demo())