Spaces:

mgbam
/

MCP_Res

Runtime error

App Files Files Community

mgbam committed 7 days ago

Commit

633ba95

verified ·

1 Parent(s): 1bc973b

Update mcp/knowledge_graph.py

Browse files

Files changed (1) hide show

mcp/knowledge_graph.py +141 -41

mcp/knowledge_graph.py CHANGED Viewed

@@ -1,61 +1,161 @@
-# mcp/knowledge_graph.py
-from streamlit_agraph import Node, Edge, Config
 import re
-def build_agraph(papers, umls, drug_safety):
-    """
-    Build interactive agraph nodes and edges.
-    Handles drug_safety entries that may be dict or list.
     """
-    nodes, edges = [], []
-    # Add UMLS concept nodes
     for c in umls:
-        cui = c.get("cui")
-        name = c.get("name", "")
         if cui and name:
-            nid = f"concept_{cui}"
-            nodes.append(Node(id=nid, label=name, size=25, color="#00b894"))
-    # Add drug nodes, handling list or dict
-    drug_names = []
-    for i, dr in enumerate(drug_safety):
-        if not dr:
-            continue
-        # Normalize to single dict
         recs = dr if isinstance(dr, list) else [dr]
         for j, rec in enumerate(recs):
-            # Attempt to extract a drug name
-            dn = rec.get("drug_name") or rec.get("patient", {}).get("drug", "") or rec.get("medicinalproduct", "")
-            dn = dn or f"drug_{i}_{j}"
             did = f"drug_{i}_{j}"
-            drug_names.append((did, dn))
-            nodes.append(Node(id=did, label=dn, size=25, color="#d35400"))
-    # Add paper nodes and link to concepts & drugs
-    for pi, p in enumerate(papers):
-        pid = f"paper_{pi}"
-        nodes.append(Node(id=pid, label=f"P{pi+1}", tooltip=p["title"], size=15, color="#0984e3"))
-        text = f"{p.get('title','')} {p.get('summary','')}".lower()
-        # Link to concepts
         for c in umls:
-            cname = c.get("name", "")
-            cui = c.get("cui")
-            if cname and cui and cname.lower() in text:
-                edges.append(Edge(source=pid, target=f"concept_{cui}", label="mentions"))
-        # Link to drugs
-        for did, dn in drug_names:
-            if dn.lower() in text:
                 edges.append(Edge(source=pid, target=did, label="mentions"))
-    config = Config(
-        width="100%", height="600", directed=False,
-        nodeHighlightBehavior=True, highlightColor="#f1c40f",
         collapsible=True,
-        node={"labelProperty": "label"}
     )
-    return nodes, edges, config

+#!/usr/bin/env python3
+"""MedGenesis – knowledge‑graph builder for Streamlit‑Agraph.
+This version recognises **all new enrichment layers** introduced in the
+latest orchestrator:
+    • UMLS concepts                → green nodes
+    • MyGene / NCBI gene hits      → purple nodes
+    • openFDA / DrugCentral drugs  → orange nodes
+    • ClinicalTrials.gov studies   → pink nodes
+    • Open Targets associations    → red drug–gene / gene–disease edges
+    • Literature papers            → blue nodes (tooltip = title)
+The entry‑point `build_agraph` now receives a richer payload and returns
+*(nodes, edges, config)* ready for `streamlit_agraph.agraph`.
+"""
+from __future__ import annotations
 import re
+from typing import List, Dict, Tuple
+from streamlit_agraph import Node, Edge, Config
+# ---------------------------------------------------------------------
+# Colour palette (flat‑UI)
+# ---------------------------------------------------------------------
+C_PAPER   = "#0984e3"
+C_CONCEPT = "#00b894"
+C_GENE    = "#6c5ce7"
+C_DRUG    = "#d35400"
+C_TRIAL   = "#fd79a8"
+C_OT_EDGE = "#c0392b"
+# ---------------------------------------------------------------------
+# Helper builders
+# ---------------------------------------------------------------------
def _add_node(nodes: List[Node], node_id: str, label: str, color: str, tooltip: str | None = None, size: int = 25):
    """Append a new ``Node`` to *nodes* unless that id is already present.

    The list is mutated in place and nothing is returned. The original
    author notes that duplicate node ids crash agraph, hence the check.

    Parameters
    ----------
    nodes   : list of nodes collected so far (mutated in place).
    node_id : id of the node to add; also the key used for the duplicate check.
    label   : text passed to ``Node`` as ``label``.
    color   : colour passed to ``Node`` as ``color``.
    tooltip : optional text passed to ``Node`` as ``tooltip``.
    size    : value passed to ``Node`` as ``size`` (default 25).
    """
    # Linear scan: bail out as soon as an existing node carries this id.
    for existing in nodes:
        if existing.id == node_id:
            return
    fresh = Node(id=node_id, label=label, color=color, size=size, tooltip=tooltip)
    nodes.append(fresh)
def _match(text: str, pattern: str) -> bool:
    """Return True if *pattern* occurs in *text* as a literal substring.

    The comparison is case-insensitive and *pattern* is escaped, so regex
    metacharacters in it (``+``, ``(``, ``.`` ...) are matched literally.

    Non-string or empty arguments yield ``False`` instead of raising:
    ``re.escape`` raises ``TypeError`` for values such as ``None`` or a list,
    and an empty pattern would otherwise match every text.
    """
    if not isinstance(text, str) or not isinstance(pattern, str):
        return False
    if not pattern:
        # An empty needle trivially matches everything; treat it as "no match".
        return False
    return re.search(re.escape(pattern), text, flags=re.I) is not None
+# ---------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------
def build_agraph(
    papers: List[Dict],
    umls: List[Dict],
    drug_safety: List[Dict],
    genes: List[Dict] | None = None,
    trials: List[Dict] | None = None,
    ot_associations: List[Dict] | None = None,
):
    """Return ``(nodes, edges, config)`` for ``streamlit_agraph``.

    Nodes are added through ``_add_node``, so repeated ids are ignored.
    Malformed entries (empty drug records, genes without a symbol or name,
    concepts without a CUI, non-numeric Open Targets scores) are skipped or
    defaulted instead of raising.

    Parameters
    ----------
    papers          : PubMed / arXiv merged list (dicts with title & summary).
    umls            : List of UMLS concept dicts `{cui, name}`.
    drug_safety     : openFDA / DrugCentral outputs (mixed dict / list).
    genes           : Optional list with MyGene/NCBI dicts (symbol, name,...).
    trials          : Optional ClinicalTrials.gov v2 studies list.
    ot_associations : Optional list from Open Targets.

    Returns
    -------
    tuple of (list of Node, list of Edge, Config)
    """
    nodes: List[Node] = []
    edges: List[Edge] = []

    # 1. Concepts ------------------------------------------------------
    # (node_id, name) pairs reused for paper linking, so edges can only
    # target concept nodes that were actually created.
    concept_terms: List[Tuple[str, str]] = []
    for c in umls or []:
        cui, name = c.get("cui"), c.get("name", "")
        if cui and name:
            cid = f"concept_{cui}"
            _add_node(nodes, cid, name, C_CONCEPT)
            if isinstance(name, str):
                concept_terms.append((cid, name))

    # 2. Genes ---------------------------------------------------------
    gene_terms: List[Tuple[str, str]] = []
    for g in genes or []:
        sym = g.get("symbol") or g.get("name")
        if not sym:
            # Nothing usable as id/label; would otherwise create "gene_None".
            continue
        gid = f"gene_{sym}"
        _add_node(nodes, gid, sym, C_GENE, tooltip=g.get("summary", ""))
        symbol = g.get("symbol")
        # Papers are linked on the symbol only (not on the fallback name).
        if symbol and isinstance(symbol, str):
            gene_terms.append((gid, symbol))

    # 3. Drugs (normalize mixed structures) ----------------------------
    drug_tuples: List[Tuple[str, str]] = []  # (node_id, drug_name)
    for i, dr in enumerate(drug_safety or []):
        if not dr:
            # Empty / None results from a failed lookup carry no drug.
            continue
        recs = dr if isinstance(dr, list) else [dr]
        for j, rec in enumerate(recs):
            if not isinstance(rec, dict):
                continue
            did = f"drug_{i}_{j}"
            patient = rec.get("patient")
            patient_drug = patient.get("drug") if isinstance(patient, dict) else None
            candidates = (
                rec.get("drug_name"),
                patient_drug,
                rec.get("medicinalproduct"),
            )
            # Only plain, non-empty strings are usable as label and search
            # term; nested structures fall through to the positional default.
            name = next(
                (cand for cand in candidates if isinstance(cand, str) and cand.strip()),
                did,
            )
            drug_tuples.append((did, name))
            _add_node(nodes, did, name, C_DRUG)

    # 4. Trials --------------------------------------------------------
    for t in trials or []:
        nct = t.get("nctId") or t.get("nctid")
        if not nct:
            continue
        tooltip = t.get("briefTitle") or "Clinical trial"
        _add_node(nodes, f"trial_{nct}", nct, C_TRIAL, tooltip=tooltip, size=20)

    # 5. Papers & mention edges ----------------------------------------
    seen_links = set()  # (source, target) pairs already emitted
    for idx, p in enumerate(papers or []):
        pid = f"paper_{idx}"
        title = p.get("title") or ""
        summary = p.get("summary") or ""
        _add_node(nodes, pid, f"P{idx+1}", C_PAPER, tooltip=title, size=15)
        # _match is case-insensitive, so the text is not lower-cased here.
        text_blob = f"{title} {summary}"
        for target_id, term in (*concept_terms, *gene_terms, *drug_tuples):
            if (pid, target_id) in seen_links:
                continue
            if _match(text_blob, term):
                seen_links.add((pid, target_id))
                edges.append(Edge(source=pid, target=target_id, label="mentions"))

    # 6. Open Targets edges (gene–disease) -----------------------------
    for row in ot_associations or []:
        gsym = (row.get("target") or {}).get("symbol")
        dis = (row.get("disease") or {}).get("name")
        if not (gsym and dis):
            continue
        try:
            score = float(row.get("score") or 0)
        except (TypeError, ValueError):
            score = 0.0
        gid = f"gene_{gsym}"
        did = f"disease_{dis}"
        # Make sure both endpoints exist so the edge is never dangling.
        _add_node(nodes, gid, gsym, C_GENE)
        _add_node(nodes, did, dis, C_CONCEPT, size=20)
        edges.append(Edge(source=gid, target=did, color=C_OT_EDGE, label=f"OT {score:.2f}"))

    # 7. Config --------------------------------------------------------
    cfg = Config(
        directed=False,
        width="100%",
        height="600",
        nodeHighlightBehavior=True,
        highlightColor="#f1c40f",
        collapsible=True,
        showLegend=False,
        node={"labelProperty": "label"},
    )
    return nodes, edges, cfg