mgbam committed on
Commit
08a3e96
·
verified ·
1 Parent(s): d3ef28f

Rename mcp/graph_utils.py to mcp/graph_metrics.py

Browse files
Files changed (2) hide show
  1. mcp/graph_metrics.py +69 -0
  2. mcp/graph_utils.py +0 -110
mcp/graph_metrics.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ graph_metrics.py · Lightweight NetworkX helpers for MedGenesis
3
+
4
+ Key features
5
+ ────────────
6
+ • Accepts edge dictionaries in either Streamlit-agraph or PyVis style:
7
+ {"source": "n1", "target": "n2"} ← agraph
8
+ {"from": "n1", "to": "n2"} ← PyVis
9
+ • Silently skips malformed edges (no KeyError).
10
+ • Provides three public helpers:
11
+ build_nx(nodes, edges) → networkx.Graph
12
+ get_top_hubs(G, k=5) → List[(node_id, degree_centrality)]
13
+ get_density(G) → float (0–1)
14
+ """
15
+
16
+ from __future__ import annotations
17
+ from typing import List, Dict, Tuple
18
+ import networkx as nx
19
+
20
+
21
+ # ────────────────────────────────────────────────────────────────────
22
+ # Internal helpers
23
+ # ────────────────────────────────────────────────────────────────────
24
+ def _edge_ends(e: Dict) -> Tuple[str, str] | None:
25
+ """Return (src, dst) tuple if both ends exist; else None."""
26
+ src = e.get("source") or e.get("from")
27
+ dst = e.get("target") or e.get("to")
28
+ if src and dst:
29
+ return src, dst
30
+ return None
31
+
32
+
33
+ # ────────────────────────────────────────────────────────────────────
34
+ # Public API
35
+ # ────────────────────────────────────────────────────────────────────
36
def build_nx(nodes: List[Dict], edges: List[Dict]) -> nx.Graph:
    """
    Build an undirected NetworkX graph from agraph / PyVis style dicts.

    Nodes: every dict must provide an "id" key; the optional "label" is
           stored as the node's ``label`` attribute and defaults to the id.
    Edges: either {"source":, "target":} or {"from":, "to":}; dicts for
           which ``_edge_ends`` finds no endpoints are skipped.
    """
    graph = nx.Graph()

    # Register every node together with its display label.
    for node in nodes:
        node_id = node["id"]
        graph.add_node(node_id, label=node.get("label", node_id))

    # Connect the endpoints of each well-formed edge; ignore the rest.
    for edge in edges:
        endpoints = _edge_ends(edge)
        if not endpoints:
            continue
        src, dst = endpoints
        graph.add_edge(src, dst)

    return graph
56
+
57
+
58
def get_top_hubs(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]:
    """
    Rank nodes by degree centrality and return the first ``k`` of them.

    The result is a list of ``(node_id, centrality)`` pairs in descending
    centrality order, e.g. [('TP53', 0.42), ('EGFR', 0.36), ...]
    """
    centrality = nx.degree_centrality(G)
    # Sorting is stable, so nodes with equal scores keep their graph order.
    ranked_nodes = sorted(centrality, key=centrality.get, reverse=True)
    return [(node, centrality[node]) for node in ranked_nodes[:k]]
65
+
66
+
67
def get_density(G: nx.Graph) -> float:
    """
    Return the density of ``G`` as computed by ``networkx.density``.

    NOTE(review): the advertised [0, 1] range presumably assumes a graph
    without self-loops - confirm against the NetworkX documentation.
    """
    density = nx.density(G)
    return density
mcp/graph_utils.py DELETED
@@ -1,110 +0,0 @@
1
- #!/usr/bin/env python3
2
- """MedGenesis – NetworkX helpers (robust version)
3
-
4
- Key upgrades over the legacy helper:
5
-
6
- 1. **Edge‑key flexibility** – `build_nx` now recognises *four* common
7
- schemas produced by Streamlit‑agraph, PyVis, Neo4j exports or OT graphs:
8
-
9
- • `{"source": "n1", "target": "n2"}` (agraph)
10
- • `{"from": "n1", "to": "n2"}` (PyVis)
11
- • `{"src": "n1", "dst": "n2"}` (neo4j/json)
12
- • `{"u": "n1", "v": "n2"}` (NetworkX native)
13
-
14
- 2. **Weight aware** – optional numeric `weight` (or `value`) field becomes
15
- an edge attribute (defaults to 1).
16
-
17
- 3. **Self‑loop skip** – ignores self‑edges to keep density sensible.
18
-
19
- 4. **Utility metrics** – adds `betweenness` & `clustering` helpers in
20
- addition to top‑hub degree ranking.
21
- """
22
-
23
- from __future__ import annotations
24
-
25
- from typing import Dict, List, Tuple
26
- import networkx as nx
27
-
28
- __all__ = [
29
- "build_nx",
30
- "get_top_hubs",
31
- "get_density",
32
- "get_betweenness",
33
- "get_clustering_coeff",
34
- ]
35
-
36
- # ---------------------------------------------------------------------
37
- # Internal helpers
38
- # ---------------------------------------------------------------------
39
-
40
- def _edge_endpoints(e: Dict) -> Tuple[str, str] | None:
41
- """Return (src, dst) if both ends exist; else None."""
42
- src = e.get("source") or e.get("from") or e.get("src") or e.get("u")
43
- dst = e.get("target") or e.get("to") or e.get("dst") or e.get("v")
44
- if src and dst and src != dst:
45
- return str(src), str(dst)
46
- return None
47
-
48
- # ---------------------------------------------------------------------
49
- # Public API
50
- # ---------------------------------------------------------------------
51
-
52
def _edge_weight(e: Dict, default: float = 1.0) -> float:
    """Return the numeric ``weight`` (or ``value``) of an edge dict.

    ``"weight"`` is preferred over ``"value"``.  An explicit ``0`` is kept
    as ``0.0``; missing or non-numeric entries fall through to the next key
    and finally to ``default``.
    """
    for key in ("weight", "value"):
        raw = e.get(key)
        if raw is None:
            continue
        try:
            return float(raw)
        except (TypeError, ValueError):
            # Non-numeric entry: try the next key rather than abort the build.
            continue
    return default


def build_nx(nodes: List[Dict], edges: List[Dict]) -> nx.Graph:
    """Convert heterogeneous node/edge dicts into an undirected NetworkX graph.

    Parameters
    ----------
    nodes : list of node dicts – each must contain an `id` key (stored as
        ``str``); all other keys are copied as node attributes.
    edges : list of edge dicts – endpoints are resolved by
        ``_edge_endpoints``; edges it rejects are skipped.  The edge
        ``weight`` attribute comes from the `weight` or `value` field and
        defaults to 1.0 when neither holds a number.

    Returns
    -------
    nx.Graph – ready for downstream centrality / drawing.

    Raises
    ------
    KeyError – if a node dict has no `id` key.
    """
    G = nx.Graph()

    # Nodes ----------------------------------------------------------------
    for n in nodes:
        node_id = str(n["id"])
        attrs = {k: v for k, v in n.items() if k != "id"}
        G.add_node(node_id, **attrs)

    # Edges ----------------------------------------------------------------
    for e in edges:
        endpoints = _edge_endpoints(e)
        if not endpoints:
            continue
        u, v = endpoints
        G.add_edge(u, v, weight=_edge_weight(e))

    return G
83
-
84
- # ---------------------------------------------------------------------
85
- # Metrics helpers
86
- # ---------------------------------------------------------------------
87
-
88
def get_top_hubs(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]:
    """Return the *k* highest-ranked ``(node, degree centrality)`` pairs."""
    scores = nx.degree_centrality(G)
    # Stable sort: nodes with equal centrality keep their graph order.
    ordered = sorted(scores, key=scores.get, reverse=True)
    return [(node, scores[node]) for node in ordered[:k]]
92
-
93
-
94
def get_betweenness(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]:
    """Return the *k* highest-ranked ``(node, betweenness centrality)`` pairs.

    For graphs with more than 500 nodes the centrality is approximated by
    calling NetworkX with ``k=200`` and a fixed ``seed=42``; smaller graphs
    use the default (exact) call.
    """
    # Note: the ``k`` passed to NetworkX below is its sampling parameter,
    # unrelated to this function's top-*k* cut-off.
    sampling = {"k": 200, "seed": 42} if G.number_of_nodes() > 500 else {}
    scores = nx.betweenness_centrality(G, **sampling)
    ordered = sorted(scores, key=scores.get, reverse=True)
    return [(node, scores[node]) for node in ordered[:k]]
101
-
102
-
103
def get_clustering_coeff(G: nx.Graph) -> float:
    """Return the average clustering coefficient of ``G`` (0‑1).

    An empty graph yields ``0.0``: ``nx.average_clustering`` averages over
    the nodes and would otherwise fail with ``ZeroDivisionError`` when there
    are none.
    """
    if G.number_of_nodes() == 0:
        return 0.0
    return nx.average_clustering(G)
106
-
107
-
108
def get_density(G: nx.Graph) -> float:
    """Return the density of ``G`` as reported by ``networkx.density``.

    NOTE(review): the [0, 1] range presumably relies on the graph having no
    self-loops - confirm against the NetworkX documentation.
    """
    density = nx.density(G)
    return density