|
|
|
"""MedGenesis – NetworkX helpers (robust version) |
|
|
|
Key upgrades over the legacy helper: |
|
|
|
1. **Edge‑key flexibility** – `build_nx` now recognises *four* common |
|
schemas produced by Streamlit‑agraph, PyVis, Neo4j exports or OT graphs: |
|
|
|
• `{"source": "n1", "target": "n2"}` (agraph) |
|
• `{"from": "n1", "to": "n2"}` (PyVis) |
|
• `{"src": "n1", "dst": "n2"}` (neo4j/json) |
|
• `{"u": "n1", "v": "n2"}` (NetworkX native) |
|
|
|
2. **Weight aware** – optional numeric `weight` (or `value`) field becomes |
|
an edge attribute (defaults to 1). |
|
|
|
3. **Self‑loop skip** – ignores self‑edges to keep density sensible. |
|
|
|
4. **Utility metrics** – adds `betweenness` & `clustering` helpers in |
|
addition to top‑hub degree ranking. |
|
""" |
|
|
|
from __future__ import annotations |
|
|
|
from typing import Dict, List, Tuple |
|
import networkx as nx |
|
|
|
__all__ = [ |
|
"build_nx", |
|
"get_top_hubs", |
|
"get_density", |
|
"get_betweenness", |
|
"get_clustering_coeff", |
|
] |
|
|
|
|
|
|
|
|
|
|
|
def _edge_endpoints(e: Dict) -> Tuple[str, str] | None: |
|
"""Return (src, dst) if both ends exist; else None.""" |
|
src = e.get("source") or e.get("from") or e.get("src") or e.get("u") |
|
dst = e.get("target") or e.get("to") or e.get("dst") or e.get("v") |
|
if src and dst and src != dst: |
|
return str(src), str(dst) |
|
return None |
|
|
|
|
|
|
|
|
|
|
|
def build_nx(nodes: List[Dict], edges: List[Dict]) -> nx.Graph: |
|
"""Convert heterogeneous node/edge dicts into an undirected NetworkX graph. |
|
|
|
Parameters |
|
---------- |
|
nodes : list of node dicts – each must contain an `id` key; other keys |
|
are copied as attributes. |
|
edges : list of edge dicts – keys can be any of the recognised schemas. |
|
|
|
Returns |
|
------- |
|
nx.Graph – ready for downstream centrality / drawing. |
|
""" |
|
G = nx.Graph() |
|
|
|
|
|
for n in nodes: |
|
node_id = str(n["id"]) |
|
attrs = {k: v for k, v in n.items() if k != "id"} |
|
G.add_node(node_id, **attrs) |
|
|
|
|
|
for e in edges: |
|
endpoints = _edge_endpoints(e) |
|
if not endpoints: |
|
continue |
|
u, v = endpoints |
|
w = e.get("weight") or e.get("value") or 1 |
|
G.add_edge(u, v, weight=float(w)) |
|
|
|
return G |
|
|
|
|
|
|
|
|
|
|
|
def get_top_hubs(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]: |
|
"""Return top‑*k* nodes by **degree centrality**.""" |
|
dc = nx.degree_centrality(G) |
|
return sorted(dc.items(), key=lambda kv: kv[1], reverse=True)[:k] |
|
|
|
|
|
def get_betweenness(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]: |
|
"""Top‑*k* nodes by betweenness centrality (approx if |V| > 500).""" |
|
if G.number_of_nodes() > 500: |
|
bc = nx.betweenness_centrality(G, k=200, seed=42) |
|
else: |
|
bc = nx.betweenness_centrality(G) |
|
return sorted(bc.items(), key=lambda kv: kv[1], reverse=True)[:k] |
|
|
|
|
|
def get_clustering_coeff(G: nx.Graph) -> float: |
|
"""Return average clustering coefficient (0‑1).""" |
|
return nx.average_clustering(G) |
|
|
|
|
|
def get_density(G: nx.Graph) -> float: |
|
"""Graph density in [0, 1].""" |
|
return nx.density(G) |