File size: 3,771 Bytes
1bc973b
 
 
 
 
 
 
7117e78
1bc973b
 
 
 
7117e78
1bc973b
 
7117e78
1bc973b
 
 
 
3f06a92
 
1bc973b
 
 
3f06a92
 
1bc973b
 
 
 
 
 
 
 
 
 
 
7117e78
 
1bc973b
 
 
 
 
7117e78
 
1bc973b
 
 
7117e78
3f06a92
1bc973b
 
 
 
 
 
 
7117e78
1bc973b
 
 
7117e78
3f06a92
7117e78
1bc973b
3f06a92
1bc973b
 
 
7117e78
1bc973b
3f06a92
7117e78
1bc973b
 
 
 
 
7117e78
3f06a92
 
1bc973b
 
 
3f06a92
 
1bc973b
3f06a92
1bc973b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f06a92
 
 
1bc973b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python3
"""MedGenesis – NetworkX helpers (robust version)

Key upgrades over the legacy helper:

1. **Edge‑key flexibility** – `build_nx` now recognises *four* common
   schemas produced by Streamlit‑agraph, PyVis, Neo4j exports or OT graphs:

      • `{"source": "n1", "target": "n2"}`              (agraph)
      • `{"from":   "n1", "to":     "n2"}`              (PyVis)
      • `{"src":    "n1", "dst":    "n2"}`              (neo4j/json)
      • `{"u":      "n1", "v":      "n2"}`              (NetworkX native)

2. **Weight aware** – optional numeric `weight` (or `value`) field becomes
   an edge attribute (defaults to 1).

3. **Self‑loop skip** – ignores self‑edges to keep density sensible.

4. **Utility metrics** – adds `betweenness` & `clustering` helpers in
   addition to top‑hub degree ranking.
"""

from __future__ import annotations

from typing import Dict, List, Tuple
import networkx as nx

# Public names exported by a star-import of this module; the
# underscore-prefixed `_edge_endpoints` helper is not listed.
__all__ = [
    "build_nx",
    "get_top_hubs",
    "get_density",
    "get_betweenness",
    "get_clustering_coeff",
]

# ---------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------

def _edge_endpoints(e: Dict) -> Tuple[str, str] | None:
    """Return (src, dst) if both ends exist; else None."""
    src = e.get("source") or e.get("from") or e.get("src") or e.get("u")
    dst = e.get("target") or e.get("to")   or e.get("dst") or e.get("v")
    if src and dst and src != dst:
        return str(src), str(dst)
    return None

# ---------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------

def build_nx(nodes: List[Dict], edges: List[Dict]) -> nx.Graph:
    """Convert heterogeneous node/edge dicts into an undirected NetworkX graph.

    Parameters
    ----------
    nodes : list of node dicts – each must contain an `id` key (stringified
            to form the node name); all other keys are copied as node
            attributes.
    edges : list of edge dicts – endpoints are resolved by
            `_edge_endpoints`; edges it rejects (missing endpoint or
            self-loop) are skipped.  The edge weight is taken from `weight`,
            else `value`, else defaults to 1.  An explicit numeric 0 is kept
            as 0 rather than being replaced by the default.

    Returns
    -------
    nx.Graph – every edge carries a float `weight` attribute.

    Raises
    ------
    KeyError – if a node dict has no `id` key.
    ValueError / TypeError – if a supplied weight cannot be converted by
            `float()`.
    """
    G = nx.Graph()

    # Nodes ----------------------------------------------------------------
    for n in nodes:
        attrs = {k: v for k, v in n.items() if k != "id"}
        G.add_node(str(n["id"]), **attrs)

    # Edges ----------------------------------------------------------------
    for e in edges:
        endpoints = _edge_endpoints(e)
        if not endpoints:
            continue

        weight = 1.0
        for key in ("weight", "value"):
            raw = e.get(key)
            # A genuine numeric zero is a real weight; every other falsy
            # value (None, "", False, ...) still means "not provided".
            is_zero = (
                isinstance(raw, (int, float))
                and not isinstance(raw, bool)
                and raw == 0
            )
            if raw or is_zero:
                weight = float(raw)
                break

        u, v = endpoints
        G.add_edge(u, v, weight=weight)

    return G

# ---------------------------------------------------------------------
# Metrics helpers
# ---------------------------------------------------------------------

def get_top_hubs(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]:
    """Rank nodes by **degree centrality** and return the first *k*.

    The result is a list of ``(node, centrality)`` pairs ordered from the
    highest score to the lowest.
    """
    ranked = list(nx.degree_centrality(G).items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:k]


def get_betweenness(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]:
    """Rank nodes by betweenness centrality and return the first *k*.

    Graphs with more than 500 nodes are scored with the sampled variant
    (``k=200, seed=42`` passed to NetworkX); smaller graphs use the default
    call.  The result is a list of ``(node, score)`` pairs, highest first.
    """
    # Sampling arguments are only supplied for large graphs.
    sampling = {"k": 200, "seed": 42} if G.number_of_nodes() > 500 else {}
    scores = nx.betweenness_centrality(G, **sampling)
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:k]


def get_clustering_coeff(G: nx.Graph) -> float:
    """Return the average clustering coefficient of *G* (0‑1).

    An empty graph yields ``0.0``.  The guard is needed because
    ``nx.average_clustering`` averages over the node set and raises
    ``ZeroDivisionError`` when there are no nodes.
    """
    if G.number_of_nodes() == 0:
        return 0.0
    return nx.average_clustering(G)


def get_density(G: nx.Graph) -> float:
    """Return the density of *G* as computed by ``nx.density``."""
    density = nx.density(G)
    return density