Spaces:
Paused
Paused
| """Functions for measuring the quality of a partition (into | |
| communities). | |
| """ | |
| from itertools import combinations | |
| import networkx as nx | |
| from networkx import NetworkXError | |
| from networkx.algorithms.community.community_utils import is_partition | |
| from networkx.utils.decorators import argmap | |
| __all__ = ["modularity", "partition_quality"] | |
| class NotAPartition(NetworkXError): | |
| """Raised if a given collection is not a partition.""" | |
| def __init__(self, G, collection): | |
| msg = f"{collection} is not a valid partition of the graph {G}" | |
| super().__init__(msg) | |
| def _require_partition(G, partition): | |
| """Decorator to check that a valid partition is input to a function | |
| Raises :exc:`networkx.NetworkXError` if the partition is not valid. | |
| This decorator should be used on functions whose first two arguments | |
| are a graph and a partition of the nodes of that graph (in that | |
| order):: | |
| >>> @require_partition | |
| ... def foo(G, partition): | |
| ... print("partition is valid!") | |
| ... | |
| >>> G = nx.complete_graph(5) | |
| >>> partition = [{0, 1}, {2, 3}, {4}] | |
| >>> foo(G, partition) | |
| partition is valid! | |
| >>> partition = [{0}, {2, 3}, {4}] | |
| >>> foo(G, partition) | |
| Traceback (most recent call last): | |
| ... | |
| networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G | |
| >>> partition = [{0, 1}, {1, 2, 3}, {4}] | |
| >>> foo(G, partition) | |
| Traceback (most recent call last): | |
| ... | |
| networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G | |
| """ | |
| if is_partition(G, partition): | |
| return G, partition | |
| raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G") | |
| require_partition = argmap(_require_partition, (0, 1)) | |
| def intra_community_edges(G, partition): | |
| """Returns the number of intra-community edges for a partition of `G`. | |
| Parameters | |
| ---------- | |
| G : NetworkX graph. | |
| partition : iterable of sets of nodes | |
| This must be a partition of the nodes of `G`. | |
| The "intra-community edges" are those edges joining a pair of nodes | |
| in the same block of the partition. | |
| """ | |
| return sum(G.subgraph(block).size() for block in partition) | |
| def inter_community_edges(G, partition): | |
| """Returns the number of inter-community edges for a partition of `G`. | |
| according to the given | |
| partition of the nodes of `G`. | |
| Parameters | |
| ---------- | |
| G : NetworkX graph. | |
| partition : iterable of sets of nodes | |
| This must be a partition of the nodes of `G`. | |
| The *inter-community edges* are those edges joining a pair of nodes | |
| in different blocks of the partition. | |
| Implementation note: this function creates an intermediate graph | |
| that may require the same amount of memory as that of `G`. | |
| """ | |
| # Alternate implementation that does not require constructing a new | |
| # graph object (but does require constructing an affiliation | |
| # dictionary): | |
| # | |
| # aff = dict(chain.from_iterable(((v, block) for v in block) | |
| # for block in partition)) | |
| # return sum(1 for u, v in G.edges() if aff[u] != aff[v]) | |
| # | |
| MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph | |
| return nx.quotient_graph(G, partition, create_using=MG).size() | |
| def inter_community_non_edges(G, partition): | |
| """Returns the number of inter-community non-edges according to the | |
| given partition of the nodes of `G`. | |
| Parameters | |
| ---------- | |
| G : NetworkX graph. | |
| partition : iterable of sets of nodes | |
| This must be a partition of the nodes of `G`. | |
| A *non-edge* is a pair of nodes (undirected if `G` is undirected) | |
| that are not adjacent in `G`. The *inter-community non-edges* are | |
| those non-edges on a pair of nodes in different blocks of the | |
| partition. | |
| Implementation note: this function creates two intermediate graphs, | |
| which may require up to twice the amount of memory as required to | |
| store `G`. | |
| """ | |
| # Alternate implementation that does not require constructing two | |
| # new graph objects (but does require constructing an affiliation | |
| # dictionary): | |
| # | |
| # aff = dict(chain.from_iterable(((v, block) for v in block) | |
| # for block in partition)) | |
| # return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v]) | |
| # | |
| return inter_community_edges(nx.complement(G), partition) | |
| def modularity(G, communities, weight="weight", resolution=1): | |
| r"""Returns the modularity of the given partition of the graph. | |
| Modularity is defined in [1]_ as | |
| .. math:: | |
| Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right) | |
| \delta(c_i,c_j) | |
| where $m$ is the number of edges (or sum of all edge weights as in [5]_), | |
| $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$, | |
| $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and | |
| $j$ are in the same community else 0. | |
| According to [2]_ (and verified by some algebra) this can be reduced to | |
| .. math:: | |
| Q = \sum_{c=1}^{n} | |
| \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right] | |
| where the sum iterates over all communities $c$, $m$ is the number of edges, | |
| $L_c$ is the number of intra-community links for community $c$, | |
| $k_c$ is the sum of degrees of the nodes in community $c$, | |
| and $\gamma$ is the resolution parameter. | |
| The resolution parameter sets an arbitrary tradeoff between intra-group | |
| edges and inter-group edges. More complex grouping patterns can be | |
| discovered by analyzing the same network with multiple values of gamma | |
| and then combining the results [3]_. That said, it is very common to | |
| simply use gamma=1. More on the choice of gamma is in [4]_. | |
| The second formula is the one actually used in calculation of the modularity. | |
| For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$. | |
| Parameters | |
| ---------- | |
| G : NetworkX Graph | |
| communities : list or iterable of set of nodes | |
| These node sets must represent a partition of G's nodes. | |
| weight : string or None, optional (default="weight") | |
| The edge attribute that holds the numerical value used | |
| as a weight. If None or an edge does not have that attribute, | |
| then that edge has weight 1. | |
| resolution : float (default=1) | |
| If resolution is less than 1, modularity favors larger communities. | |
| Greater than 1 favors smaller communities. | |
| Returns | |
| ------- | |
| Q : float | |
| The modularity of the partition. | |
| Raises | |
| ------ | |
| NotAPartition | |
| If `communities` is not a partition of the nodes of `G`. | |
| Examples | |
| -------- | |
| >>> G = nx.barbell_graph(3, 0) | |
| >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}]) | |
| 0.35714285714285715 | |
| >>> nx.community.modularity(G, nx.community.label_propagation_communities(G)) | |
| 0.35714285714285715 | |
| References | |
| ---------- | |
| .. [1] M. E. J. Newman "Networks: An Introduction", page 224. | |
| Oxford University Press, 2011. | |
| .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore. | |
| "Finding community structure in very large networks." | |
| Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187> | |
| .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection" | |
| Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110 | |
| .. [4] M. E. J. Newman, "Equivalence between modularity optimization and | |
| maximum likelihood methods for community detection" | |
| Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315 | |
| .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large | |
| networks" J. Stat. Mech 10008, 1-12 (2008). | |
| https://doi.org/10.1088/1742-5468/2008/10/P10008 | |
| """ | |
| if not isinstance(communities, list): | |
| communities = list(communities) | |
| if not is_partition(G, communities): | |
| raise NotAPartition(G, communities) | |
| directed = G.is_directed() | |
| if directed: | |
| out_degree = dict(G.out_degree(weight=weight)) | |
| in_degree = dict(G.in_degree(weight=weight)) | |
| m = sum(out_degree.values()) | |
| norm = 1 / m**2 | |
| else: | |
| out_degree = in_degree = dict(G.degree(weight=weight)) | |
| deg_sum = sum(out_degree.values()) | |
| m = deg_sum / 2 | |
| norm = 1 / deg_sum**2 | |
| def community_contribution(community): | |
| comm = set(community) | |
| L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm) | |
| out_degree_sum = sum(out_degree[u] for u in comm) | |
| in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum | |
| return L_c / m - resolution * out_degree_sum * in_degree_sum * norm | |
| return sum(map(community_contribution, communities)) | |
| def partition_quality(G, partition): | |
| """Returns the coverage and performance of a partition of G. | |
| The *coverage* of a partition is the ratio of the number of | |
| intra-community edges to the total number of edges in the graph. | |
| The *performance* of a partition is the number of | |
| intra-community edges plus inter-community non-edges divided by the total | |
| number of potential edges. | |
| This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links. | |
| Parameters | |
| ---------- | |
| G : NetworkX graph | |
| partition : sequence | |
| Partition of the nodes of `G`, represented as a sequence of | |
| sets of nodes (blocks). Each block of the partition represents a | |
| community. | |
| Returns | |
| ------- | |
| (float, float) | |
| The (coverage, performance) tuple of the partition, as defined above. | |
| Raises | |
| ------ | |
| NetworkXError | |
| If `partition` is not a valid partition of the nodes of `G`. | |
| Notes | |
| ----- | |
| If `G` is a multigraph; | |
| - for coverage, the multiplicity of edges is counted | |
| - for performance, the result is -1 (total number of possible edges is not defined) | |
| References | |
| ---------- | |
| .. [1] Santo Fortunato. | |
| "Community Detection in Graphs". | |
| *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174 | |
| <https://arxiv.org/abs/0906.0612> | |
| """ | |
| node_community = {} | |
| for i, community in enumerate(partition): | |
| for node in community: | |
| node_community[node] = i | |
| # `performance` is not defined for multigraphs | |
| if not G.is_multigraph(): | |
| # Iterate over the communities, quadratic, to calculate `possible_inter_community_edges` | |
| possible_inter_community_edges = sum( | |
| len(p1) * len(p2) for p1, p2 in combinations(partition, 2) | |
| ) | |
| if G.is_directed(): | |
| possible_inter_community_edges *= 2 | |
| else: | |
| possible_inter_community_edges = 0 | |
| # Compute the number of edges in the complete graph -- `n` nodes, | |
| # directed or undirected, depending on `G` | |
| n = len(G) | |
| total_pairs = n * (n - 1) | |
| if not G.is_directed(): | |
| total_pairs //= 2 | |
| intra_community_edges = 0 | |
| inter_community_non_edges = possible_inter_community_edges | |
| # Iterate over the links to count `intra_community_edges` and `inter_community_non_edges` | |
| for e in G.edges(): | |
| if node_community[e[0]] == node_community[e[1]]: | |
| intra_community_edges += 1 | |
| else: | |
| inter_community_non_edges -= 1 | |
| coverage = intra_community_edges / len(G.edges) | |
| if G.is_multigraph(): | |
| performance = -1.0 | |
| else: | |
| performance = (intra_community_edges + inter_community_non_edges) / total_pairs | |
| return coverage, performance | |