220 lines
7.1 KiB
Python
220 lines
7.1 KiB
Python
"""
|
|
Local Community Detection Algorithms
|
|
|
|
Local Community Detection (LCD) aims to detected one or a few communities
|
|
starting from certain source nodes in the network. This differs from Global
|
|
Community Detection (GCD), which aims to partition an entire network into
|
|
communities.
|
|
|
|
LCD is often useful when only a portion of the graph is known or the
|
|
graph is large enough that GCD is infeasable
|
|
|
|
[1]_ Gives a good introduction and overview of LCD
|
|
|
|
References
|
|
----------
|
|
.. [1] Baltsou, Georgia, Konstantinos Christopoulos, and Konstantinos Tsichlas.
|
|
Local community detection: A survey. IEEE Access 10 (2022): 110701-110726.
|
|
https://doi.org/10.1109/ACCESS.2022.3213980
|
|
|
|
|
|
"""
|
|
|
|
__all__ = ["greedy_source_expansion"]
|
|
|
|
|
|
def _clauset_greedy_source_expansion(G, *, source, cutoff=None):
|
|
if cutoff is None:
|
|
cutoff = float("inf")
|
|
C = {source}
|
|
B = {source}
|
|
U = G[source].keys() - C
|
|
T = {frozenset([node, nbr]) for node in B for nbr in G.neighbors(node)}
|
|
I = {edge for edge in T if all(node in C for node in edge)}
|
|
|
|
R_value = 0
|
|
while len(C) < cutoff:
|
|
if not U:
|
|
break
|
|
|
|
max_R = 0
|
|
best_node = None
|
|
best_node_B = best_node_T = best_node_I = set()
|
|
|
|
for v in U:
|
|
R_tmp, B_tmp, T_tmp, I_tmp = _calculate_local_modularity_for_candidate(
|
|
G, v, C, B, T, I
|
|
)
|
|
if R_tmp > max_R:
|
|
max_R = R_tmp
|
|
best_node = v
|
|
best_node_B = B_tmp
|
|
best_node_T = T_tmp
|
|
best_node_I = I_tmp
|
|
|
|
C = C | {best_node}
|
|
U.update(G[best_node].keys() - C)
|
|
U.remove(best_node)
|
|
B = best_node_B
|
|
T = best_node_T
|
|
I = best_node_I
|
|
if max_R < R_value:
|
|
break
|
|
R_value = max_R
|
|
|
|
return C
|
|
|
|
|
|
def _calculate_local_modularity_for_candidate(G, v, C, B, T, I):
|
|
"""
|
|
Compute the local modularity R and updated variables when adding node v to the community.
|
|
|
|
Parameters
|
|
----------
|
|
G : NetworkX graph
|
|
The input graph.
|
|
v : node
|
|
The candidate node to add to the community.
|
|
C : set
|
|
The current set of community nodes.
|
|
B : set
|
|
The current set of boundary nodes.
|
|
T : set of frozenset
|
|
The current set of boundary edges.
|
|
I : set of frozenset
|
|
The current set of internal boundary edges.
|
|
|
|
Returns
|
|
-------
|
|
R_tmp : float
|
|
The local modularity after adding node v.
|
|
B_tmp : set
|
|
The updated set of boundary nodes.
|
|
T_tmp : set of frozenset
|
|
The updated set of boundary edges.
|
|
I_tmp : set of frozenset
|
|
The updated set of internal boundary edges.
|
|
"""
|
|
C_tmp = C | {v}
|
|
B_tmp = B.copy()
|
|
T_tmp = T.copy()
|
|
I_tmp = I.copy()
|
|
removed_B_nodes = set()
|
|
|
|
# Update boundary nodes and edges
|
|
for nbr in G[v]:
|
|
if nbr not in C_tmp:
|
|
# v has nbrs not in the community, so it remains a boundary node
|
|
B_tmp.add(v)
|
|
# Add edge between v and nbr to boundary edges
|
|
T_tmp.add(frozenset([v, nbr]))
|
|
|
|
if nbr in B:
|
|
# Check if nbr should be removed from boundary nodes
|
|
# Go through nbrs nbrs to see if it is still a boundary node
|
|
nbr_still_in_B = any(nbr_nbr not in C_tmp for nbr_nbr in G[nbr])
|
|
if not nbr_still_in_B:
|
|
B_tmp.remove(nbr)
|
|
removed_B_nodes.add(nbr)
|
|
|
|
if nbr in C_tmp:
|
|
# Add edge between v and nbr to internal edges
|
|
I_tmp.add(frozenset([v, nbr]))
|
|
|
|
# Remove edges no longer in the boundary
|
|
for removed_node in removed_B_nodes:
|
|
for removed_node_nbr in G[removed_node]:
|
|
if removed_node_nbr not in B_tmp:
|
|
T_tmp.discard(frozenset([removed_node_nbr, removed_node]))
|
|
I_tmp.discard(frozenset([removed_node_nbr, removed_node]))
|
|
|
|
R_tmp = len(I_tmp) / len(T_tmp) if len(T_tmp) > 0 else 1
|
|
return R_tmp, B_tmp, T_tmp, I_tmp
|
|
|
|
|
|
ALGORITHMS = {
|
|
"clauset": _clauset_greedy_source_expansion,
|
|
}
|
|
|
|
|
|
def greedy_source_expansion(G, *, source, cutoff=None, method="clauset"):
|
|
r"""Find the local community around a source node.
|
|
|
|
Find the local community around a source node using Greedy Source
|
|
Expansion. Greedy Source Expansion generally identifies a local community
|
|
starting from the source node and expands it based on the criteria of the
|
|
chosen algorithm.
|
|
|
|
The algorithm is specified with the `method` keyword argument.
|
|
|
|
* `"clauset"` [1]_ uses local modularity gain to determine local communities.
|
|
The algorithm adds nbring nodes that maximize local modularity to the
|
|
community iteratively, stopping when no additional nodes improve the modularity
|
|
or when a predefined cutoff is reached.
|
|
|
|
Local modularity measures the density of edges within a community relative
|
|
to the total graph. By focusing on local modularity, the algorithm efficiently
|
|
uncovers communities around a specific node without requiring global
|
|
optimization over the entire graph.
|
|
|
|
The algorithm assumes that the graph $G$ consists of a known community $C$ and
|
|
an unknown set of nodes $U$, which are adjacent to $C$ . The boundary of the
|
|
community $B$, consists of nodes in $C$ that have at least one nbr in $U$.
|
|
|
|
Mathematically, the local modularity is expressed as:
|
|
|
|
.. math::
|
|
R = \frac{I}{T}
|
|
|
|
where $T$ is the number of edges with one or more endpoints in $B$, and $I$ is the
|
|
number of those edges with neither endpoint in $U$.
|
|
|
|
Parameters
|
|
----------
|
|
G : NetworkX graph
|
|
The input graph.
|
|
|
|
source : node
|
|
The source node from which the community expansion begins.
|
|
|
|
cutoff : int, optional (default=None)
|
|
The maximum number of nodes to include in the community. If None, the algorithm
|
|
expands until no further modularity gain can be made.
|
|
|
|
method : string, optional (default='clauset')
|
|
The algorithm to use to carry out greedy source expansion.
|
|
Supported options: 'clauset'. Other inputs produce a ValueError
|
|
|
|
Returns
|
|
-------
|
|
set
|
|
A set of nodes representing the local community around the source node.
|
|
|
|
Examples
|
|
--------
|
|
>>> G = nx.karate_club_graph()
|
|
>>> nx.community.greedy_source_expansion(G, source=16)
|
|
{16, 0, 4, 5, 6, 10}
|
|
|
|
Notes
|
|
-----
|
|
This algorithm is designed for detecting local communities around a specific node,
|
|
which is useful for large networks where global community detection is computationally
|
|
expensive.
|
|
|
|
The result of the algorithm may vary based on the structure of the graph, the choice of
|
|
the source node, and the presence of ties between nodes during the greedy expansion process.
|
|
|
|
References
|
|
----------
|
|
.. [1] Clauset, Aaron. Finding local community structure in networks.
|
|
Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 72, no. 2 (2005): 026132.
|
|
https://arxiv.org/pdf/physics/0503036
|
|
|
|
"""
|
|
try:
|
|
algo = ALGORITHMS[method]
|
|
except KeyError as e:
|
|
raise ValueError(f"{method} is not a valid choice for an algorithm.") from e
|
|
|
|
return algo(G, source=source, cutoff=cutoff)
|