218 lines
6.3 KiB
Python
218 lines
6.3 KiB
Python
""" This module provides the functions for node classification problem.
|
|
|
|
The functions in this module are not imported
|
|
into the top level `networkx` namespace.
|
|
You can access these functions by importing
|
|
the `networkx.algorithms.node_classification` modules,
|
|
then accessing the functions as attributes of `node_classification`.
|
|
For example:
|
|
|
|
>>> from networkx.algorithms import node_classification
|
|
>>> G = nx.path_graph(4)
|
|
>>> G.edges()
|
|
EdgeView([(0, 1), (1, 2), (2, 3)])
|
|
>>> G.nodes[0]["label"] = "A"
|
|
>>> G.nodes[3]["label"] = "B"
|
|
>>> node_classification.harmonic_function(G)
|
|
['A', 'A', 'B', 'B']
|
|
|
|
References
|
|
----------
|
|
Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
|
|
Semi-supervised learning using gaussian fields and harmonic functions.
|
|
In ICML (Vol. 3, pp. 912-919).
|
|
"""
|
|
import networkx as nx
|
|
|
|
__all__ = ["harmonic_function", "local_and_global_consistency"]
|
|
|
|
|
|
@nx.utils.not_implemented_for("directed")
|
|
@nx._dispatch(node_attrs="label_name")
|
|
def harmonic_function(G, max_iter=30, label_name="label"):
|
|
"""Node classification by Harmonic function
|
|
|
|
Function for computing Harmonic function algorithm by Zhu et al.
|
|
|
|
Parameters
|
|
----------
|
|
G : NetworkX Graph
|
|
max_iter : int
|
|
maximum number of iterations allowed
|
|
label_name : string
|
|
name of target labels to predict
|
|
|
|
Returns
|
|
-------
|
|
predicted : list
|
|
List of length ``len(G)`` with the predicted labels for each node.
|
|
|
|
Raises
|
|
------
|
|
NetworkXError
|
|
If no nodes in `G` have attribute `label_name`.
|
|
|
|
Examples
|
|
--------
|
|
>>> from networkx.algorithms import node_classification
|
|
>>> G = nx.path_graph(4)
|
|
>>> G.nodes[0]["label"] = "A"
|
|
>>> G.nodes[3]["label"] = "B"
|
|
>>> G.nodes(data=True)
|
|
NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
|
|
>>> G.edges()
|
|
EdgeView([(0, 1), (1, 2), (2, 3)])
|
|
>>> predicted = node_classification.harmonic_function(G)
|
|
>>> predicted
|
|
['A', 'A', 'B', 'B']
|
|
|
|
References
|
|
----------
|
|
Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
|
|
Semi-supervised learning using gaussian fields and harmonic functions.
|
|
In ICML (Vol. 3, pp. 912-919).
|
|
"""
|
|
import numpy as np
|
|
import scipy as sp
|
|
|
|
X = nx.to_scipy_sparse_array(G) # adjacency matrix
|
|
labels, label_dict = _get_label_info(G, label_name)
|
|
|
|
if labels.shape[0] == 0:
|
|
raise nx.NetworkXError(
|
|
f"No node on the input graph is labeled by '{label_name}'."
|
|
)
|
|
|
|
n_samples = X.shape[0]
|
|
n_classes = label_dict.shape[0]
|
|
F = np.zeros((n_samples, n_classes))
|
|
|
|
# Build propagation matrix
|
|
degrees = X.sum(axis=0)
|
|
degrees[degrees == 0] = 1 # Avoid division by 0
|
|
# TODO: csr_array
|
|
D = sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0))
|
|
P = (D @ X).tolil()
|
|
P[labels[:, 0]] = 0 # labels[:, 0] indicates IDs of labeled nodes
|
|
# Build base matrix
|
|
B = np.zeros((n_samples, n_classes))
|
|
B[labels[:, 0], labels[:, 1]] = 1
|
|
|
|
for _ in range(max_iter):
|
|
F = (P @ F) + B
|
|
|
|
return label_dict[np.argmax(F, axis=1)].tolist()
|
|
|
|
|
|
@nx.utils.not_implemented_for("directed")
|
|
@nx._dispatch(node_attrs="label_name")
|
|
def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
|
|
"""Node classification by Local and Global Consistency
|
|
|
|
Function for computing Local and global consistency algorithm by Zhou et al.
|
|
|
|
Parameters
|
|
----------
|
|
G : NetworkX Graph
|
|
alpha : float
|
|
Clamping factor
|
|
max_iter : int
|
|
Maximum number of iterations allowed
|
|
label_name : string
|
|
Name of target labels to predict
|
|
|
|
Returns
|
|
-------
|
|
predicted : list
|
|
List of length ``len(G)`` with the predicted labels for each node.
|
|
|
|
Raises
|
|
------
|
|
NetworkXError
|
|
If no nodes in `G` have attribute `label_name`.
|
|
|
|
Examples
|
|
--------
|
|
>>> from networkx.algorithms import node_classification
|
|
>>> G = nx.path_graph(4)
|
|
>>> G.nodes[0]["label"] = "A"
|
|
>>> G.nodes[3]["label"] = "B"
|
|
>>> G.nodes(data=True)
|
|
NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
|
|
>>> G.edges()
|
|
EdgeView([(0, 1), (1, 2), (2, 3)])
|
|
>>> predicted = node_classification.local_and_global_consistency(G)
|
|
>>> predicted
|
|
['A', 'A', 'B', 'B']
|
|
|
|
References
|
|
----------
|
|
Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
|
|
Learning with local and global consistency.
|
|
Advances in neural information processing systems, 16(16), 321-328.
|
|
"""
|
|
import numpy as np
|
|
import scipy as sp
|
|
|
|
X = nx.to_scipy_sparse_array(G) # adjacency matrix
|
|
labels, label_dict = _get_label_info(G, label_name)
|
|
|
|
if labels.shape[0] == 0:
|
|
raise nx.NetworkXError(
|
|
f"No node on the input graph is labeled by '{label_name}'."
|
|
)
|
|
|
|
n_samples = X.shape[0]
|
|
n_classes = label_dict.shape[0]
|
|
F = np.zeros((n_samples, n_classes))
|
|
|
|
# Build propagation matrix
|
|
degrees = X.sum(axis=0)
|
|
degrees[degrees == 0] = 1 # Avoid division by 0
|
|
# TODO: csr_array
|
|
D2 = np.sqrt(sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0)))
|
|
P = alpha * ((D2 @ X) @ D2)
|
|
# Build base matrix
|
|
B = np.zeros((n_samples, n_classes))
|
|
B[labels[:, 0], labels[:, 1]] = 1 - alpha
|
|
|
|
for _ in range(max_iter):
|
|
F = (P @ F) + B
|
|
|
|
return label_dict[np.argmax(F, axis=1)].tolist()
|
|
|
|
|
|
def _get_label_info(G, label_name):
|
|
"""Get and return information of labels from the input graph
|
|
|
|
Parameters
|
|
----------
|
|
G : Network X graph
|
|
label_name : string
|
|
Name of the target label
|
|
|
|
Returns
|
|
-------
|
|
labels : numpy array, shape = [n_labeled_samples, 2]
|
|
Array of pairs of labeled node ID and label ID
|
|
label_dict : numpy array, shape = [n_classes]
|
|
Array of labels
|
|
i-th element contains the label corresponding label ID `i`
|
|
"""
|
|
import numpy as np
|
|
|
|
labels = []
|
|
label_to_id = {}
|
|
lid = 0
|
|
for i, n in enumerate(G.nodes(data=True)):
|
|
if label_name in n[1]:
|
|
label = n[1][label_name]
|
|
if label not in label_to_id:
|
|
label_to_id[label] = lid
|
|
lid += 1
|
|
labels.append([i, label_to_id[label]])
|
|
labels = np.array(labels)
|
|
label_dict = np.array(
|
|
[label for label, _ in sorted(label_to_id.items(), key=lambda x: x[1])]
|
|
)
|
|
return (labels, label_dict)
|