diff options
| author | blackhao <13851610112@163.com> | 2025-08-22 02:51:50 -0500 |
|---|---|---|
| committer | blackhao <13851610112@163.com> | 2025-08-22 02:51:50 -0500 |
| commit | 4aab4087dc97906d0b9890035401175cdaab32d4 (patch) | |
| tree | 4e2e9d88a711ec5b1cfa02e8ac72a55183b99123 /.venv/lib/python3.12/site-packages/networkx/algorithms/approximation/density.py | |
| parent | afa8f50d1d21c721dabcb31ad244610946ab65a3 (diff) | |
2.0
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/approximation/density.py')
| -rw-r--r-- | .venv/lib/python3.12/site-packages/networkx/algorithms/approximation/density.py | 396 |
1 files changed, 396 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/approximation/density.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/approximation/density.py new file mode 100644 index 0000000..7eb744c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/approximation/density.py @@ -0,0 +1,396 @@ +"""Fast algorithms for the densest subgraph problem""" + +import math + +import networkx as nx + +__all__ = ["densest_subgraph"] + + +def _greedy_plus_plus(G, iterations): + if G.number_of_edges() == 0: + return 0.0, set() + if iterations < 1: + raise ValueError( + f"The number of iterations must be an integer >= 1. Provided: {iterations}" + ) + + loads = dict.fromkeys(G.nodes, 0) # Load vector for Greedy++. + best_density = 0.0 # Highest density encountered. + best_subgraph = set() # Nodes of the best subgraph found. + + for _ in range(iterations): + # Initialize heap for fast access to minimum weighted degree. + heap = nx.utils.BinaryHeap() + + # Compute initial weighted degrees and add nodes to the heap. + for node, degree in G.degree: + heap.insert(node, loads[node] + degree) + # Set up tracking for current graph state. + remaining_nodes = set(G.nodes) + num_edges = G.number_of_edges() + current_degrees = dict(G.degree) + + while remaining_nodes: + num_nodes = len(remaining_nodes) + + # Current density of the (implicit) graph + current_density = num_edges / num_nodes + + # Update the best density. + if current_density > best_density: + best_density = current_density + best_subgraph = set(remaining_nodes) + + # Pop the node with the smallest weighted degree. + node, _ = heap.pop() + if node not in remaining_nodes: + continue # Skip nodes already removed. + + # Update the load of the popped node. + loads[node] += current_degrees[node] + + # Update neighbors' degrees and the heap. + for neighbor in G.neighbors(node): + if neighbor in remaining_nodes: + current_degrees[neighbor] -= 1 + num_edges -= 1 + heap.insert(neighbor, loads[neighbor] + current_degrees[neighbor]) + + # Remove the node from the remaining nodes. + remaining_nodes.remove(node) + + return best_density, best_subgraph + + +def _fractional_peeling(G, b, x, node_to_idx, edge_to_idx): + """ + Optimized fractional peeling using NumPy arrays. + + Parameters + ---------- + G : networkx.Graph + The input graph. + b : numpy.ndarray + Induced load vector. + x : numpy.ndarray + Fractional edge values. + node_to_idx : dict + Mapping from node to index. + edge_to_idx : dict + Mapping from edge to index. + + Returns + ------- + best_density : float + The best density found. + best_subgraph : set + The subset of nodes defining the densest subgraph. + """ + heap = nx.utils.BinaryHeap() + + remaining_nodes = set(G.nodes) + + # Initialize heap with b values + for idx in remaining_nodes: + heap.insert(idx, b[idx]) + + num_edges = G.number_of_edges() + + best_density = 0.0 + best_subgraph = set() + + while remaining_nodes: + num_nodes = len(remaining_nodes) + current_density = num_edges / num_nodes + + if current_density > best_density: + best_density = current_density + best_subgraph = set(remaining_nodes) + + # Pop the node with the smallest b + node, _ = heap.pop() + while node not in remaining_nodes: + node, _ = heap.pop() # Clean the heap from stale values + + # Update neighbors b values by subtracting fractional x value + for neighbor in G.neighbors(node): + if neighbor in remaining_nodes: + neighbor_idx = node_to_idx[neighbor] + # Take off fractional value + b[neighbor_idx] -= x[edge_to_idx[(neighbor, node)]] + num_edges -= 1 + heap.insert(neighbor, b[neighbor_idx]) + + remaining_nodes.remove(node) # peel off node + + return best_density, best_subgraph + + +def _fista(G, iterations): + if G.number_of_edges() == 0: + return 0.0, set() + if iterations < 1: + raise ValueError( + f"The number of iterations must be an integer >= 1. Provided: {iterations}" + ) + import numpy as np + + # 1. Node Mapping: Assign a unique index to each node and edge + node_to_idx = {node: idx for idx, node in enumerate(G)} + num_nodes = G.number_of_nodes() + num_undirected_edges = G.number_of_edges() + + # 2. Edge Mapping: Assign a unique index to each bidirectional edge + bidirectional_edges = [(u, v) for u, v in G.edges] + [(v, u) for u, v in G.edges] + edge_to_idx = {edge: idx for idx, edge in enumerate(bidirectional_edges)} + + num_edges = len(bidirectional_edges) + + # 3. Reverse Edge Mapping: Map each (bidirectional) edge to its reverse edge index + reverse_edge_idx = np.empty(num_edges, dtype=np.int32) + for idx in range(num_undirected_edges): + reverse_edge_idx[idx] = num_undirected_edges + idx + for idx in range(num_undirected_edges, 2 * num_undirected_edges): + reverse_edge_idx[idx] = idx - num_undirected_edges + + # 4. Initialize Variables as NumPy Arrays + x = np.full(num_edges, 0.5, dtype=np.float32) + y = x.copy() + z = np.zeros(num_edges, dtype=np.float32) + b = np.zeros(num_nodes, dtype=np.float32) # Induced load vector + tk = 1.0 # Momentum term + + # 5. Precompute Edge Source Indices + edge_src_indices = np.array( + [node_to_idx[u] for u, _ in bidirectional_edges], dtype=np.int32 + ) + + # 6. Compute Learning Rate + max_degree = max(deg for _, deg in G.degree) + # 0.9 for floating point errs when max_degree is very large + learning_rate = 0.9 / max_degree + + # 7. Iterative Updates + for _ in range(iterations): + # 7a. Update b: sum y over outgoing edges for each node + b[:] = 0.0 # Reset b to zero + np.add.at(b, edge_src_indices, y) # b_u = \sum_{v : (u,v) \in E(G)} y_{uv} + + # 7b. Compute z, z_{uv} = y_{uv} - 2 * learning_rate * b_u + z = y - 2.0 * learning_rate * b[edge_src_indices] + + # 7c. Update Momentum Term + tknew = (1.0 + math.sqrt(1 + 4.0 * tk**2)) / 2.0 + + # 7d. Update x in a vectorized manner, x_{uv} = (z_{uv} - z_{vu} + 1.0) / 2.0 + new_xuv = (z - z[reverse_edge_idx] + 1.0) / 2.0 + clamped_x = np.clip(new_xuv, 0.0, 1.0) # Clamp x_{uv} between 0 and 1 + + # Update y using the FISTA update formula (similar to gradient descent) + y = ( + clamped_x + + ((tk - 1.0) / tknew) * (clamped_x - x) + + (tk / tknew) * (clamped_x - y) + ) + + # Update x + x = clamped_x + + # Update tk, the momemntum term + tk = tknew + + # Rebalance the b values! Otherwise performance is a bit suboptimal. + b[:] = 0.0 + np.add.at(b, edge_src_indices, x) # b_u = \sum_{v : (u,v) \in E(G)} x_{uv} + + # Extract the actual (approximate) dense subgraph. + return _fractional_peeling(G, b, x, node_to_idx, edge_to_idx) + + +ALGORITHMS = {"greedy++": _greedy_plus_plus, "fista": _fista} + + +@nx.utils.not_implemented_for("directed") +@nx.utils.not_implemented_for("multigraph") +@nx._dispatchable +def densest_subgraph(G, iterations=1, *, method="fista"): + r"""Returns an approximate densest subgraph for a graph `G`. + + This function runs an iterative algorithm to find the densest subgraph, + and returns both the density and the subgraph. For a discussion on the + notion of density used and the different algorithms available on + networkx, please see the Notes section below. + + Parameters + ---------- + G : NetworkX graph + Undirected graph. + + iterations : int, optional (default=1) + Number of iterations to use for the iterative algorithm. Can be + specified positionally or as a keyword argument. + + method : string, optional (default='fista') + The algorithm to use to approximate the densest subgraph. Supported + options: 'greedy++' by Boob et al. [2]_ and 'fista' by Harb et al. [3]_. + Must be specified as a keyword argument. Other inputs produce a + ValueError. + + Returns + ------- + d : float + The density of the approximate subgraph found. + + S : set + The subset of nodes defining the approximate densest subgraph. + + Examples + -------- + >>> G = nx.star_graph(4) + >>> nx.approximation.densest_subgraph(G, iterations=1) + (0.8, {0, 1, 2, 3, 4}) + + Notes + ----- + **Problem Definition:** + The densest subgraph problem (DSG) asks to find the subgraph + $S \subseteq V(G)$ with maximum density. For a subset of the nodes of + $G$, $S \subseteq V(G)$, define $E(S) = \{ (u,v) : (u,v)\in E(G), + u\in S, v\in S \}$ as the set of edges with both endpoints in $S$. + The density of $S$ is defined as $|E(S)|/|S|$, the ratio between the + edges in the subgraph $G[S]$ and the number of nodes in that subgraph. + Note that this is different from the standard graph theoretic definition + of density, defined as $\frac{2|E(S)|}{|S|(|S|-1)}$, for historical + reasons. + + **Exact Algorithms:** + The densest subgraph problem is polynomial time solvable using maximum + flow, commonly referred to as Goldberg's algorithm. However, the + algorithm is quite involved. It first binary searches on the optimal + density, $d^\ast$. For a guess of the density $d$, it sets up a flow + network $G'$ with size $O(m)$. The maximum flow solution either + informs the algorithm that no subgraph with density $d$ exists, or it + provides a subgraph with density at least $d$. However, this is + inherently bottlenecked by the maximum flow algorithm. For example, [2]_ + notes that Goldberg’s algorithm was not feasible on many large graphs + even though they used a highly optimized maximum flow library. + + **Charikar's Greedy Peeling:** + While exact solution algorithms are quite involved, there are several + known approximation algorithms for the densest subgraph problem. + + Charikar [1]_ described a very simple 1/2-approximation algorithm for DSG + known as the greedy "peeling" algorithm. The algorithm creates an + ordering of the nodes as follows. The first node $v_1$ is the one with + the smallest degree in $G$ (ties broken arbitrarily). It selects + $v_2$ to be the smallest degree node in $G \setminus v_1$. Letting + $G_i$ be the graph after removing $v_1, ..., v_i$ (with $G_0=G$), + the algorithm returns the graph among $G_0, ..., G_n$ with the highest + density. + + **Greedy++:** + Boob et al. [2]_ generalized this algorithm into Greedy++, an iterative + algorithm that runs several rounds of "peeling". In fact, Greedy++ with 1 + iteration is precisely Charikar's algorithm. The algorithm converges to a + $(1-\epsilon)$ approximate densest subgraph in $O(\Delta(G)\log + n/\epsilon^2)$ iterations, where $\Delta(G)$ is the maximum degree, + and $n$ is the number of nodes in $G$. The algorithm also has other + desirable properties as shown by [4]_ and [5]_. + + **FISTA Algorithm:** + Harb et al. [3]_ gave a faster and more scalable algorithm using ideas + from quadratic programming for the densest subgraph, which is based on a + fast iterative shrinkage-thresholding algorithm (FISTA) algorithm. It is + known that computing the densest subgraph can be formulated as the + following convex optimization problem: + + Minimize $\sum_{u \in V(G)} b_u^2$ + + Subject to: + + $b_u = \sum_{v: \{u,v\} \in E(G)} x_{uv}$ for all $u \in V(G)$ + + $x_{uv} + x_{vu} = 1.0$ for all $\{u,v\} \in E(G)$ + + $x_{uv} \geq 0, x_{vu} \geq 0$ for all $\{u,v\} \in E(G)$ + + Here, $x_{uv}$ represents the fraction of edge $\{u,v\}$ assigned to + $u$, and $x_{vu}$ to $v$. + + The FISTA algorithm efficiently solves this convex program using gradient + descent with projections. For a learning rate $\alpha$, the algorithm + does: + + 1. **Initialization**: Set $x^{(0)}_{uv} = x^{(0)}_{vu} = 0.5$ for all + edges as a feasible solution. + + 2. **Gradient Update**: For iteration $k\geq 1$, set + $x^{(k+1)}_{uv} = x^{(k)}_{uv} - 2 \alpha \sum_{v: \{u,v\} \in E(G)} + x^{(k)}_{uv}$. However, now $x^{(k+1)}_{uv}$ might be infeasible! + To ensure feasibility, we project $x^{(k+1)}_{uv}$. + + 3. **Projection to the Feasible Set**: Compute + $b^{(k+1)}_u = \sum_{v: \{u,v\} \in E(G)} x^{(k)}_{uv}$ for all + nodes $u$. Define $z^{(k+1)}_{uv} = x^{(k+1)}_{uv} - 2 \alpha + b^{(k+1)}_u$. Update $x^{(k+1)}_{uv} = + CLAMP((z^{(k+1)}_{uv} - z^{(k+1)}_{vu} + 1.0) / 2.0)$, where + $CLAMP(x) = \max(0, \min(1, x))$. + + With a learning rate of $\alpha=1/\Delta(G)$, where $\Delta(G)$ is + the maximum degree, the algorithm converges to the optimum solution of + the convex program. + + **Fractional Peeling:** + To obtain a **discrete** subgraph, we use fractional peeling, an + adaptation of the standard peeling algorithm which peels the minimum + degree vertex in each iteration, and returns the densest subgraph found + along the way. Here, we instead peel the vertex with the smallest + induced load $b_u$: + + 1. Compute $b_u$ and $x_{uv}$. + + 2. Iteratively remove the vertex with the smallest $b_u$, updating its + neighbors' load by $x_{vu}$. + + Fractional peeling transforms the approximately optimal fractional + values $b_u, x_{uv}$ into a discrete subgraph. Unlike traditional + peeling, which removes the lowest-degree node, this method accounts for + fractional edge contributions from the convex program. + + This approach is both scalable and theoretically sound, ensuring a quick + approximation of the densest subgraph while leveraging fractional load + balancing. + + References + ---------- + .. [1] Charikar, Moses. "Greedy approximation algorithms for finding dense + components in a graph." In International workshop on approximation + algorithms for combinatorial optimization, pp. 84-95. Berlin, Heidelberg: + Springer Berlin Heidelberg, 2000. + + .. [2] Boob, Digvijay, Yu Gao, Richard Peng, Saurabh Sawlani, Charalampos + Tsourakakis, Di Wang, and Junxing Wang. "Flowless: Extracting densest + subgraphs without flow computations." In Proceedings of The Web Conference + 2020, pp. 573-583. 2020. + + .. [3] Harb, Elfarouk, Kent Quanrud, and Chandra Chekuri. "Faster and scalable + algorithms for densest subgraph and decomposition." Advances in Neural + Information Processing Systems 35 (2022): 26966-26979. + + .. [4] Harb, Elfarouk, Kent Quanrud, and Chandra Chekuri. "Convergence to + lexicographically optimal base in a (contra) polymatroid and applications + to densest subgraph and tree packing." arXiv preprint arXiv:2305.02987 + (2023). + + .. [5] Chekuri, Chandra, Kent Quanrud, and Manuel R. Torres. "Densest + subgraph: Supermodularity, iterative peeling, and flow." In Proceedings of + the 2022 Annual ACM-SIAM Symposium on Discrete Algorithms (SODA), pp. + 1531-1555. Society for Industrial and Applied Mathematics, 2022. + """ + try: + algo = ALGORITHMS[method] + except KeyError as e: + raise ValueError(f"{method} is not a valid choice for an algorithm.") from e + + return algo(G, iterations) |
