diff options
Diffstat (limited to 'scripts/generate_plots.py')
| -rw-r--r-- | scripts/generate_plots.py | 95 |
1 files changed, 91 insertions, 4 deletions
diff --git a/scripts/generate_plots.py b/scripts/generate_plots.py index be5db9f..d844f12 100644 --- a/scripts/generate_plots.py +++ b/scripts/generate_plots.py @@ -223,6 +223,86 @@ def plot_node_coverage_comparison(): print(" Saved node_coverage.pdf") +def plot_edge_connectivity_boxplots(): + """Boxplots of mincut/log10(n) across methods for each network.""" + stats_dir = os.path.join(RESULTS_DIR, "stats") + + for net_name in NETWORKS: + all_labels = ["ground_truth"] + METHOD_NAMES + data = [] + labels = [] + + for label in all_labels: + det_path = os.path.join(stats_dir, net_name, label, "cluster_details.json") + if not os.path.exists(det_path): + continue + with open(det_path) as f: + details = json.load(f) + vals = [d["mincut_over_log10n"] for d in details if "mincut_over_log10n" in d] + if vals: + data.append(vals) + labels.append(METHOD_LABELS.get(label, label)) + + if not data: + continue + + fig, ax = plt.subplots(figsize=(9, 4)) + bp = ax.boxplot(data, tick_labels=labels, patch_artist=True, showfliers=False) + for patch in bp["boxes"]: + patch.set_facecolor("lightyellow") + ax.axhline(y=1.0, color="red", linestyle="--", linewidth=0.8, label="well-connected threshold") + ax.set_ylabel("Min Edge Cut / log$_{10}$(n)") + ax.set_title(f"Edge Connectivity — {net_name}") + ax.legend(fontsize=8) + plt.xticks(rotation=20, ha="right") + plt.tight_layout() + plt.savefig(os.path.join(FIGURES_DIR, f"edge_connectivity_{net_name}.pdf"), + bbox_inches="tight") + plt.close() + print(f" Saved edge_connectivity_{net_name}.pdf") + + +def plot_wellconnected_bar(): + """Bar chart of fraction well-connected clusters per method/network.""" + stats_path = os.path.join(RESULTS_DIR, "stats", "cluster_stats_summary.csv") + if not os.path.exists(stats_path): + return + + df = pd.read_csv(stats_path) + + fig, ax = plt.subplots(figsize=(10, 4.5)) + net_names = list(NETWORKS.keys()) + all_methods = ["ground_truth"] + METHOD_NAMES + x = np.arange(len(net_names)) + width = 0.13 + offsets = np.arange(len(all_methods)) - len(all_methods) / 2 + 0.5 + colors = plt.cm.Set2(np.linspace(0, 1, len(all_methods))) + + for i, method in enumerate(all_methods): + vals = [] + for net in net_names: + row = df[(df["network"] == net) & (df["method"] == method)] + if len(row) > 0: + nc = row["n_clusters_non_singleton"].values[0] + nwc = row["n_wellconnected"].values[0] + vals.append(nwc / nc if nc > 0 else 0) + else: + vals.append(0) + ax.bar(x + offsets[i] * width, vals, width, label=METHOD_LABELS.get(method, method), + color=colors[i]) + + ax.set_xticks(x) + ax.set_xticklabels(net_names) + ax.set_ylabel("Fraction Well-Connected") + ax.set_title("Fraction of Well-Connected Clusters (mincut > log$_{10}$(n))") + ax.legend(fontsize=7, ncol=2) + ax.set_ylim(0, 1.05) + plt.tight_layout() + plt.savefig(os.path.join(FIGURES_DIR, "wellconnected.pdf"), bbox_inches="tight") + plt.close() + print(" Saved wellconnected.pdf") + + def generate_latex_accuracy_table(): """Generate a LaTeX accuracy table.""" acc_path = os.path.join(RESULTS_DIR, "accuracy", "accuracy_table.csv") @@ -274,9 +354,9 @@ def generate_latex_stats_table(): lines.append(r"\caption{Cluster statistics summary for each network and method.}") lines.append(r"\label{tab:cluster_stats}") lines.append(r"\footnotesize") - lines.append(r"\begin{tabular}{llrrrrrr}") + lines.append(r"\begin{tabular}{llrrrrrrr}") lines.append(r"\toprule") - lines.append(r"Network & Method & \#Clusters & Node Cov. & Mean Size & Mean Density & Mean Cond. & Mean Mix. \\") + lines.append(r"Network & Method & \#Clust. & Node Cov. & Mean Size & Mean Dens. & Mean Cond. & Mean Mix. & \%WC \\") lines.append(r"\midrule") for net_name in NETWORKS: @@ -284,11 +364,14 @@ def generate_latex_stats_table(): for _, row in df[df["network"] == net_name].iterrows(): net_disp = net_name if first else "" m_label = METHOD_LABELS.get(row["method"], row["method"]) + nc = int(row['n_clusters_non_singleton']) + nwc = int(row['n_wellconnected']) if 'n_wellconnected' in row and not pd.isna(row.get('n_wellconnected', np.nan)) else 0 + pct_wc = 100 * nwc / nc if nc > 0 else 0.0 lines.append( - f"{net_disp} & {m_label} & {int(row['n_clusters_non_singleton'])} & " + f"{net_disp} & {m_label} & {nc} & " f"{row['node_coverage']:.3f} & {row['mean_cluster_size']:.1f} & " f"{row['mean_edge_density']:.3f} & {row['mean_conductance']:.3f} & " - f"{row['mean_mixing_param']:.3f} \\\\" + f"{row['mean_mixing_param']:.3f} & {pct_wc:.0f}\\% \\\\" ) first = False lines.append(r"\midrule") @@ -310,6 +393,10 @@ def generate_all(): plot_cluster_size_distributions() print("Generating edge density boxplots...") plot_edge_density_boxplots() + print("Generating edge connectivity boxplots...") + plot_edge_connectivity_boxplots() + print("Generating well-connected fraction bar chart...") + plot_wellconnected_bar() print("Generating mixing parameter comparison...") plot_mixing_parameter_comparison() print("Generating node coverage comparison...") |
