summary | refs | log | tree | commit | diff
path: root/scripts/generate_plots.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/generate_plots.py')
-rw-r--r-- scripts/generate_plots.py | 95
1 files changed, 91 insertions, 4 deletions
diff --git a/scripts/generate_plots.py b/scripts/generate_plots.py
index be5db9f..d844f12 100644
--- a/scripts/generate_plots.py
+++ b/scripts/generate_plots.py
@@ -223,6 +223,86 @@ def plot_node_coverage_comparison():
print(" Saved node_coverage.pdf")
+def plot_edge_connectivity_boxplots():
+ """Boxplots of mincut/log10(n) across methods for each network."""
+ stats_dir = os.path.join(RESULTS_DIR, "stats")
+
+ for net_name in NETWORKS:
+ all_labels = ["ground_truth"] + METHOD_NAMES
+ data = []
+ labels = []
+
+ for label in all_labels:
+ det_path = os.path.join(stats_dir, net_name, label, "cluster_details.json")
+ if not os.path.exists(det_path):
+ continue
+ with open(det_path) as f:
+ details = json.load(f)
+ vals = [d["mincut_over_log10n"] for d in details if "mincut_over_log10n" in d]
+ if vals:
+ data.append(vals)
+ labels.append(METHOD_LABELS.get(label, label))
+
+ if not data:
+ continue
+
+ fig, ax = plt.subplots(figsize=(9, 4))
+ bp = ax.boxplot(data, tick_labels=labels, patch_artist=True, showfliers=False)
+ for patch in bp["boxes"]:
+ patch.set_facecolor("lightyellow")
+ ax.axhline(y=1.0, color="red", linestyle="--", linewidth=0.8, label="well-connected threshold")
+ ax.set_ylabel("Min Edge Cut / log$_{10}$(n)")
+ ax.set_title(f"Edge Connectivity — {net_name}")
+ ax.legend(fontsize=8)
+ plt.xticks(rotation=20, ha="right")
+ plt.tight_layout()
+ plt.savefig(os.path.join(FIGURES_DIR, f"edge_connectivity_{net_name}.pdf"),
+ bbox_inches="tight")
+ plt.close()
+ print(f" Saved edge_connectivity_{net_name}.pdf")
+
+
+def plot_wellconnected_bar():
+ """Bar chart of fraction well-connected clusters per method/network."""
+ stats_path = os.path.join(RESULTS_DIR, "stats", "cluster_stats_summary.csv")
+ if not os.path.exists(stats_path):
+ return
+
+ df = pd.read_csv(stats_path)
+
+ fig, ax = plt.subplots(figsize=(10, 4.5))
+ net_names = list(NETWORKS.keys())
+ all_methods = ["ground_truth"] + METHOD_NAMES
+ x = np.arange(len(net_names))
+ width = 0.13
+ offsets = np.arange(len(all_methods)) - len(all_methods) / 2 + 0.5
+ colors = plt.cm.Set2(np.linspace(0, 1, len(all_methods)))
+
+ for i, method in enumerate(all_methods):
+ vals = []
+ for net in net_names:
+ row = df[(df["network"] == net) & (df["method"] == method)]
+ if len(row) > 0:
+ nc = row["n_clusters_non_singleton"].values[0]
+ nwc = row["n_wellconnected"].values[0]
+ vals.append(nwc / nc if nc > 0 else 0)
+ else:
+ vals.append(0)
+ ax.bar(x + offsets[i] * width, vals, width, label=METHOD_LABELS.get(method, method),
+ color=colors[i])
+
+ ax.set_xticks(x)
+ ax.set_xticklabels(net_names)
+ ax.set_ylabel("Fraction Well-Connected")
+ ax.set_title("Fraction of Well-Connected Clusters (mincut > log$_{10}$(n))")
+ ax.legend(fontsize=7, ncol=2)
+ ax.set_ylim(0, 1.05)
+ plt.tight_layout()
+ plt.savefig(os.path.join(FIGURES_DIR, "wellconnected.pdf"), bbox_inches="tight")
+ plt.close()
+ print(" Saved wellconnected.pdf")
+
+
def generate_latex_accuracy_table():
"""Generate a LaTeX accuracy table."""
acc_path = os.path.join(RESULTS_DIR, "accuracy", "accuracy_table.csv")
@@ -274,9 +354,9 @@ def generate_latex_stats_table():
lines.append(r"\caption{Cluster statistics summary for each network and method.}")
lines.append(r"\label{tab:cluster_stats}")
lines.append(r"\footnotesize")
- lines.append(r"\begin{tabular}{llrrrrrr}")
+ lines.append(r"\begin{tabular}{llrrrrrrr}")
lines.append(r"\toprule")
- lines.append(r"Network & Method & \#Clusters & Node Cov. & Mean Size & Mean Density & Mean Cond. & Mean Mix. \\")
+ lines.append(r"Network & Method & \#Clust. & Node Cov. & Mean Size & Mean Dens. & Mean Cond. & Mean Mix. & \%WC \\")
lines.append(r"\midrule")
for net_name in NETWORKS:
@@ -284,11 +364,14 @@ def generate_latex_stats_table():
for _, row in df[df["network"] == net_name].iterrows():
net_disp = net_name if first else ""
m_label = METHOD_LABELS.get(row["method"], row["method"])
+ nc = int(row['n_clusters_non_singleton'])
+ nwc = int(row['n_wellconnected']) if 'n_wellconnected' in row and not pd.isna(row.get('n_wellconnected', np.nan)) else 0
+ pct_wc = 100 * nwc / nc if nc > 0 else 0.0
lines.append(
- f"{net_disp} & {m_label} & {int(row['n_clusters_non_singleton'])} & "
+ f"{net_disp} & {m_label} & {nc} & "
f"{row['node_coverage']:.3f} & {row['mean_cluster_size']:.1f} & "
f"{row['mean_edge_density']:.3f} & {row['mean_conductance']:.3f} & "
- f"{row['mean_mixing_param']:.3f} \\\\"
+ f"{row['mean_mixing_param']:.3f} & {pct_wc:.0f}\\% \\\\"
)
first = False
lines.append(r"\midrule")
@@ -310,6 +393,10 @@ def generate_all():
plot_cluster_size_distributions()
print("Generating edge density boxplots...")
plot_edge_density_boxplots()
+ print("Generating edge connectivity boxplots...")
+ plot_edge_connectivity_boxplots()
+ print("Generating well-connected fraction bar chart...")
+ plot_wellconnected_bar()
print("Generating mixing parameter comparison...")
plot_mixing_parameter_comparison()
print("Generating node coverage comparison...")