summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: YurenHao0426 <Blackhao0426@gmail.com> 2026-06-14 20:32:31 -0500
committer: YurenHao0426 <Blackhao0426@gmail.com> 2026-06-14 20:32:31 -0500
commit: 1118b7457c261de36ead6103503c00c321c75f9b (patch)
tree: 7ea76b32f070cb58458caaa2897a5d8133561f48
parent: aa73718eb6427d7da3b9cb416275802d90c4b2ed (diff)
Depth-utility ladder: trainable-block sweep (BP/FA/DFA) on ResMLP CIFAR-10 [HEAD, master]
Appendix experiment triangulating the depth-utility diagnostic (D3) by varying the number of trainable residual blocks k (last-k trainable, first L-k frozen at init; embed/LN/head always trained).

- d=256 L=4 and d=512 L=2, 3 seeds, recipe identical to the main audit.
- BP climbs monotonically (+22-23pp); DFA peaks at the frozen baseline (k=0) and declines once any deep block is trained; FA shows partial/no net depth utility.
- Cross-checks reproduce existing anchors (BP 0.617, DFA 0.301, FA 0.402, frozen 0.349).
- frozen_init_identity_check quantifies frozen stack as a near-norm-preserving random feature map (per-block ||f||/||h||~0.10, stack cos 0.981), explaining the above-chance k=0 rung.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
-rw-r--r--  experiments/depth_utility_ladder.py  317
-rw-r--r--  experiments/frozen_init_identity_check.py  82
-rw-r--r--  experiments/plot_depth_ladder.py  63
-rw-r--r--  logs/depth_ladder.log  1103
-rw-r--r--  report_explore/MEMO_depth_utility_ladder.md  119
-rw-r--r--  results/depth_ladder/depth_ladder.png  bin 0 -> 89453 bytes
-rw-r--r--  results/depth_ladder/frozen_init_identity.json  57
-rw-r--r--  results/depth_ladder/ladder_d256_L4_cifar10.json  2274
-rw-r--r--  results/depth_ladder/ladder_d512_L2_cifar10.json  1378
-rwxr-xr-x  run_depth_ladder.sh  14
10 files changed, 5407 insertions, 0 deletions
diff --git a/experiments/depth_utility_ladder.py b/experiments/depth_utility_ladder.py
new file mode 100644
index 0000000..c9de9e9
--- /dev/null
+++ b/experiments/depth_utility_ladder.py
@@ -0,0 +1,317 @@
+"""
+Depth-utility ladder (appendix experiment for the FA-evaluation E&D paper).
+
+Turns the binary frozen-vs-trained block comparison into a CURVE: vary the number
+of trainable residual blocks k, training the LAST k blocks (output side) and
+freezing the first L-k at random init. Embedding / out_ln / out_head are ALWAYS
+trained. Credit still propagates through frozen blocks (forward + FA feedback
+matrices unchanged); only their weights stay at init.
+
+Question. As more blocks are made trainable, does test accuracy rise?
+ - BP (positive control): should climb monotonically with k.
+ - FA (Lillicrap vanilla): modest climb where depth is usable, flat where not.
+ - DFA (direct FA): flat at / below the frozen baseline (deep credit
+ is non-functional -> the D3 failure at every k).
+
+Output-side-first is deliberate: the deepest block receives the most direct
+credit (FA's last block sees the exact output gradient), so it is the BEST case
+for the method. If even these blocks add nothing, depth is unused.
+
+Recipe is identical to the main CIFAR audit (cifar_resmlp.py): AdamW, lr 1e-3,
+wd 0.01, cosine, batch 128, 100 epochs, per-block independent optimizers and
+rms-normalized local surrogate losses.
+
+k=0 reproduces the frozen-blocks baseline; k=L reproduces the full audit.
+
+Usage:
+ CUDA_VISIBLE_DEVICES=2 python experiments/depth_utility_ladder.py \
+ --d_hidden 256 --num_blocks 4 --dataset cifar10 \
+ --methods bp fa dfa --k_values 0 1 2 3 4 --seeds 42 123 456 \
+ --epochs 100 --output_dir results/depth_ladder
+"""
+import os
+import sys
+import json
+import argparse
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torch.utils.data import DataLoader
+import torchvision
+import torchvision.transforms as transforms
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from models.residual_mlp import ResidualMLP
+
+
+# ---------------------------------------------------------------------------
+# Data / eval
+# ---------------------------------------------------------------------------
+def get_data(dataset, batch_size=128):
+ if dataset == 'cifar100':
+ mean, std = (0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)
+ DatasetClass, num_classes, input_dim = torchvision.datasets.CIFAR100, 100, 32 * 32 * 3
+ else:
+ mean, std = (0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)
+ DatasetClass, num_classes, input_dim = torchvision.datasets.CIFAR10, 10, 32 * 32 * 3
+ tf_train = transforms.Compose([
+ transforms.RandomCrop(32, padding=4),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(mean, std),
+ ])
+ tf_test = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
+ tr = DatasetClass('./data', True, download=True, transform=tf_train)
+ te = DatasetClass('./data', False, download=True, transform=tf_test)
+ return (
+ DataLoader(tr, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True),
+ DataLoader(te, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True),
+ input_dim, num_classes,
+ )
+
+
+def evaluate(model, loader, dev):
+ model.eval()
+ c = n = 0
+ with torch.no_grad():
+ for x, y in loader:
+ x = x.view(x.size(0), -1).to(dev); y = y.to(dev)
+ c += (model(x).argmax(-1) == y).sum().item()
+ n += x.size(0)
+ return c / n
+
+
+def freeze_first(model, k):
+ """Freeze the first L-k blocks (indices 0 .. L-k-1); leave the last k trainable.
+ Returns the set of trainable block indices."""
+ L = model.num_blocks
+ n_frozen = L - k
+ trainable = set(range(n_frozen, L))
+ for l, block in enumerate(model.blocks):
+ req = l in trainable
+ for p in block.parameters():
+ p.requires_grad_(req)
+ return trainable
+
+
+# ---------------------------------------------------------------------------
+# Trainers (freeze-aware ports of cifar_resmlp.py)
+# ---------------------------------------------------------------------------
+def train_bp(model, train_loader, test_loader, dev, args, trainable):
+ """End-to-end BP; optimizer filters to requires_grad params (frozen blocks excluded).
+ Gradients still flow THROUGH frozen blocks to reach trainable blocks / embed."""
+ opt = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
+ lr=args.lr, weight_decay=args.wd)
+ sch = optim.lr_scheduler.CosineAnnealingLR(opt, T_max=args.epochs)
+ curve = []
+ for ep in range(1, args.epochs + 1):
+ model.train()
+ for x, y in train_loader:
+ x = x.view(x.size(0), -1).to(dev); y = y.to(dev)
+ loss = F.cross_entropy(model(x), y)
+ opt.zero_grad(); loss.backward(); opt.step()
+ sch.step()
+ if ep % 10 == 0 or ep == 1 or ep == args.epochs:
+ acc = evaluate(model, test_loader, dev)
+ curve.append((ep, acc))
+ print(f" [BP k] ep {ep}: test={acc:.4f}", flush=True)
+ return curve
+
+
+def train_dfa(model, train_loader, test_loader, dev, args, trainable):
+ """DFA: each block reads output error directly via B_l (no sequential propagation).
+ Only TRAINABLE blocks are updated; embed / out_ln / out_head always trained."""
+ d, C, L = model.d_hidden, args.num_classes, model.num_blocks
+ Bs = [torch.randn(d, C, device=dev) / np.sqrt(C) for _ in range(L)]
+
+ block_opts = {l: optim.AdamW(model.blocks[l].parameters(), lr=args.lr, weight_decay=args.wd)
+ for l in sorted(trainable)}
+ embed_opt = optim.AdamW(model.embed.parameters(), lr=args.lr, weight_decay=args.wd)
+ head_opt = optim.AdamW(list(model.out_head.parameters()) + list(model.out_ln.parameters()),
+ lr=args.lr, weight_decay=args.wd)
+ scheds = [optim.lr_scheduler.CosineAnnealingLR(o, T_max=args.epochs)
+ for o in list(block_opts.values()) + [embed_opt, head_opt]]
+
+ curve = []
+ for ep in range(1, args.epochs + 1):
+ model.train()
+ for x, y in train_loader:
+ x = x.view(x.size(0), -1).to(dev); y = y.to(dev)
+ batch = x.size(0)
+ with torch.no_grad():
+ logits, hiddens = model(x, return_hidden=True)
+ e_T = logits.softmax(-1); e_T[torch.arange(batch), y] -= 1
+
+ # head: exact CE, h_L detached
+ hL = hiddens[-1].detach()
+ head_opt.zero_grad()
+ F.cross_entropy(model.out_head(model.out_ln(hL)), y).backward()
+ head_opt.step()
+
+ # trainable blocks: DFA local surrogate
+ for l in sorted(trainable):
+ a = (e_T @ Bs[l].T).detach()
+ a = a / ((a ** 2).mean(-1, keepdim=True).sqrt() + 1e-6)
+ f_l = model.blocks[l](hiddens[l].detach())
+ local = (f_l * a).sum(-1).mean()
+ block_opts[l].zero_grad(); local.backward(); block_opts[l].step()
+
+ # embed: DFA credit at h_0
+ a0 = (e_T @ Bs[0].T).detach()
+ a0 = a0 / ((a0 ** 2).mean(-1, keepdim=True).sqrt() + 1e-6)
+ embed_loss = (model.embed(x) * a0).sum(-1).mean()
+ embed_opt.zero_grad(); embed_loss.backward(); embed_opt.step()
+
+ for s in scheds:
+ s.step()
+ if ep % 10 == 0 or ep == 1 or ep == args.epochs:
+ acc = evaluate(model, test_loader, dev)
+ curve.append((ep, acc))
+ print(f" [DFA k] ep {ep}: test={acc:.4f}", flush=True)
+ return curve
+
+
+def train_fa(model, train_loader, test_loader, dev, args, trainable):
+ """Vanilla FA: credit propagates sequentially backward via fixed d×d B_l.
+ Frozen blocks STILL propagate credit (a_credit = a_credit @ B_l) so trainable
+ blocks / embed downstream receive it; only their weight update is skipped."""
+ d, C, L = model.d_hidden, args.num_classes, model.num_blocks
+ Bs = [torch.randn(d, d, device=dev) / np.sqrt(d) for _ in range(L)]
+
+ block_opts = {l: optim.AdamW(model.blocks[l].parameters(), lr=args.lr, weight_decay=args.wd)
+ for l in sorted(trainable)}
+ embed_opt = optim.AdamW(model.embed.parameters(), lr=args.lr, weight_decay=args.wd)
+ head_opt = optim.AdamW(list(model.out_head.parameters()) + list(model.out_ln.parameters()),
+ lr=args.lr, weight_decay=args.wd)
+ scheds = [optim.lr_scheduler.CosineAnnealingLR(o, T_max=args.epochs)
+ for o in list(block_opts.values()) + [embed_opt, head_opt]]
+
+ curve = []
+ for ep in range(1, args.epochs + 1):
+ model.train()
+ for x, y in train_loader:
+ x = x.view(x.size(0), -1).to(dev); y = y.to(dev)
+ batch = x.size(0)
+ with torch.no_grad():
+ logits, hiddens = model(x, return_hidden=True)
+
+ # head: exact CE; a_credit = exact gradient at h_L (FA's starting credit)
+ hL = hiddens[-1].detach().requires_grad_(True)
+ head_opt.zero_grad()
+ F.cross_entropy(model.out_head(model.out_ln(hL)), y).backward()
+ head_opt.step()
+ a_credit = hL.grad.detach()
+
+ # blocks backward: update only trainable; ALWAYS propagate credit
+ for l in range(L - 1, -1, -1):
+ if l in trainable:
+ a = a_credit / ((a_credit ** 2).mean(-1, keepdim=True).sqrt() + 1e-6)
+ f_l = model.blocks[l](hiddens[l].detach())
+ local = (f_l * a).sum(-1).mean()
+ block_opts[l].zero_grad(); local.backward(); block_opts[l].step()
+ a_credit = (a_credit @ Bs[l]).detach()
+
+ # embed: FA credit at h_0
+ a0 = a_credit / ((a_credit ** 2).mean(-1, keepdim=True).sqrt() + 1e-6)
+ embed_loss = (model.embed(x) * a0).sum(-1).mean()
+ embed_opt.zero_grad(); embed_loss.backward(); embed_opt.step()
+
+ for s in scheds:
+ s.step()
+ if ep % 10 == 0 or ep == 1 or ep == args.epochs:
+ acc = evaluate(model, test_loader, dev)
+ curve.append((ep, acc))
+ print(f" [FA k] ep {ep}: test={acc:.4f}", flush=True)
+ return curve
+
+
+# Dispatch table: --methods name -> trainer function.  All three trainers take
+# (model, train_loader, test_loader, dev, args, trainable) and return the
+# list of (epoch, test accuracy) checkpoints.
+TRAINERS = {'bp': train_bp, 'dfa': train_dfa, 'fa': train_fa}
+
+
+# ---------------------------------------------------------------------------
+# Driver
+# ---------------------------------------------------------------------------
+def main():
+ p = argparse.ArgumentParser()
+ p.add_argument('--d_hidden', type=int, default=256)
+ p.add_argument('--num_blocks', type=int, default=4)
+ p.add_argument('--dataset', type=str, default='cifar10')
+ p.add_argument('--methods', type=str, nargs='+', default=['bp', 'fa', 'dfa'])
+ p.add_argument('--k_values', type=int, nargs='+', default=[0, 1, 2, 3, 4])
+ p.add_argument('--seeds', type=int, nargs='+', default=[42, 123, 456])
+ p.add_argument('--epochs', type=int, default=100)
+ p.add_argument('--lr', type=float, default=1e-3)
+ p.add_argument('--wd', type=float, default=0.01)
+ p.add_argument('--batch_size', type=int, default=128)
+ p.add_argument('--gpu', type=int, default=0)
+ p.add_argument('--output_dir', type=str, default='results/depth_ladder')
+ args = p.parse_args()
+
+ dev = torch.device(f'cuda:{args.gpu}' if torch.cuda.is_available() else 'cpu')
+ os.makedirs(args.output_dir, exist_ok=True)
+ L = args.num_blocks
+ tag = f"ladder_d{args.d_hidden}_L{L}_{args.dataset}"
+ out_path = os.path.join(args.output_dir, f"{tag}.json")
+ print(f"Device={dev} {tag} methods={args.methods} k={args.k_values} seeds={args.seeds} "
+ f"epochs={args.epochs}", flush=True)
+
+ # incremental results: results[method][k][seed] = {final_acc, curve}
+ results = {}
+ if os.path.exists(out_path):
+ with open(out_path) as f:
+ results = json.load(f).get('results', {})
+ print(f"Resuming; existing keys: "
+ f"{[(m, list(results[m].keys())) for m in results]}", flush=True)
+
+ def save():
+ with open(out_path, 'w') as f:
+ json.dump({'config': vars(args), 'results': results}, f, indent=2)
+
+ for method in args.methods:
+ results.setdefault(method, {})
+ for k in args.k_values:
+ if k > L:
+ continue
+ results[method].setdefault(str(k), {})
+ for seed in args.seeds:
+ if str(seed) in results[method][str(k)]:
+ print(f" skip {method} k={k} seed={seed} (done)", flush=True)
+ continue
+ print(f"\n=== {method.upper()} k={k} (last {k} of {L} trainable) "
+ f"seed={seed} ===", flush=True)
+ torch.manual_seed(seed); np.random.seed(seed); torch.cuda.manual_seed_all(seed)
+ train_loader, test_loader, input_dim, num_classes = get_data(args.dataset, args.batch_size)
+ args.num_classes = num_classes
+
+ model = ResidualMLP(input_dim, args.d_hidden, num_classes, L).to(dev)
+ trainable = freeze_first(model, k)
+ n_train = sum(pp.numel() for pp in model.parameters() if pp.requires_grad)
+ print(f" trainable blocks: {sorted(trainable)} "
+ f"trainable params: {n_train:,}", flush=True)
+
+ curve = TRAINERS[method](model, train_loader, test_loader, dev, args, trainable)
+ final_acc = evaluate(model, test_loader, dev)
+ results[method][str(k)][str(seed)] = {'final_acc': final_acc, 'curve': curve}
+ print(f" FINAL {method} k={k} seed={seed}: {final_acc:.4f}", flush=True)
+ save()
+
+ # summary table
+ print(f"\n{'='*60}\nSUMMARY {tag} (mean ± ddof-1 std over seeds)\n{'='*60}", flush=True)
+ for method in args.methods:
+ row = []
+ for k in args.k_values:
+ if k > L:
+ continue
+ accs = [v['final_acc'] for v in results[method][str(k)].values()]
+ if accs:
+ m = float(np.mean(accs)); s = float(np.std(accs, ddof=1)) if len(accs) > 1 else 0.0
+ row.append(f"k={k}: {m:.4f}±{s:.4f}")
+ print(f" {method.upper():4s} " + " ".join(row), flush=True)
+ save()
+ print(f"\nSaved -> {out_path}", flush=True)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/experiments/frozen_init_identity_check.py b/experiments/frozen_init_identity_check.py
new file mode 100644
index 0000000..3f58d7d
--- /dev/null
+++ b/experiments/frozen_init_identity_check.py
@@ -0,0 +1,82 @@
+"""
+Frozen-init identity check (supporting measurement for the depth-utility ladder).
+
+Quantifies how close a randomly-initialized, frozen ResidualMLP block stack is to
+the identity map. This grounds the footnote explaining why the k=0 rung of the
+ladder (all blocks frozen at init) already sits well above chance: the trained
+embedding + readout are composed with a fixed, near-norm-preserving random feature
+map, i.e. effectively a trained (near-)linear classifier on pixels.
+
+Reports, at random init, on a CIFAR-10 test batch (mean over seeds):
+ - per-block residual ratio ||f_l(h_l)|| / ||h_l|| (median over batch)
+ - whole-stack deviation ||h_L - h_0|| / ||h_0|| (median over batch)
+ - whole-stack direction cos(h_L, h_0) (median over batch)
+
+Usage:
+ CUDA_VISIBLE_DEVICES=2 python experiments/frozen_init_identity_check.py
+"""
+import os, sys, json
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torchvision
+import torchvision.transforms as transforms
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from models.residual_mlp import ResidualMLP
+
+
+def main():
+ d_hidden, L, C, n = 256, 4, 10, 256
+ seeds = [42, 123, 456]
+ tf = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.4914, 0.4822, 0.4465),
+ (0.2470, 0.2435, 0.2616))])
+ ds = torchvision.datasets.CIFAR10('./data', train=False, download=True, transform=tf)
+ x = torch.stack([ds[i][0] for i in range(n)]).view(n, -1)
+
+ per_block, rel_dev, cos_dev = [], [], []
+ seed_rows = {}
+ for seed in seeds:
+ torch.manual_seed(seed); np.random.seed(seed)
+ m = ResidualMLP(32 * 32 * 3, d_hidden, C, L).eval()
+ with torch.no_grad():
+ h0 = m.embed(x); h = h0; ratios = []
+ for blk in m.blocks:
+ f = blk(h)
+ ratios.append(float((f.norm(dim=-1) / h.norm(dim=-1)).median()))
+ h = h + f
+ rel = float(((h - h0).norm(dim=-1) / h0.norm(dim=-1)).median())
+ cos = float(F.cosine_similarity(h, h0, dim=-1).median())
+ per_block.append(ratios); rel_dev.append(rel); cos_dev.append(cos)
+ seed_rows[str(seed)] = {'per_block_ratio': ratios, 'rel_dev': rel, 'cos': cos}
+ print(f"seed {seed}: per-block ||f||/||h|| = "
+ f"{['%.4f' % r for r in ratios]} "
+ f"||h_L-h_0||/||h_0|| = {rel:.3f} cos(h_L,h_0) = {cos:.4f}", flush=True)
+
+ pb = np.array(per_block)
+ summary = {
+ 'config': {'d_hidden': d_hidden, 'L': L, 'num_classes': C, 'batch': n,
+ 'dataset': 'cifar10-test', 'seeds': seeds},
+ 'per_seed': seed_rows,
+ 'per_block_ratio_mean': pb.mean(0).tolist(),
+ 'per_block_ratio_grand_mean': float(pb.mean()),
+ 'rel_dev_mean': float(np.mean(rel_dev)),
+ 'rel_dev_std': float(np.std(rel_dev, ddof=1)),
+ 'cos_mean': float(np.mean(cos_dev)),
+ 'cos_std': float(np.std(cos_dev, ddof=1)),
+ }
+ print(f"\nMEAN over {len(seeds)} seeds: "
+ f"per-block ratio ≈ {summary['per_block_ratio_grand_mean']:.3f}, "
+ f"||h_L-h_0||/||h_0|| = {summary['rel_dev_mean']:.3f} ± {summary['rel_dev_std']:.3f}, "
+ f"cos = {summary['cos_mean']:.4f} ± {summary['cos_std']:.4f}", flush=True)
+
+ out = 'results/depth_ladder/frozen_init_identity.json'
+ os.makedirs(os.path.dirname(out), exist_ok=True)
+ with open(out, 'w') as f:
+ json.dump(summary, f, indent=2)
+ print(f"Saved -> {out}", flush=True)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/experiments/plot_depth_ladder.py b/experiments/plot_depth_ladder.py
new file mode 100644
index 0000000..a5709bf
--- /dev/null
+++ b/experiments/plot_depth_ladder.py
@@ -0,0 +1,63 @@
+"""
+Plot the depth-utility ladder: test accuracy vs number of trainable blocks k,
+one curve per method (BP / FA / DFA), one panel per architecture.
+
+Usage:
+ python experiments/plot_depth_ladder.py
+"""
+import os, sys, json
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+# One entry per panel: (results JSON path, panel title, number of blocks L).
+CONFIGS = [
+ ('results/depth_ladder/ladder_d256_L4_cifar10.json', 'ResMLP d=256, L=4', 4),
+ ('results/depth_ladder/ladder_d512_L2_cifar10.json', 'ResMLP d=512, L=2', 2),
+]
+# One entry per curve: (key in the results JSON, legend label, colour, marker).
+METHODS = [('bp', 'BP', 'tab:green', 'o'),
+ ('fa', 'FA', 'tab:orange', 's'),
+ ('dfa', 'DFA', 'tab:red', '^')]
+
+
+def agg(path, L):
+ d = json.load(open(path))['results']
+ out = {}
+ for m, _, _, _ in METHODS:
+ ks, mu, sd = [], [], []
+ for k in range(L + 1):
+ a = [v['final_acc'] for v in d[m][str(k)].values()]
+ ks.append(k); mu.append(np.mean(a))
+ sd.append(np.std(a, ddof=1) if len(a) > 1 else 0.0)
+ out[m] = (np.array(ks), np.array(mu), np.array(sd))
+ return out
+
+
+def main():
+ fig, axes = plt.subplots(1, len(CONFIGS), figsize=(11, 4.2))
+ if len(CONFIGS) == 1:
+ axes = [axes]
+ for ax, (path, title, L) in zip(axes, CONFIGS):
+ data = agg(path, L)
+ for m, label, color, mk in METHODS:
+ ks, mu, sd = data[m]
+ ax.errorbar(ks, mu, yerr=sd, marker=mk, color=color, label=label,
+ capsize=3, lw=2, ms=7)
+ # frozen baseline reference (k=0, averaged across methods is ~chance-of-readout)
+ ax.axhline(0.10, ls=':', color='gray', lw=1)
+ ax.text(0.02, 0.105, 'chance', color='gray', fontsize=8, transform=ax.get_yaxis_transform())
+ ax.set_xlabel('trainable blocks $k$ (last $k$ of $L$)')
+ ax.set_ylabel('CIFAR-10 test accuracy')
+ ax.set_title(title)
+ ax.set_xticks(range(L + 1))
+ ax.grid(alpha=0.3)
+ ax.legend(loc='center right')
+ fig.suptitle('Depth-utility ladder: does training deeper blocks raise accuracy?', y=1.02)
+ fig.tight_layout()
+ out = 'results/depth_ladder/depth_ladder.png'
+ fig.savefig(out, dpi=150, bbox_inches='tight')
+ print(f"Saved -> {out}")
+
+
+if __name__ == '__main__':
+ main()
diff --git a/logs/depth_ladder.log b/logs/depth_ladder.log
new file mode 100644
index 0000000..20af1ab
--- /dev/null
+++ b/logs/depth_ladder.log
@@ -0,0 +1,1103 @@
+[Sun Jun 14 11:29:47 AM CDT 2026] START primary d=256 L=4 ladder
+Device=cuda:0 ladder_d256_L4_cifar10 methods=['bp', 'fa', 'dfa'] k=[0, 1, 2, 3, 4] seeds=[42, 123, 456] epochs=100
+
+=== BP k=0 (last 0 of 4 trainable) seed=42 ===
+ trainable blocks: [] trainable params: 789,770
+ [BP k] ep 1: test=0.3543
+ [BP k] ep 10: test=0.3673
+ [BP k] ep 20: test=0.3483
+ [BP k] ep 30: test=0.3498
+ [BP k] ep 40: test=0.3608
+ [BP k] ep 50: test=0.3627
+ [BP k] ep 60: test=0.3697
+ [BP k] ep 70: test=0.3803
+ [BP k] ep 80: test=0.3821
+ [BP k] ep 90: test=0.3870
+ [BP k] ep 100: test=0.3882
+ FINAL bp k=0 seed=42: 0.3882
+
+=== BP k=0 (last 0 of 4 trainable) seed=123 ===
+ trainable blocks: [] trainable params: 789,770
+ [BP k] ep 1: test=0.3535
+ [BP k] ep 10: test=0.3654
+ [BP k] ep 20: test=0.3612
+ [BP k] ep 30: test=0.3586
+ [BP k] ep 40: test=0.3633
+ [BP k] ep 50: test=0.3608
+ [BP k] ep 60: test=0.3772
+ [BP k] ep 70: test=0.3791
+ [BP k] ep 80: test=0.3897
+ [BP k] ep 90: test=0.3884
+ [BP k] ep 100: test=0.3899
+ FINAL bp k=0 seed=123: 0.3899
+
+=== BP k=0 (last 0 of 4 trainable) seed=456 ===
+ trainable blocks: [] trainable params: 789,770
+ [BP k] ep 1: test=0.3551
+ [BP k] ep 10: test=0.3680
+ [BP k] ep 20: test=0.3509
+ [BP k] ep 30: test=0.3655
+ [BP k] ep 40: test=0.3573
+ [BP k] ep 50: test=0.3543
+ [BP k] ep 60: test=0.3716
+ [BP k] ep 70: test=0.3824
+ [BP k] ep 80: test=0.3852
+ [BP k] ep 90: test=0.3891
+ [BP k] ep 100: test=0.3878
+ FINAL bp k=0 seed=456: 0.3878
+
+=== BP k=1 (last 1 of 4 trainable) seed=42 ===
+ trainable blocks: [3] trainable params: 921,866
+ [BP k] ep 1: test=0.3736
+ [BP k] ep 10: test=0.4890
+ [BP k] ep 20: test=0.5089
+ [BP k] ep 30: test=0.5260
+ [BP k] ep 40: test=0.5365
+ [BP k] ep 50: test=0.5486
+ [BP k] ep 60: test=0.5524
+ [BP k] ep 70: test=0.5638
+ [BP k] ep 80: test=0.5666
+ [BP k] ep 90: test=0.5678
+ [BP k] ep 100: test=0.5683
+ FINAL bp k=1 seed=42: 0.5683
+
+=== BP k=1 (last 1 of 4 trainable) seed=123 ===
+ trainable blocks: [3] trainable params: 921,866
+ [BP k] ep 1: test=0.3878
+ [BP k] ep 10: test=0.4797
+ [BP k] ep 20: test=0.5096
+ [BP k] ep 30: test=0.5209
+ [BP k] ep 40: test=0.5280
+ [BP k] ep 50: test=0.5486
+ [BP k] ep 60: test=0.5530
+ [BP k] ep 70: test=0.5564
+ [BP k] ep 80: test=0.5609
+ [BP k] ep 90: test=0.5611
+ [BP k] ep 100: test=0.5623
+ FINAL bp k=1 seed=123: 0.5623
+
+=== BP k=1 (last 1 of 4 trainable) seed=456 ===
+ trainable blocks: [3] trainable params: 921,866
+ [BP k] ep 1: test=0.3772
+ [BP k] ep 10: test=0.4853
+ [BP k] ep 20: test=0.5098
+ [BP k] ep 30: test=0.5238
+ [BP k] ep 40: test=0.5387
+ [BP k] ep 50: test=0.5488
+ [BP k] ep 60: test=0.5547
+ [BP k] ep 70: test=0.5588
+ [BP k] ep 80: test=0.5636
+ [BP k] ep 90: test=0.5637
+ [BP k] ep 100: test=0.5643
+ FINAL bp k=1 seed=456: 0.5643
+
+=== BP k=2 (last 2 of 4 trainable) seed=42 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [BP k] ep 1: test=0.3874
+ [BP k] ep 10: test=0.5157
+ [BP k] ep 20: test=0.5361
+ [BP k] ep 30: test=0.5600
+ [BP k] ep 40: test=0.5753
+ [BP k] ep 50: test=0.5802
+ [BP k] ep 60: test=0.5843
+ [BP k] ep 70: test=0.5965
+ [BP k] ep 80: test=0.5970
+ [BP k] ep 90: test=0.5979
+ [BP k] ep 100: test=0.5994
+ FINAL bp k=2 seed=42: 0.5994
+
+=== BP k=2 (last 2 of 4 trainable) seed=123 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [BP k] ep 1: test=0.3925
+ [BP k] ep 10: test=0.5148
+ [BP k] ep 20: test=0.5376
+ [BP k] ep 30: test=0.5638
+ [BP k] ep 40: test=0.5693
+ [BP k] ep 50: test=0.5784
+ [BP k] ep 60: test=0.5927
+ [BP k] ep 70: test=0.5911
+ [BP k] ep 80: test=0.5973
+ [BP k] ep 90: test=0.5986
+ [BP k] ep 100: test=0.6000
+ FINAL bp k=2 seed=123: 0.6000
+
+=== BP k=2 (last 2 of 4 trainable) seed=456 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [BP k] ep 1: test=0.3868
+ [BP k] ep 10: test=0.5103
+ [BP k] ep 20: test=0.5420
+ [BP k] ep 30: test=0.5610
+ [BP k] ep 40: test=0.5699
+ [BP k] ep 50: test=0.5789
+ [BP k] ep 60: test=0.5809
+ [BP k] ep 70: test=0.5844
+ [BP k] ep 80: test=0.5919
+ [BP k] ep 90: test=0.5919
+ [BP k] ep 100: test=0.5939
+ FINAL bp k=2 seed=456: 0.5939
+
+=== BP k=3 (last 3 of 4 trainable) seed=42 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [BP k] ep 1: test=0.3904
+ [BP k] ep 10: test=0.5218
+ [BP k] ep 20: test=0.5469
+ [BP k] ep 30: test=0.5749
+ [BP k] ep 40: test=0.5935
+ [BP k] ep 50: test=0.5950
+ [BP k] ep 60: test=0.5983
+ [BP k] ep 70: test=0.6015
+ [BP k] ep 80: test=0.6070
+ [BP k] ep 90: test=0.6057
+ [BP k] ep 100: test=0.6079
+ FINAL bp k=3 seed=42: 0.6079
+
+=== BP k=3 (last 3 of 4 trainable) seed=123 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [BP k] ep 1: test=0.3965
+ [BP k] ep 10: test=0.5240
+ [BP k] ep 20: test=0.5517
+ [BP k] ep 30: test=0.5747
+ [BP k] ep 40: test=0.5774
+ [BP k] ep 50: test=0.5927
+ [BP k] ep 60: test=0.6035
+ [BP k] ep 70: test=0.6030
+ [BP k] ep 80: test=0.6057
+ [BP k] ep 90: test=0.6073
+ [BP k] ep 100: test=0.6069
+ FINAL bp k=3 seed=123: 0.6069
+
+=== BP k=3 (last 3 of 4 trainable) seed=456 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [BP k] ep 1: test=0.3947
+ [BP k] ep 10: test=0.5148
+ [BP k] ep 20: test=0.5536
+ [BP k] ep 30: test=0.5723
+ [BP k] ep 40: test=0.5873
+ [BP k] ep 50: test=0.5861
+ [BP k] ep 60: test=0.5991
+ [BP k] ep 70: test=0.5989
+ [BP k] ep 80: test=0.6062
+ [BP k] ep 90: test=0.6093
+ [BP k] ep 100: test=0.6080
+ FINAL bp k=3 seed=456: 0.6080
+
+=== BP k=4 (last 4 of 4 trainable) seed=42 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [BP k] ep 1: test=0.3936
+ [BP k] ep 10: test=0.5235
+ [BP k] ep 20: test=0.5606
+ [BP k] ep 30: test=0.5794
+ [BP k] ep 40: test=0.5992
+ [BP k] ep 50: test=0.6044
+ [BP k] ep 60: test=0.5979
+ [BP k] ep 70: test=0.6115
+ [BP k] ep 80: test=0.6153
+ [BP k] ep 90: test=0.6177
+ [BP k] ep 100: test=0.6173
+ FINAL bp k=4 seed=42: 0.6173
+
+=== BP k=4 (last 4 of 4 trainable) seed=123 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [BP k] ep 1: test=0.3981
+ [BP k] ep 10: test=0.5257
+ [BP k] ep 20: test=0.5580
+ [BP k] ep 30: test=0.5779
+ [BP k] ep 40: test=0.5896
+ [BP k] ep 50: test=0.6023
+ [BP k] ep 60: test=0.6053
+ [BP k] ep 70: test=0.6081
+ [BP k] ep 80: test=0.6185
+ [BP k] ep 90: test=0.6174
+ [BP k] ep 100: test=0.6182
+ FINAL bp k=4 seed=123: 0.6182
+
+=== BP k=4 (last 4 of 4 trainable) seed=456 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [BP k] ep 1: test=0.3967
+ [BP k] ep 10: test=0.5255
+ [BP k] ep 20: test=0.5632
+ [BP k] ep 30: test=0.5747
+ [BP k] ep 40: test=0.5948
+ [BP k] ep 50: test=0.5954
+ [BP k] ep 60: test=0.6092
+ [BP k] ep 70: test=0.6140
+ [BP k] ep 80: test=0.6125
+ [BP k] ep 90: test=0.6145
+ [BP k] ep 100: test=0.6145
+ FINAL bp k=4 seed=456: 0.6145
+
+=== FA k=0 (last 0 of 4 trainable) seed=42 ===
+ trainable blocks: [] trainable params: 789,770
+ [FA k] ep 1: test=0.3112
+ [FA k] ep 10: test=0.3389
+ [FA k] ep 20: test=0.3325
+ [FA k] ep 30: test=0.3495
+ [FA k] ep 40: test=0.3467
+ [FA k] ep 50: test=0.3465
+ [FA k] ep 60: test=0.3573
+ [FA k] ep 70: test=0.3542
+ [FA k] ep 80: test=0.3567
+ [FA k] ep 90: test=0.3554
+ [FA k] ep 100: test=0.3555
+ FINAL fa k=0 seed=42: 0.3555
+
+=== FA k=0 (last 0 of 4 trainable) seed=123 ===
+ trainable blocks: [] trainable params: 789,770
+ [FA k] ep 1: test=0.3257
+ [FA k] ep 10: test=0.3409
+ [FA k] ep 20: test=0.3514
+ [FA k] ep 30: test=0.3357
+ [FA k] ep 40: test=0.3299
+ [FA k] ep 50: test=0.3495
+ [FA k] ep 60: test=0.3468
+ [FA k] ep 70: test=0.3548
+ [FA k] ep 80: test=0.3509
+ [FA k] ep 90: test=0.3536
+ [FA k] ep 100: test=0.3520
+ FINAL fa k=0 seed=123: 0.3520
+
+=== FA k=0 (last 0 of 4 trainable) seed=456 ===
+ trainable blocks: [] trainable params: 789,770
+ [FA k] ep 1: test=0.3172
+ [FA k] ep 10: test=0.3374
+ [FA k] ep 20: test=0.3452
+ [FA k] ep 30: test=0.3431
+ [FA k] ep 40: test=0.3468
+ [FA k] ep 50: test=0.3563
+ [FA k] ep 60: test=0.3523
+ [FA k] ep 70: test=0.3578
+ [FA k] ep 80: test=0.3568
+ [FA k] ep 90: test=0.3576
+ [FA k] ep 100: test=0.3578
+ FINAL fa k=0 seed=456: 0.3578
+
+=== FA k=1 (last 1 of 4 trainable) seed=42 ===
+ trainable blocks: [3] trainable params: 921,866
+ [FA k] ep 1: test=0.2886
+ [FA k] ep 10: test=0.3301
+ [FA k] ep 20: test=0.3604
+ [FA k] ep 30: test=0.3595
+ [FA k] ep 40: test=0.3678
+ [FA k] ep 50: test=0.3779
+ [FA k] ep 60: test=0.3727
+ [FA k] ep 70: test=0.3810
+ [FA k] ep 80: test=0.3810
+ [FA k] ep 90: test=0.3821
+ [FA k] ep 100: test=0.3819
+ FINAL fa k=1 seed=42: 0.3819
+
+=== FA k=1 (last 1 of 4 trainable) seed=123 ===
+ trainable blocks: [3] trainable params: 921,866
+ [FA k] ep 1: test=0.3105
+ [FA k] ep 10: test=0.3472
+ [FA k] ep 20: test=0.3444
+ [FA k] ep 30: test=0.3604
+ [FA k] ep 40: test=0.3615
+ [FA k] ep 50: test=0.3568
+ [FA k] ep 60: test=0.3708
+ [FA k] ep 70: test=0.3723
+ [FA k] ep 80: test=0.3749
+ [FA k] ep 90: test=0.3736
+ [FA k] ep 100: test=0.3742
+ FINAL fa k=1 seed=123: 0.3742
+
+=== FA k=1 (last 1 of 4 trainable) seed=456 ===
+ trainable blocks: [3] trainable params: 921,866
+ [FA k] ep 1: test=0.2975
+ [FA k] ep 10: test=0.3481
+ [FA k] ep 20: test=0.3454
+ [FA k] ep 30: test=0.3683
+ [FA k] ep 40: test=0.3618
+ [FA k] ep 50: test=0.3675
+ [FA k] ep 60: test=0.3826
+ [FA k] ep 70: test=0.3867
+ [FA k] ep 80: test=0.3863
+ [FA k] ep 90: test=0.3899
+ [FA k] ep 100: test=0.3898
+ FINAL fa k=1 seed=456: 0.3898
+
+=== FA k=2 (last 2 of 4 trainable) seed=42 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [FA k] ep 1: test=0.2657
+ [FA k] ep 10: test=0.3431
+ [FA k] ep 20: test=0.3494
+ [FA k] ep 30: test=0.3436
+ [FA k] ep 40: test=0.3574
+ [FA k] ep 50: test=0.3388
+ [FA k] ep 60: test=0.3426
+ [FA k] ep 70: test=0.3341
+ [FA k] ep 80: test=0.3303
+ [FA k] ep 90: test=0.3310
+ [FA k] ep 100: test=0.3305
+ FINAL fa k=2 seed=42: 0.3305
+
+=== FA k=2 (last 2 of 4 trainable) seed=123 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [FA k] ep 1: test=0.2982
+ [FA k] ep 10: test=0.3524
+ [FA k] ep 20: test=0.3694
+ [FA k] ep 30: test=0.3691
+ [FA k] ep 40: test=0.3703
+ [FA k] ep 50: test=0.3605
+ [FA k] ep 60: test=0.3546
+ [FA k] ep 70: test=0.3547
+ [FA k] ep 80: test=0.3651
+ [FA k] ep 90: test=0.3565
+ [FA k] ep 100: test=0.3607
+ FINAL fa k=2 seed=123: 0.3607
+
+=== FA k=2 (last 2 of 4 trainable) seed=456 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [FA k] ep 1: test=0.2753
+ [FA k] ep 10: test=0.3386
+ [FA k] ep 20: test=0.3495
+ [FA k] ep 30: test=0.3458
+ [FA k] ep 40: test=0.3374
+ [FA k] ep 50: test=0.3333
+ [FA k] ep 60: test=0.3523
+ [FA k] ep 70: test=0.3538
+ [FA k] ep 80: test=0.3519
+ [FA k] ep 90: test=0.3555
+ [FA k] ep 100: test=0.3548
+ FINAL fa k=2 seed=456: 0.3548
+
+=== FA k=3 (last 3 of 4 trainable) seed=42 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [FA k] ep 1: test=0.2770
+ [FA k] ep 10: test=0.3554
+ [FA k] ep 20: test=0.3681
+ [FA k] ep 30: test=0.3841
+ [FA k] ep 40: test=0.3829
+ [FA k] ep 50: test=0.3847
+ [FA k] ep 60: test=0.3885
+ [FA k] ep 70: test=0.3956
+ [FA k] ep 80: test=0.3947
+ [FA k] ep 90: test=0.3916
+ [FA k] ep 100: test=0.3930
+ FINAL fa k=3 seed=42: 0.3930
+
+=== FA k=3 (last 3 of 4 trainable) seed=123 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [FA k] ep 1: test=0.2905
+ [FA k] ep 10: test=0.3495
+ [FA k] ep 20: test=0.3804
+ [FA k] ep 30: test=0.3820
+ [FA k] ep 40: test=0.3885
+ [FA k] ep 50: test=0.3950
+ [FA k] ep 60: test=0.3971
+ [FA k] ep 70: test=0.4049
+ [FA k] ep 80: test=0.4047
+ [FA k] ep 90: test=0.4075
+ [FA k] ep 100: test=0.4074
+ FINAL fa k=3 seed=123: 0.4074
+
+=== FA k=3 (last 3 of 4 trainable) seed=456 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [FA k] ep 1: test=0.2708
+ [FA k] ep 10: test=0.3511
+ [FA k] ep 20: test=0.3662
+ [FA k] ep 30: test=0.3755
+ [FA k] ep 40: test=0.3818
+ [FA k] ep 50: test=0.3828
+ [FA k] ep 60: test=0.3966
+ [FA k] ep 70: test=0.3939
+ [FA k] ep 80: test=0.3928
+ [FA k] ep 90: test=0.3933
+ [FA k] ep 100: test=0.3946
+ FINAL fa k=3 seed=456: 0.3946
+
+=== FA k=4 (last 4 of 4 trainable) seed=42 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [FA k] ep 1: test=0.2789
+ [FA k] ep 10: test=0.3498
+ [FA k] ep 20: test=0.3601
+ [FA k] ep 30: test=0.3710
+ [FA k] ep 40: test=0.3834
+ [FA k] ep 50: test=0.3923
+ [FA k] ep 60: test=0.3912
+ [FA k] ep 70: test=0.3945
+ [FA k] ep 80: test=0.3957
+ [FA k] ep 90: test=0.3944
+ [FA k] ep 100: test=0.3959
+ FINAL fa k=4 seed=42: 0.3959
+
+=== FA k=4 (last 4 of 4 trainable) seed=123 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [FA k] ep 1: test=0.2905
+ [FA k] ep 10: test=0.3596
+ [FA k] ep 20: test=0.3803
+ [FA k] ep 30: test=0.3792
+ [FA k] ep 40: test=0.3955
+ [FA k] ep 50: test=0.3980
+ [FA k] ep 60: test=0.4071
+ [FA k] ep 70: test=0.4034
+ [FA k] ep 80: test=0.4076
+ [FA k] ep 90: test=0.4115
+ [FA k] ep 100: test=0.4122
+ FINAL fa k=4 seed=123: 0.4122
+
+=== FA k=4 (last 4 of 4 trainable) seed=456 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [FA k] ep 1: test=0.2713
+ [FA k] ep 10: test=0.3544
+ [FA k] ep 20: test=0.3702
+ [FA k] ep 30: test=0.3799
+ [FA k] ep 40: test=0.3845
+ [FA k] ep 50: test=0.3923
+ [FA k] ep 60: test=0.3992
+ [FA k] ep 70: test=0.3974
+ [FA k] ep 80: test=0.3990
+ [FA k] ep 90: test=0.4000
+ [FA k] ep 100: test=0.3987
+ FINAL fa k=4 seed=456: 0.3987
+
+=== DFA k=0 (last 0 of 4 trainable) seed=42 ===
+ trainable blocks: [] trainable params: 789,770
+ [DFA k] ep 1: test=0.3185
+ [DFA k] ep 10: test=0.3370
+ [DFA k] ep 20: test=0.3458
+ [DFA k] ep 30: test=0.3425
+ [DFA k] ep 40: test=0.3419
+ [DFA k] ep 50: test=0.3425
+ [DFA k] ep 60: test=0.3420
+ [DFA k] ep 70: test=0.3466
+ [DFA k] ep 80: test=0.3458
+ [DFA k] ep 90: test=0.3470
+ [DFA k] ep 100: test=0.3454
+ FINAL dfa k=0 seed=42: 0.3454
+
+=== DFA k=0 (last 0 of 4 trainable) seed=123 ===
+ trainable blocks: [] trainable params: 789,770
+ [DFA k] ep 1: test=0.3219
+ [DFA k] ep 10: test=0.3339
+ [DFA k] ep 20: test=0.3453
+ [DFA k] ep 30: test=0.3352
+ [DFA k] ep 40: test=0.3322
+ [DFA k] ep 50: test=0.3291
+ [DFA k] ep 60: test=0.3428
+ [DFA k] ep 70: test=0.3447
+ [DFA k] ep 80: test=0.3465
+ [DFA k] ep 90: test=0.3464
+ [DFA k] ep 100: test=0.3498
+ FINAL dfa k=0 seed=123: 0.3498
+
+=== DFA k=0 (last 0 of 4 trainable) seed=456 ===
+ trainable blocks: [] trainable params: 789,770
+ [DFA k] ep 1: test=0.3241
+ [DFA k] ep 10: test=0.3486
+ [DFA k] ep 20: test=0.3396
+ [DFA k] ep 30: test=0.3396
+ [DFA k] ep 40: test=0.3387
+ [DFA k] ep 50: test=0.3456
+ [DFA k] ep 60: test=0.3508
+ [DFA k] ep 70: test=0.3527
+ [DFA k] ep 80: test=0.3498
+ [DFA k] ep 90: test=0.3508
+ [DFA k] ep 100: test=0.3516
+ FINAL dfa k=0 seed=456: 0.3516
+
+=== DFA k=1 (last 1 of 4 trainable) seed=42 ===
+ trainable blocks: [3] trainable params: 921,866
+ [DFA k] ep 1: test=0.2563
+ [DFA k] ep 10: test=0.2580
+ [DFA k] ep 20: test=0.2445
+ [DFA k] ep 30: test=0.2197
+ [DFA k] ep 40: test=0.2229
+ [DFA k] ep 50: test=0.1952
+ [DFA k] ep 60: test=0.2306
+ [DFA k] ep 70: test=0.2290
+ [DFA k] ep 80: test=0.2211
+ [DFA k] ep 90: test=0.2215
+ [DFA k] ep 100: test=0.2267
+ FINAL dfa k=1 seed=42: 0.2267
+
+=== DFA k=1 (last 1 of 4 trainable) seed=123 ===
+ trainable blocks: [3] trainable params: 921,866
+ [DFA k] ep 1: test=0.2549
+ [DFA k] ep 10: test=0.2505
+ [DFA k] ep 20: test=0.2453
+ [DFA k] ep 30: test=0.2358
+ [DFA k] ep 40: test=0.2499
+ [DFA k] ep 50: test=0.2506
+ [DFA k] ep 60: test=0.2467
+ [DFA k] ep 70: test=0.2513
+ [DFA k] ep 80: test=0.2597
+ [DFA k] ep 90: test=0.2586
+ [DFA k] ep 100: test=0.2563
+ FINAL dfa k=1 seed=123: 0.2563
+
+=== DFA k=1 (last 1 of 4 trainable) seed=456 ===
+ trainable blocks: [3] trainable params: 921,866
+ [DFA k] ep 1: test=0.2112
+ [DFA k] ep 10: test=0.2227
+ [DFA k] ep 20: test=0.2397
+ [DFA k] ep 30: test=0.2326
+ [DFA k] ep 40: test=0.2285
+ [DFA k] ep 50: test=0.2176
+ [DFA k] ep 60: test=0.2431
+ [DFA k] ep 70: test=0.2476
+ [DFA k] ep 80: test=0.2493
+ [DFA k] ep 90: test=0.2477
+ [DFA k] ep 100: test=0.2476
+ FINAL dfa k=1 seed=456: 0.2476
+
+=== DFA k=2 (last 2 of 4 trainable) seed=42 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [DFA k] ep 1: test=0.2792
+ [DFA k] ep 10: test=0.2893
+ [DFA k] ep 20: test=0.2978
+ [DFA k] ep 30: test=0.2960
+ [DFA k] ep 40: test=0.3010
+ [DFA k] ep 50: test=0.3014
+ [DFA k] ep 60: test=0.3005
+ [DFA k] ep 70: test=0.3036
+ [DFA k] ep 80: test=0.2997
+ [DFA k] ep 90: test=0.3005
+ [DFA k] ep 100: test=0.3005
+ FINAL dfa k=2 seed=42: 0.3005
+
+=== DFA k=2 (last 2 of 4 trainable) seed=123 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [DFA k] ep 1: test=0.2671
+ [DFA k] ep 10: test=0.2947
+ [DFA k] ep 20: test=0.2841
+ [DFA k] ep 30: test=0.2801
+ [DFA k] ep 40: test=0.2819
+ [DFA k] ep 50: test=0.2772
+ [DFA k] ep 60: test=0.2834
+ [DFA k] ep 70: test=0.2876
+ [DFA k] ep 80: test=0.2757
+ [DFA k] ep 90: test=0.2806
+ [DFA k] ep 100: test=0.2819
+ FINAL dfa k=2 seed=123: 0.2819
+
+=== DFA k=2 (last 2 of 4 trainable) seed=456 ===
+ trainable blocks: [2, 3] trainable params: 1,053,962
+ [DFA k] ep 1: test=0.2604
+ [DFA k] ep 10: test=0.2821
+ [DFA k] ep 20: test=0.2784
+ [DFA k] ep 30: test=0.2826
+ [DFA k] ep 40: test=0.2805
+ [DFA k] ep 50: test=0.2675
+ [DFA k] ep 60: test=0.2735
+ [DFA k] ep 70: test=0.2765
+ [DFA k] ep 80: test=0.2735
+ [DFA k] ep 90: test=0.2759
+ [DFA k] ep 100: test=0.2751
+ FINAL dfa k=2 seed=456: 0.2751
+
+=== DFA k=3 (last 3 of 4 trainable) seed=42 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [DFA k] ep 1: test=0.2821
+ [DFA k] ep 10: test=0.2882
+ [DFA k] ep 20: test=0.2921
+ [DFA k] ep 30: test=0.3064
+ [DFA k] ep 40: test=0.3009
+ [DFA k] ep 50: test=0.3044
+ [DFA k] ep 60: test=0.3041
+ [DFA k] ep 70: test=0.3075
+ [DFA k] ep 80: test=0.3064
+ [DFA k] ep 90: test=0.3021
+ [DFA k] ep 100: test=0.3047
+ FINAL dfa k=3 seed=42: 0.3047
+
+=== DFA k=3 (last 3 of 4 trainable) seed=123 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [DFA k] ep 1: test=0.2630
+ [DFA k] ep 10: test=0.2910
+ [DFA k] ep 20: test=0.2845
+ [DFA k] ep 30: test=0.2821
+ [DFA k] ep 40: test=0.2900
+ [DFA k] ep 50: test=0.2811
+ [DFA k] ep 60: test=0.2860
+ [DFA k] ep 70: test=0.2910
+ [DFA k] ep 80: test=0.2879
+ [DFA k] ep 90: test=0.2910
+ [DFA k] ep 100: test=0.2906
+ FINAL dfa k=3 seed=123: 0.2906
+
+=== DFA k=3 (last 3 of 4 trainable) seed=456 ===
+ trainable blocks: [1, 2, 3] trainable params: 1,186,058
+ [DFA k] ep 1: test=0.2544
+ [DFA k] ep 10: test=0.2841
+ [DFA k] ep 20: test=0.2892
+ [DFA k] ep 30: test=0.2998
+ [DFA k] ep 40: test=0.2891
+ [DFA k] ep 50: test=0.2844
+ [DFA k] ep 60: test=0.2938
+ [DFA k] ep 70: test=0.2928
+ [DFA k] ep 80: test=0.2901
+ [DFA k] ep 90: test=0.2932
+ [DFA k] ep 100: test=0.2919
+ FINAL dfa k=3 seed=456: 0.2919
+
+=== DFA k=4 (last 4 of 4 trainable) seed=42 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [DFA k] ep 1: test=0.2899
+ [DFA k] ep 10: test=0.2873
+ [DFA k] ep 20: test=0.3016
+ [DFA k] ep 30: test=0.3053
+ [DFA k] ep 40: test=0.3120
+ [DFA k] ep 50: test=0.3045
+ [DFA k] ep 60: test=0.3071
+ [DFA k] ep 70: test=0.3102
+ [DFA k] ep 80: test=0.3080
+ [DFA k] ep 90: test=0.3066
+ [DFA k] ep 100: test=0.3068
+ FINAL dfa k=4 seed=42: 0.3068
+
+=== DFA k=4 (last 4 of 4 trainable) seed=123 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [DFA k] ep 1: test=0.2683
+ [DFA k] ep 10: test=0.2926
+ [DFA k] ep 20: test=0.2861
+ [DFA k] ep 30: test=0.2875
+ [DFA k] ep 40: test=0.2978
+ [DFA k] ep 50: test=0.2910
+ [DFA k] ep 60: test=0.2972
+ [DFA k] ep 70: test=0.3011
+ [DFA k] ep 80: test=0.2974
+ [DFA k] ep 90: test=0.3015
+ [DFA k] ep 100: test=0.3023
+ FINAL dfa k=4 seed=123: 0.3023
+
+=== DFA k=4 (last 4 of 4 trainable) seed=456 ===
+ trainable blocks: [0, 1, 2, 3] trainable params: 1,318,154
+ [DFA k] ep 1: test=0.2591
+ [DFA k] ep 10: test=0.2883
+ [DFA k] ep 20: test=0.2948
+ [DFA k] ep 30: test=0.2995
+ [DFA k] ep 40: test=0.2921
+ [DFA k] ep 50: test=0.2956
+ [DFA k] ep 60: test=0.2960
+ [DFA k] ep 70: test=0.2943
+ [DFA k] ep 80: test=0.2910
+ [DFA k] ep 90: test=0.2955
+ [DFA k] ep 100: test=0.2949
+ FINAL dfa k=4 seed=456: 0.2949
+
+============================================================
+SUMMARY ladder_d256_L4_cifar10 (mean ± ddof-1 std over seeds)
+============================================================
+ BP k=0: 0.3886±0.0011 k=1: 0.5650±0.0031 k=2: 0.5978±0.0034 k=3: 0.6076±0.0006 k=4: 0.6167±0.0019
+ FA k=0: 0.3551±0.0029 k=1: 0.3820±0.0078 k=2: 0.3487±0.0160 k=3: 0.3983±0.0079 k=4: 0.4023±0.0087
+ DFA k=0: 0.3489±0.0032 k=1: 0.2435±0.0152 k=2: 0.2858±0.0131 k=3: 0.2957±0.0078 k=4: 0.3013±0.0060
+
+Saved -> results/depth_ladder/ladder_d256_L4_cifar10.json
+[Sun Jun 14 03:26:20 PM CDT 2026] START secondary d=512 L=2 FA-failure ladder
+Device=cuda:0 ladder_d512_L2_cifar10 methods=['bp', 'fa', 'dfa'] k=[0, 1, 2] seeds=[42, 123, 456] epochs=100
+
+=== BP k=0 (last 0 of 2 trainable) seed=42 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [BP k] ep 1: test=0.3462
+ [BP k] ep 10: test=0.3633
+ [BP k] ep 20: test=0.3635
+ [BP k] ep 30: test=0.3543
+ [BP k] ep 40: test=0.3673
+ [BP k] ep 50: test=0.3633
+ [BP k] ep 60: test=0.3695
+ [BP k] ep 70: test=0.3753
+ [BP k] ep 80: test=0.3858
+ [BP k] ep 90: test=0.3887
+ [BP k] ep 100: test=0.3891
+ FINAL bp k=0 seed=42: 0.3891
+
+=== BP k=0 (last 0 of 2 trainable) seed=123 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [BP k] ep 1: test=0.3497
+ [BP k] ep 10: test=0.3704
+ [BP k] ep 20: test=0.3698
+ [BP k] ep 30: test=0.3540
+ [BP k] ep 40: test=0.3505
+ [BP k] ep 50: test=0.3634
+ [BP k] ep 60: test=0.3675
+ [BP k] ep 70: test=0.3739
+ [BP k] ep 80: test=0.3823
+ [BP k] ep 90: test=0.3845
+ [BP k] ep 100: test=0.3846
+ FINAL bp k=0 seed=123: 0.3846
+
+=== BP k=0 (last 0 of 2 trainable) seed=456 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [BP k] ep 1: test=0.3409
+ [BP k] ep 10: test=0.3578
+ [BP k] ep 20: test=0.3767
+ [BP k] ep 30: test=0.3607
+ [BP k] ep 40: test=0.3551
+ [BP k] ep 50: test=0.3632
+ [BP k] ep 60: test=0.3722
+ [BP k] ep 70: test=0.3704
+ [BP k] ep 80: test=0.3784
+ [BP k] ep 90: test=0.3834
+ [BP k] ep 100: test=0.3838
+ FINAL bp k=0 seed=456: 0.3838
+
+=== BP k=1 (last 1 of 2 trainable) seed=42 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [BP k] ep 1: test=0.3667
+ [BP k] ep 10: test=0.4836
+ [BP k] ep 20: test=0.5197
+ [BP k] ep 30: test=0.5367
+ [BP k] ep 40: test=0.5444
+ [BP k] ep 50: test=0.5629
+ [BP k] ep 60: test=0.5691
+ [BP k] ep 70: test=0.5779
+ [BP k] ep 80: test=0.5808
+ [BP k] ep 90: test=0.5849
+ [BP k] ep 100: test=0.5856
+ FINAL bp k=1 seed=42: 0.5856
+
+=== BP k=1 (last 1 of 2 trainable) seed=123 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [BP k] ep 1: test=0.3632
+ [BP k] ep 10: test=0.4865
+ [BP k] ep 20: test=0.5175
+ [BP k] ep 30: test=0.5360
+ [BP k] ep 40: test=0.5466
+ [BP k] ep 50: test=0.5606
+ [BP k] ep 60: test=0.5716
+ [BP k] ep 70: test=0.5749
+ [BP k] ep 80: test=0.5806
+ [BP k] ep 90: test=0.5817
+ [BP k] ep 100: test=0.5819
+ FINAL bp k=1 seed=123: 0.5819
+
+=== BP k=1 (last 1 of 2 trainable) seed=456 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [BP k] ep 1: test=0.3696
+ [BP k] ep 10: test=0.4737
+ [BP k] ep 20: test=0.5199
+ [BP k] ep 30: test=0.5317
+ [BP k] ep 40: test=0.5498
+ [BP k] ep 50: test=0.5610
+ [BP k] ep 60: test=0.5675
+ [BP k] ep 70: test=0.5767
+ [BP k] ep 80: test=0.5785
+ [BP k] ep 90: test=0.5802
+ [BP k] ep 100: test=0.5809
+ FINAL bp k=1 seed=456: 0.5809
+
+=== BP k=2 (last 2 of 2 trainable) seed=42 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [BP k] ep 1: test=0.3790
+ [BP k] ep 10: test=0.5174
+ [BP k] ep 20: test=0.5471
+ [BP k] ep 30: test=0.5712
+ [BP k] ep 40: test=0.5906
+ [BP k] ep 50: test=0.5969
+ [BP k] ep 60: test=0.5977
+ [BP k] ep 70: test=0.5992
+ [BP k] ep 80: test=0.6072
+ [BP k] ep 90: test=0.6037
+ [BP k] ep 100: test=0.6039
+ FINAL bp k=2 seed=42: 0.6039
+
+=== BP k=2 (last 2 of 2 trainable) seed=123 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [BP k] ep 1: test=0.3732
+ [BP k] ep 10: test=0.5161
+ [BP k] ep 20: test=0.5554
+ [BP k] ep 30: test=0.5756
+ [BP k] ep 40: test=0.5811
+ [BP k] ep 50: test=0.5928
+ [BP k] ep 60: test=0.5965
+ [BP k] ep 70: test=0.6016
+ [BP k] ep 80: test=0.6027
+ [BP k] ep 90: test=0.6007
+ [BP k] ep 100: test=0.6020
+ FINAL bp k=2 seed=123: 0.6020
+
+=== BP k=2 (last 2 of 2 trainable) seed=456 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [BP k] ep 1: test=0.3768
+ [BP k] ep 10: test=0.5097
+ [BP k] ep 20: test=0.5499
+ [BP k] ep 30: test=0.5773
+ [BP k] ep 40: test=0.5858
+ [BP k] ep 50: test=0.5845
+ [BP k] ep 60: test=0.5934
+ [BP k] ep 70: test=0.5985
+ [BP k] ep 80: test=0.6011
+ [BP k] ep 90: test=0.6020
+ [BP k] ep 100: test=0.6045
+ FINAL bp k=2 seed=456: 0.6045
+
+=== FA k=0 (last 0 of 2 trainable) seed=42 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [FA k] ep 1: test=0.3288
+ [FA k] ep 10: test=0.3359
+ [FA k] ep 20: test=0.3336
+ [FA k] ep 30: test=0.3328
+ [FA k] ep 40: test=0.3418
+ [FA k] ep 50: test=0.3504
+ [FA k] ep 60: test=0.3564
+ [FA k] ep 70: test=0.3567
+ [FA k] ep 80: test=0.3543
+ [FA k] ep 90: test=0.3574
+ [FA k] ep 100: test=0.3585
+ FINAL fa k=0 seed=42: 0.3585
+
+=== FA k=0 (last 0 of 2 trainable) seed=123 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [FA k] ep 1: test=0.3125
+ [FA k] ep 10: test=0.3374
+ [FA k] ep 20: test=0.3364
+ [FA k] ep 30: test=0.3453
+ [FA k] ep 40: test=0.3437
+ [FA k] ep 50: test=0.3522
+ [FA k] ep 60: test=0.3587
+ [FA k] ep 70: test=0.3550
+ [FA k] ep 80: test=0.3551
+ [FA k] ep 90: test=0.3558
+ [FA k] ep 100: test=0.3584
+ FINAL fa k=0 seed=123: 0.3584
+
+=== FA k=0 (last 0 of 2 trainable) seed=456 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [FA k] ep 1: test=0.3180
+ [FA k] ep 10: test=0.3311
+ [FA k] ep 20: test=0.3344
+ [FA k] ep 30: test=0.3533
+ [FA k] ep 40: test=0.3476
+ [FA k] ep 50: test=0.3523
+ [FA k] ep 60: test=0.3455
+ [FA k] ep 70: test=0.3569
+ [FA k] ep 80: test=0.3562
+ [FA k] ep 90: test=0.3583
+ [FA k] ep 100: test=0.3590
+ FINAL fa k=0 seed=456: 0.3590
+
+=== FA k=1 (last 1 of 2 trainable) seed=42 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [FA k] ep 1: test=0.3235
+ [FA k] ep 10: test=0.3730
+ [FA k] ep 20: test=0.3734
+ [FA k] ep 30: test=0.3829
+ [FA k] ep 40: test=0.3916
+ [FA k] ep 50: test=0.4008
+ [FA k] ep 60: test=0.4012
+ [FA k] ep 70: test=0.4015
+ [FA k] ep 80: test=0.4042
+ [FA k] ep 90: test=0.4082
+ [FA k] ep 100: test=0.4083
+ FINAL fa k=1 seed=42: 0.4083
+
+=== FA k=1 (last 1 of 2 trainable) seed=123 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [FA k] ep 1: test=0.2930
+ [FA k] ep 10: test=0.3662
+ [FA k] ep 20: test=0.3905
+ [FA k] ep 30: test=0.4027
+ [FA k] ep 40: test=0.3948
+ [FA k] ep 50: test=0.4048
+ [FA k] ep 60: test=0.4067
+ [FA k] ep 70: test=0.4094
+ [FA k] ep 80: test=0.4115
+ [FA k] ep 90: test=0.4103
+ [FA k] ep 100: test=0.4134
+ FINAL fa k=1 seed=123: 0.4134
+
+=== FA k=1 (last 1 of 2 trainable) seed=456 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [FA k] ep 1: test=0.3098
+ [FA k] ep 10: test=0.3561
+ [FA k] ep 20: test=0.3860
+ [FA k] ep 30: test=0.3957
+ [FA k] ep 40: test=0.3907
+ [FA k] ep 50: test=0.4032
+ [FA k] ep 60: test=0.4017
+ [FA k] ep 70: test=0.4125
+ [FA k] ep 80: test=0.4123
+ [FA k] ep 90: test=0.4164
+ [FA k] ep 100: test=0.4155
+ FINAL fa k=1 seed=456: 0.4155
+
+=== FA k=2 (last 2 of 2 trainable) seed=42 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [FA k] ep 1: test=0.3028
+ [FA k] ep 10: test=0.3585
+ [FA k] ep 20: test=0.3523
+ [FA k] ep 30: test=0.3315
+ [FA k] ep 40: test=0.3191
+ [FA k] ep 50: test=0.3397
+ [FA k] ep 60: test=0.3566
+ [FA k] ep 70: test=0.3527
+ [FA k] ep 80: test=0.3554
+ [FA k] ep 90: test=0.3593
+ [FA k] ep 100: test=0.3582
+ FINAL fa k=2 seed=42: 0.3582
+
+=== FA k=2 (last 2 of 2 trainable) seed=123 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [FA k] ep 1: test=0.2794
+ [FA k] ep 10: test=0.3627
+ [FA k] ep 20: test=0.3600
+ [FA k] ep 30: test=0.3750
+ [FA k] ep 40: test=0.3482
+ [FA k] ep 50: test=0.3679
+ [FA k] ep 60: test=0.3630
+ [FA k] ep 70: test=0.3643
+ [FA k] ep 80: test=0.3636
+ [FA k] ep 90: test=0.3618
+ [FA k] ep 100: test=0.3621
+ FINAL fa k=2 seed=123: 0.3621
+
+=== FA k=2 (last 2 of 2 trainable) seed=456 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [FA k] ep 1: test=0.3005
+ [FA k] ep 10: test=0.3573
+ [FA k] ep 20: test=0.3624
+ [FA k] ep 30: test=0.3706
+ [FA k] ep 40: test=0.3529
+ [FA k] ep 50: test=0.3648
+ [FA k] ep 60: test=0.3581
+ [FA k] ep 70: test=0.3645
+ [FA k] ep 80: test=0.3652
+ [FA k] ep 90: test=0.3632
+ [FA k] ep 100: test=0.3642
+ FINAL fa k=2 seed=456: 0.3642
+
+=== DFA k=0 (last 0 of 2 trainable) seed=42 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [DFA k] ep 1: test=0.3196
+ [DFA k] ep 10: test=0.3187
+ [DFA k] ep 20: test=0.3369
+ [DFA k] ep 30: test=0.3221
+ [DFA k] ep 40: test=0.3386
+ [DFA k] ep 50: test=0.3401
+ [DFA k] ep 60: test=0.3473
+ [DFA k] ep 70: test=0.3472
+ [DFA k] ep 80: test=0.3426
+ [DFA k] ep 90: test=0.3445
+ [DFA k] ep 100: test=0.3432
+ FINAL dfa k=0 seed=42: 0.3432
+
+=== DFA k=0 (last 0 of 2 trainable) seed=123 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [DFA k] ep 1: test=0.3089
+ [DFA k] ep 10: test=0.3180
+ [DFA k] ep 20: test=0.3301
+ [DFA k] ep 30: test=0.3434
+ [DFA k] ep 40: test=0.3386
+ [DFA k] ep 50: test=0.3343
+ [DFA k] ep 60: test=0.3489
+ [DFA k] ep 70: test=0.3458
+ [DFA k] ep 80: test=0.3499
+ [DFA k] ep 90: test=0.3508
+ [DFA k] ep 100: test=0.3508
+ FINAL dfa k=0 seed=123: 0.3508
+
+=== DFA k=0 (last 0 of 2 trainable) seed=456 ===
+ trainable blocks: [] trainable params: 1,579,530
+ [DFA k] ep 1: test=0.3238
+ [DFA k] ep 10: test=0.3327
+ [DFA k] ep 20: test=0.3395
+ [DFA k] ep 30: test=0.3457
+ [DFA k] ep 40: test=0.3367
+ [DFA k] ep 50: test=0.3496
+ [DFA k] ep 60: test=0.3453
+ [DFA k] ep 70: test=0.3487
+ [DFA k] ep 80: test=0.3491
+ [DFA k] ep 90: test=0.3498
+ [DFA k] ep 100: test=0.3521
+ FINAL dfa k=0 seed=456: 0.3521
+
+=== DFA k=1 (last 1 of 2 trainable) seed=42 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [DFA k] ep 1: test=0.2687
+ [DFA k] ep 10: test=0.2106
+ [DFA k] ep 20: test=0.2293
+ [DFA k] ep 30: test=0.2297
+ [DFA k] ep 40: test=0.2241
+ [DFA k] ep 50: test=0.2318
+ [DFA k] ep 60: test=0.2417
+ [DFA k] ep 70: test=0.2458
+ [DFA k] ep 80: test=0.2463
+ [DFA k] ep 90: test=0.2438
+ [DFA k] ep 100: test=0.2384
+ FINAL dfa k=1 seed=42: 0.2384
+
+=== DFA k=1 (last 1 of 2 trainable) seed=123 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [DFA k] ep 1: test=0.1958
+ [DFA k] ep 10: test=0.1777
+ [DFA k] ep 20: test=0.2220
+ [DFA k] ep 30: test=0.1852
+ [DFA k] ep 40: test=0.2165
+ [DFA k] ep 50: test=0.2095
+ [DFA k] ep 60: test=0.1995
+ [DFA k] ep 70: test=0.2038
+ [DFA k] ep 80: test=0.2068
+ [DFA k] ep 90: test=0.2173
+ [DFA k] ep 100: test=0.2097
+ FINAL dfa k=1 seed=123: 0.2097
+
+=== DFA k=1 (last 1 of 2 trainable) seed=456 ===
+ trainable blocks: [1] trainable params: 2,105,866
+ [DFA k] ep 1: test=0.2118
+ [DFA k] ep 10: test=0.2074
+ [DFA k] ep 20: test=0.1777
+ [DFA k] ep 30: test=0.2043
+ [DFA k] ep 40: test=0.2010
+ [DFA k] ep 50: test=0.2087
+ [DFA k] ep 60: test=0.2073
+ [DFA k] ep 70: test=0.2126
+ [DFA k] ep 80: test=0.2202
+ [DFA k] ep 90: test=0.2355
+ [DFA k] ep 100: test=0.2295
+ FINAL dfa k=1 seed=456: 0.2295
+
+=== DFA k=2 (last 2 of 2 trainable) seed=42 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [DFA k] ep 1: test=0.2769
+ [DFA k] ep 10: test=0.2705
+ [DFA k] ep 20: test=0.3000
+ [DFA k] ep 30: test=0.2988
+ [DFA k] ep 40: test=0.3080
+ [DFA k] ep 50: test=0.2941
+ [DFA k] ep 60: test=0.3025
+ [DFA k] ep 70: test=0.3075
+ [DFA k] ep 80: test=0.3070
+ [DFA k] ep 90: test=0.3063
+ [DFA k] ep 100: test=0.3069
+ FINAL dfa k=2 seed=42: 0.3069
+
+=== DFA k=2 (last 2 of 2 trainable) seed=123 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [DFA k] ep 1: test=0.2582
+ [DFA k] ep 10: test=0.2772
+ [DFA k] ep 20: test=0.2904
+ [DFA k] ep 30: test=0.3072
+ [DFA k] ep 40: test=0.2898
+ [DFA k] ep 50: test=0.2938
+ [DFA k] ep 60: test=0.2892
+ [DFA k] ep 70: test=0.2974
+ [DFA k] ep 80: test=0.2970
+ [DFA k] ep 90: test=0.3035
+ [DFA k] ep 100: test=0.3025
+ FINAL dfa k=2 seed=123: 0.3025
+
+=== DFA k=2 (last 2 of 2 trainable) seed=456 ===
+ trainable blocks: [0, 1] trainable params: 2,632,202
+ [DFA k] ep 1: test=0.2794
+ [DFA k] ep 10: test=0.2888
+ [DFA k] ep 20: test=0.2884
+ [DFA k] ep 30: test=0.2901
+ [DFA k] ep 40: test=0.2784
+ [DFA k] ep 50: test=0.2817
+ [DFA k] ep 60: test=0.2983
+ [DFA k] ep 70: test=0.2920
+ [DFA k] ep 80: test=0.2904
+ [DFA k] ep 90: test=0.2999
+ [DFA k] ep 100: test=0.2963
+ FINAL dfa k=2 seed=456: 0.2963
+
+============================================================
+SUMMARY ladder_d512_L2_cifar10 (mean ± ddof-1 std over seeds)
+============================================================
+ BP k=0: 0.3858±0.0029 k=1: 0.5828±0.0025 k=2: 0.6035±0.0013
+ FA k=0: 0.3586±0.0003 k=1: 0.4124±0.0037 k=2: 0.3615±0.0030
+ DFA k=0: 0.3487±0.0048 k=1: 0.2259±0.0147 k=2: 0.3019±0.0053
+
+Saved -> results/depth_ladder/ladder_d512_L2_cifar10.json
+[Sun Jun 14 05:49:05 PM CDT 2026] ALL DONE
diff --git a/report_explore/MEMO_depth_utility_ladder.md b/report_explore/MEMO_depth_utility_ladder.md
new file mode 100644
index 0000000..d43a983
--- /dev/null
+++ b/report_explore/MEMO_depth_utility_ladder.md
@@ -0,0 +1,119 @@
+# MEMO — Depth-utility ladder (appendix experiment)
+
+**Date:** 2026-06-14
+**Purpose:** Reviewer asked to triangulate the depth-utility diagnostic (D3) more finely
+— turn the binary *frozen-vs-fully-trained* block comparison into a **curve**. We vary the
+number of trainable residual blocks `k`, training the **last `k`** blocks (output side) and
+freezing the first `L−k` at random init; embedding / out_ln / out_head are **always** trained.
+
+**Question.** As more blocks are made trainable, does test accuracy rise? Under a method that
+genuinely trains depth (BP) it should climb; under a method whose deep credit is non-functional
+(DFA) it should stay flat at — or below — the frozen baseline.
+
+**Why output-side-first.** The deepest block receives the most direct credit (FA's last block
+sees the exact output gradient), so the last `k` blocks are the **best case** for the method.
+If even these don't help, depth is unused.
+
+---
+
+## Setup
+
+- Arch / task: ResMLP (CIFAR-10). Two configs: **d=256 L=4** (primary audit) and **d=512 L=2**
+ (FA-failure case — vanilla FA is known to be ≈ frozen here).
+- Methods: **BP** (positive control), **FA** (Lillicrap vanilla feedback alignment), **DFA**.
+- `k ∈ {0,…,L}`; `k=0` = frozen-blocks baseline, `k=L` = full audit.
+- Seeds {42,123,456}; mean ± ddof-1 std.
+- Recipe identical to the main audit: AdamW, lr 1e-3, wd 0.01, cosine, batch 128, 100 epochs,
+ per-block independent optimizers, rms-normalized local surrogate losses.
+- The **full** ladder (all `k`, incl. 0 and L) was run in **one** script for internal
+ consistency — `k=0` / `k=L` reproduce the external anchors (see cross-checks).
+
+Harness: `experiments/depth_utility_ladder.py`.
+Raw results: `results/depth_ladder/ladder_d256_L4_cifar10.json`, `ladder_d512_L2_cifar10.json`.
+Figure: `results/depth_ladder/depth_ladder.png` (`experiments/plot_depth_ladder.py`).
+
+---
+
+## Results (CIFAR-10 test acc, mean ± ddof-1 std, n=3)
+
+**Primary — ResMLP d=256, L=4**
+
+| k (last-k trainable) | BP | FA | DFA |
+|---|---|---|---|
+| 0 (frozen) | 0.389 ± 0.001 | 0.355 ± 0.003 | 0.349 ± 0.003 |
+| 1 | 0.565 ± 0.003 | 0.382 ± 0.008 | 0.244 ± 0.015 |
+| 2 | 0.598 ± 0.003 | 0.349 ± 0.016 | 0.286 ± 0.013 |
+| 3 | 0.608 ± 0.001 | 0.398 ± 0.008 | 0.296 ± 0.008 |
+| 4 (full) | 0.617 ± 0.002 | 0.402 ± 0.009 | 0.301 ± 0.006 |
+
+**Secondary — ResMLP d=512, L=2 (FA-failure)**
+
+| k | BP | FA | DFA |
+|---|---|---|---|
+| 0 (frozen) | 0.386 ± 0.003 | 0.359 ± 0.000 | 0.349 ± 0.005 |
+| 1 | 0.583 ± 0.002 | 0.412 ± 0.004 | 0.226 ± 0.015 |
+| 2 (full) | 0.603 ± 0.001 | 0.361 ± 0.003 | 0.302 ± 0.005 |
+
+---
+
+## Interpretation
+
+- **BP — monotone climb.** d=256: 0.389 → 0.617 (**+23 pp**); d=512: 0.386 → 0.603 (**+22 pp**).
+ Each block made trainable adds accuracy → depth is genuinely usable, so the D3 precondition
+ (BP benefits from depth) holds.
+- **DFA — flat-to-negative.** The frozen rung `k=0` (≈0.349) is DFA's **maximum** in both configs.
+ Every trained-block configuration lands **below** it, including the full audit (`k=L`): d=256
+ full = 0.301 (−4.8 pp vs frozen), d=512 full = 0.302 (−4.7 pp). Training deep DFA blocks does
+ not just fail to help — it actively destroys ~5 pp. **The D3 failure now holds at every
+ granularity**, not just the two extremes.
+- **FA — partial / no net depth utility.** d=256 ends at 0.402 (+4.7 pp over frozen) but
+ non-monotonically; d=512 ends at 0.361 ≈ frozen 0.359 (**no net gain** — the FA-failure case
+ reproduces). FA is the intermediate case: it can use some depth in the easier config and none in the
+ harder one. The non-monotonic dips (d=256 k=2; d=512 k=2) are consistent with FA's mis-scaled
+ sequential credit occasionally hurting.
+
+**One-line takeaway for §6.2:** *A trainable-depth ladder shows BP's accuracy climbs monotonically
+with the number of trainable blocks (+22–23 pp) while DFA peaks at the frozen baseline and
+declines once any deep block is trained; FA shows partial-to-no depth utility. Depth is usable
+(BP), but DFA's deep credit is not.*
+
+## Cross-checks (internal rerun reproduces external anchors)
+
+- BP `k=4` = 0.617 ≈ existing full-audit BP 0.615.
+- DFA `k=4` = 0.301 ≈ existing full-audit DFA 0.301 / 0.306.
+- FA `k=4` = 0.402 ≈ existing FA 0.401.
+- Frozen `k=0` (d=256): DFA 0.349 / FA 0.355 ≈ existing frozen-blocks baseline 0.349; BP's `k=0` is higher (0.389), so the frozen rung is method-dependent.
+
+## Footnote — why `k=0` is already well above chance
+
+`k=0` is **not** an untrained network: embed / out_ln / out_head are trained; only the blocks are
+frozen at random init. At init the residual branches are **small but non-negligible**:
+per block `‖f_l(h_l)‖/‖h_l‖ ≈ 0.10`, and the full frozen 4-block stack deviates from the identity
+by `‖h_L−h_0‖/‖h_0‖ = 0.196 ± 0.003` with `cos(h_L,h_0) = 0.981 ± 0.001` (3 seeds, CIFAR-10
+batch). The frozen stack is therefore a fixed, **near-norm-preserving random feature map**, not a
+strict identity. So `k=0` (≈0.35 for FA/DFA, ≈0.39 for BP) is the accuracy of a trained embedding+readout composed with
+this fixed map — effectively a trained (near-)linear classifier on pixels, well above the 10%
+chance level. Measurement: `experiments/frozen_init_identity_check.py` →
+`results/depth_ladder/frozen_init_identity.json`.
+
+## Reproduce
+
+```bash
+# ladders (GPU2, ~7 h for both, 72 runs, incremental/resumable JSON)
+CUDA_VISIBLE_DEVICES=2 python experiments/depth_utility_ladder.py \
+ --d_hidden 256 --num_blocks 4 --methods bp fa dfa --k_values 0 1 2 3 4 \
+ --seeds 42 123 456 --epochs 100 --gpu 0 --output_dir results/depth_ladder
+CUDA_VISIBLE_DEVICES=2 python experiments/depth_utility_ladder.py \
+ --d_hidden 512 --num_blocks 2 --methods bp fa dfa --k_values 0 1 2 \
+ --seeds 42 123 456 --epochs 100 --gpu 0 --output_dir results/depth_ladder
+# figure + identity check
+python experiments/plot_depth_ladder.py
+CUDA_VISIBLE_DEVICES=2 python experiments/frozen_init_identity_check.py
+```
+
+## Caveats / open items
+
+- Parameter-matched shallow baseline (rule out "it's capacity not depth") not yet run — lower
+ priority; given deep-BP beats frozen by +22–23 pp, the D3 precondition is already safe.
+- FA non-monotonicity (k=1 > k=2 in both configs) is noted but not separately investigated; it
+ does not affect the headline (FA full ≈ or slightly above frozen, ≪ BP).
diff --git a/results/depth_ladder/depth_ladder.png b/results/depth_ladder/depth_ladder.png
new file mode 100644
index 0000000..5fd1f81
--- /dev/null
+++ b/results/depth_ladder/depth_ladder.png
Binary files differ
diff --git a/results/depth_ladder/frozen_init_identity.json b/results/depth_ladder/frozen_init_identity.json
new file mode 100644
index 0000000..1c7048b
--- /dev/null
+++ b/results/depth_ladder/frozen_init_identity.json
@@ -0,0 +1,57 @@
+{
+ "config": {
+ "d_hidden": 256,
+ "L": 4,
+ "num_classes": 10,
+ "batch": 256,
+ "dataset": "cifar10-test",
+ "seeds": [
+ 42,
+ 123,
+ 456
+ ]
+ },
+ "per_seed": {
+ "42": {
+ "per_block_ratio": [
+ 0.09595257043838501,
+ 0.0955488458275795,
+ 0.09637212753295898,
+ 0.09818045794963837
+ ],
+ "rel_dev": 0.1959637552499771,
+ "cos": 0.9811521172523499
+ },
+ "123": {
+ "per_block_ratio": [
+ 0.09584859013557434,
+ 0.09690074622631073,
+ 0.10017187148332596,
+ 0.09818752110004425
+ ],
+ "rel_dev": 0.19837374985218048,
+ "cos": 0.9805399179458618
+ },
+ "456": {
+ "per_block_ratio": [
+ 0.09482444077730179,
+ 0.09799206256866455,
+ 0.09791108965873718,
+ 0.09693857282400131
+ ],
+ "rel_dev": 0.19332122802734375,
+ "cos": 0.9819751381874084
+ }
+ },
+ "per_block_ratio_mean": [
+ 0.09554186711708705,
+ 0.09681388487418492,
+ 0.09815169622500737,
+ 0.09776885062456131
+ ],
+ "per_block_ratio_grand_mean": 0.09706907471021016,
+ "rel_dev_mean": 0.19588624437650046,
+ "rel_dev_std": 0.0025271525773572136,
+ "cos_mean": 0.98122239112854,
+ "cos_std": 0.0007201861555822825
+} \ No newline at end of file
diff --git a/results/depth_ladder/ladder_d256_L4_cifar10.json b/results/depth_ladder/ladder_d256_L4_cifar10.json
new file mode 100644
index 0000000..cfbd363
--- /dev/null
+++ b/results/depth_ladder/ladder_d256_L4_cifar10.json
@@ -0,0 +1,2274 @@
+{
+ "config": {
+ "d_hidden": 256,
+ "num_blocks": 4,
+ "dataset": "cifar10",
+ "methods": [
+ "bp",
+ "fa",
+ "dfa"
+ ],
+ "k_values": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4
+ ],
+ "seeds": [
+ 42,
+ 123,
+ 456
+ ],
+ "epochs": 100,
+ "lr": 0.001,
+ "wd": 0.01,
+ "batch_size": 128,
+ "gpu": 0,
+ "output_dir": "results/depth_ladder",
+ "num_classes": 10
+ },
+ "results": {
+ "bp": {
+ "0": {
+ "42": {
+ "final_acc": 0.3882,
+ "curve": [
+ [
+ 1,
+ 0.3543
+ ],
+ [
+ 10,
+ 0.3673
+ ],
+ [
+ 20,
+ 0.3483
+ ],
+ [
+ 30,
+ 0.3498
+ ],
+ [
+ 40,
+ 0.3608
+ ],
+ [
+ 50,
+ 0.3627
+ ],
+ [
+ 60,
+ 0.3697
+ ],
+ [
+ 70,
+ 0.3803
+ ],
+ [
+ 80,
+ 0.3821
+ ],
+ [
+ 90,
+ 0.387
+ ],
+ [
+ 100,
+ 0.3882
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3899,
+ "curve": [
+ [
+ 1,
+ 0.3535
+ ],
+ [
+ 10,
+ 0.3654
+ ],
+ [
+ 20,
+ 0.3612
+ ],
+ [
+ 30,
+ 0.3586
+ ],
+ [
+ 40,
+ 0.3633
+ ],
+ [
+ 50,
+ 0.3608
+ ],
+ [
+ 60,
+ 0.3772
+ ],
+ [
+ 70,
+ 0.3791
+ ],
+ [
+ 80,
+ 0.3897
+ ],
+ [
+ 90,
+ 0.3884
+ ],
+ [
+ 100,
+ 0.3899
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3878,
+ "curve": [
+ [
+ 1,
+ 0.3551
+ ],
+ [
+ 10,
+ 0.368
+ ],
+ [
+ 20,
+ 0.3509
+ ],
+ [
+ 30,
+ 0.3655
+ ],
+ [
+ 40,
+ 0.3573
+ ],
+ [
+ 50,
+ 0.3543
+ ],
+ [
+ 60,
+ 0.3716
+ ],
+ [
+ 70,
+ 0.3824
+ ],
+ [
+ 80,
+ 0.3852
+ ],
+ [
+ 90,
+ 0.3891
+ ],
+ [
+ 100,
+ 0.3878
+ ]
+ ]
+ }
+ },
+ "1": {
+ "42": {
+ "final_acc": 0.5683,
+ "curve": [
+ [
+ 1,
+ 0.3736
+ ],
+ [
+ 10,
+ 0.489
+ ],
+ [
+ 20,
+ 0.5089
+ ],
+ [
+ 30,
+ 0.526
+ ],
+ [
+ 40,
+ 0.5365
+ ],
+ [
+ 50,
+ 0.5486
+ ],
+ [
+ 60,
+ 0.5524
+ ],
+ [
+ 70,
+ 0.5638
+ ],
+ [
+ 80,
+ 0.5666
+ ],
+ [
+ 90,
+ 0.5678
+ ],
+ [
+ 100,
+ 0.5683
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.5623,
+ "curve": [
+ [
+ 1,
+ 0.3878
+ ],
+ [
+ 10,
+ 0.4797
+ ],
+ [
+ 20,
+ 0.5096
+ ],
+ [
+ 30,
+ 0.5209
+ ],
+ [
+ 40,
+ 0.528
+ ],
+ [
+ 50,
+ 0.5486
+ ],
+ [
+ 60,
+ 0.553
+ ],
+ [
+ 70,
+ 0.5564
+ ],
+ [
+ 80,
+ 0.5609
+ ],
+ [
+ 90,
+ 0.5611
+ ],
+ [
+ 100,
+ 0.5623
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.5643,
+ "curve": [
+ [
+ 1,
+ 0.3772
+ ],
+ [
+ 10,
+ 0.4853
+ ],
+ [
+ 20,
+ 0.5098
+ ],
+ [
+ 30,
+ 0.5238
+ ],
+ [
+ 40,
+ 0.5387
+ ],
+ [
+ 50,
+ 0.5488
+ ],
+ [
+ 60,
+ 0.5547
+ ],
+ [
+ 70,
+ 0.5588
+ ],
+ [
+ 80,
+ 0.5636
+ ],
+ [
+ 90,
+ 0.5637
+ ],
+ [
+ 100,
+ 0.5643
+ ]
+ ]
+ }
+ },
+ "2": {
+ "42": {
+ "final_acc": 0.5994,
+ "curve": [
+ [
+ 1,
+ 0.3874
+ ],
+ [
+ 10,
+ 0.5157
+ ],
+ [
+ 20,
+ 0.5361
+ ],
+ [
+ 30,
+ 0.56
+ ],
+ [
+ 40,
+ 0.5753
+ ],
+ [
+ 50,
+ 0.5802
+ ],
+ [
+ 60,
+ 0.5843
+ ],
+ [
+ 70,
+ 0.5965
+ ],
+ [
+ 80,
+ 0.597
+ ],
+ [
+ 90,
+ 0.5979
+ ],
+ [
+ 100,
+ 0.5994
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.6,
+ "curve": [
+ [
+ 1,
+ 0.3925
+ ],
+ [
+ 10,
+ 0.5148
+ ],
+ [
+ 20,
+ 0.5376
+ ],
+ [
+ 30,
+ 0.5638
+ ],
+ [
+ 40,
+ 0.5693
+ ],
+ [
+ 50,
+ 0.5784
+ ],
+ [
+ 60,
+ 0.5927
+ ],
+ [
+ 70,
+ 0.5911
+ ],
+ [
+ 80,
+ 0.5973
+ ],
+ [
+ 90,
+ 0.5986
+ ],
+ [
+ 100,
+ 0.6
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.5939,
+ "curve": [
+ [
+ 1,
+ 0.3868
+ ],
+ [
+ 10,
+ 0.5103
+ ],
+ [
+ 20,
+ 0.542
+ ],
+ [
+ 30,
+ 0.561
+ ],
+ [
+ 40,
+ 0.5699
+ ],
+ [
+ 50,
+ 0.5789
+ ],
+ [
+ 60,
+ 0.5809
+ ],
+ [
+ 70,
+ 0.5844
+ ],
+ [
+ 80,
+ 0.5919
+ ],
+ [
+ 90,
+ 0.5919
+ ],
+ [
+ 100,
+ 0.5939
+ ]
+ ]
+ }
+ },
+ "3": {
+ "42": {
+ "final_acc": 0.6079,
+ "curve": [
+ [
+ 1,
+ 0.3904
+ ],
+ [
+ 10,
+ 0.5218
+ ],
+ [
+ 20,
+ 0.5469
+ ],
+ [
+ 30,
+ 0.5749
+ ],
+ [
+ 40,
+ 0.5935
+ ],
+ [
+ 50,
+ 0.595
+ ],
+ [
+ 60,
+ 0.5983
+ ],
+ [
+ 70,
+ 0.6015
+ ],
+ [
+ 80,
+ 0.607
+ ],
+ [
+ 90,
+ 0.6057
+ ],
+ [
+ 100,
+ 0.6079
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.6069,
+ "curve": [
+ [
+ 1,
+ 0.3965
+ ],
+ [
+ 10,
+ 0.524
+ ],
+ [
+ 20,
+ 0.5517
+ ],
+ [
+ 30,
+ 0.5747
+ ],
+ [
+ 40,
+ 0.5774
+ ],
+ [
+ 50,
+ 0.5927
+ ],
+ [
+ 60,
+ 0.6035
+ ],
+ [
+ 70,
+ 0.603
+ ],
+ [
+ 80,
+ 0.6057
+ ],
+ [
+ 90,
+ 0.6073
+ ],
+ [
+ 100,
+ 0.6069
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.608,
+ "curve": [
+ [
+ 1,
+ 0.3947
+ ],
+ [
+ 10,
+ 0.5148
+ ],
+ [
+ 20,
+ 0.5536
+ ],
+ [
+ 30,
+ 0.5723
+ ],
+ [
+ 40,
+ 0.5873
+ ],
+ [
+ 50,
+ 0.5861
+ ],
+ [
+ 60,
+ 0.5991
+ ],
+ [
+ 70,
+ 0.5989
+ ],
+ [
+ 80,
+ 0.6062
+ ],
+ [
+ 90,
+ 0.6093
+ ],
+ [
+ 100,
+ 0.608
+ ]
+ ]
+ }
+ },
+ "4": {
+ "42": {
+ "final_acc": 0.6173,
+ "curve": [
+ [
+ 1,
+ 0.3936
+ ],
+ [
+ 10,
+ 0.5235
+ ],
+ [
+ 20,
+ 0.5606
+ ],
+ [
+ 30,
+ 0.5794
+ ],
+ [
+ 40,
+ 0.5992
+ ],
+ [
+ 50,
+ 0.6044
+ ],
+ [
+ 60,
+ 0.5979
+ ],
+ [
+ 70,
+ 0.6115
+ ],
+ [
+ 80,
+ 0.6153
+ ],
+ [
+ 90,
+ 0.6177
+ ],
+ [
+ 100,
+ 0.6173
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.6182,
+ "curve": [
+ [
+ 1,
+ 0.3981
+ ],
+ [
+ 10,
+ 0.5257
+ ],
+ [
+ 20,
+ 0.558
+ ],
+ [
+ 30,
+ 0.5779
+ ],
+ [
+ 40,
+ 0.5896
+ ],
+ [
+ 50,
+ 0.6023
+ ],
+ [
+ 60,
+ 0.6053
+ ],
+ [
+ 70,
+ 0.6081
+ ],
+ [
+ 80,
+ 0.6185
+ ],
+ [
+ 90,
+ 0.6174
+ ],
+ [
+ 100,
+ 0.6182
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.6145,
+ "curve": [
+ [
+ 1,
+ 0.3967
+ ],
+ [
+ 10,
+ 0.5255
+ ],
+ [
+ 20,
+ 0.5632
+ ],
+ [
+ 30,
+ 0.5747
+ ],
+ [
+ 40,
+ 0.5948
+ ],
+ [
+ 50,
+ 0.5954
+ ],
+ [
+ 60,
+ 0.6092
+ ],
+ [
+ 70,
+ 0.614
+ ],
+ [
+ 80,
+ 0.6125
+ ],
+ [
+ 90,
+ 0.6145
+ ],
+ [
+ 100,
+ 0.6145
+ ]
+ ]
+ }
+ }
+ },
+ "fa": {
+ "0": {
+ "42": {
+ "final_acc": 0.3555,
+ "curve": [
+ [
+ 1,
+ 0.3112
+ ],
+ [
+ 10,
+ 0.3389
+ ],
+ [
+ 20,
+ 0.3325
+ ],
+ [
+ 30,
+ 0.3495
+ ],
+ [
+ 40,
+ 0.3467
+ ],
+ [
+ 50,
+ 0.3465
+ ],
+ [
+ 60,
+ 0.3573
+ ],
+ [
+ 70,
+ 0.3542
+ ],
+ [
+ 80,
+ 0.3567
+ ],
+ [
+ 90,
+ 0.3554
+ ],
+ [
+ 100,
+ 0.3555
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.352,
+ "curve": [
+ [
+ 1,
+ 0.3257
+ ],
+ [
+ 10,
+ 0.3409
+ ],
+ [
+ 20,
+ 0.3514
+ ],
+ [
+ 30,
+ 0.3357
+ ],
+ [
+ 40,
+ 0.3299
+ ],
+ [
+ 50,
+ 0.3495
+ ],
+ [
+ 60,
+ 0.3468
+ ],
+ [
+ 70,
+ 0.3548
+ ],
+ [
+ 80,
+ 0.3509
+ ],
+ [
+ 90,
+ 0.3536
+ ],
+ [
+ 100,
+ 0.352
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3578,
+ "curve": [
+ [
+ 1,
+ 0.3172
+ ],
+ [
+ 10,
+ 0.3374
+ ],
+ [
+ 20,
+ 0.3452
+ ],
+ [
+ 30,
+ 0.3431
+ ],
+ [
+ 40,
+ 0.3468
+ ],
+ [
+ 50,
+ 0.3563
+ ],
+ [
+ 60,
+ 0.3523
+ ],
+ [
+ 70,
+ 0.3578
+ ],
+ [
+ 80,
+ 0.3568
+ ],
+ [
+ 90,
+ 0.3576
+ ],
+ [
+ 100,
+ 0.3578
+ ]
+ ]
+ }
+ },
+ "1": {
+ "42": {
+ "final_acc": 0.3819,
+ "curve": [
+ [
+ 1,
+ 0.2886
+ ],
+ [
+ 10,
+ 0.3301
+ ],
+ [
+ 20,
+ 0.3604
+ ],
+ [
+ 30,
+ 0.3595
+ ],
+ [
+ 40,
+ 0.3678
+ ],
+ [
+ 50,
+ 0.3779
+ ],
+ [
+ 60,
+ 0.3727
+ ],
+ [
+ 70,
+ 0.381
+ ],
+ [
+ 80,
+ 0.381
+ ],
+ [
+ 90,
+ 0.3821
+ ],
+ [
+ 100,
+ 0.3819
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3742,
+ "curve": [
+ [
+ 1,
+ 0.3105
+ ],
+ [
+ 10,
+ 0.3472
+ ],
+ [
+ 20,
+ 0.3444
+ ],
+ [
+ 30,
+ 0.3604
+ ],
+ [
+ 40,
+ 0.3615
+ ],
+ [
+ 50,
+ 0.3568
+ ],
+ [
+ 60,
+ 0.3708
+ ],
+ [
+ 70,
+ 0.3723
+ ],
+ [
+ 80,
+ 0.3749
+ ],
+ [
+ 90,
+ 0.3736
+ ],
+ [
+ 100,
+ 0.3742
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3898,
+ "curve": [
+ [
+ 1,
+ 0.2975
+ ],
+ [
+ 10,
+ 0.3481
+ ],
+ [
+ 20,
+ 0.3454
+ ],
+ [
+ 30,
+ 0.3683
+ ],
+ [
+ 40,
+ 0.3618
+ ],
+ [
+ 50,
+ 0.3675
+ ],
+ [
+ 60,
+ 0.3826
+ ],
+ [
+ 70,
+ 0.3867
+ ],
+ [
+ 80,
+ 0.3863
+ ],
+ [
+ 90,
+ 0.3899
+ ],
+ [
+ 100,
+ 0.3898
+ ]
+ ]
+ }
+ },
+ "2": {
+ "42": {
+ "final_acc": 0.3305,
+ "curve": [
+ [
+ 1,
+ 0.2657
+ ],
+ [
+ 10,
+ 0.3431
+ ],
+ [
+ 20,
+ 0.3494
+ ],
+ [
+ 30,
+ 0.3436
+ ],
+ [
+ 40,
+ 0.3574
+ ],
+ [
+ 50,
+ 0.3388
+ ],
+ [
+ 60,
+ 0.3426
+ ],
+ [
+ 70,
+ 0.3341
+ ],
+ [
+ 80,
+ 0.3303
+ ],
+ [
+ 90,
+ 0.331
+ ],
+ [
+ 100,
+ 0.3305
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3607,
+ "curve": [
+ [
+ 1,
+ 0.2982
+ ],
+ [
+ 10,
+ 0.3524
+ ],
+ [
+ 20,
+ 0.3694
+ ],
+ [
+ 30,
+ 0.3691
+ ],
+ [
+ 40,
+ 0.3703
+ ],
+ [
+ 50,
+ 0.3605
+ ],
+ [
+ 60,
+ 0.3546
+ ],
+ [
+ 70,
+ 0.3547
+ ],
+ [
+ 80,
+ 0.3651
+ ],
+ [
+ 90,
+ 0.3565
+ ],
+ [
+ 100,
+ 0.3607
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3548,
+ "curve": [
+ [
+ 1,
+ 0.2753
+ ],
+ [
+ 10,
+ 0.3386
+ ],
+ [
+ 20,
+ 0.3495
+ ],
+ [
+ 30,
+ 0.3458
+ ],
+ [
+ 40,
+ 0.3374
+ ],
+ [
+ 50,
+ 0.3333
+ ],
+ [
+ 60,
+ 0.3523
+ ],
+ [
+ 70,
+ 0.3538
+ ],
+ [
+ 80,
+ 0.3519
+ ],
+ [
+ 90,
+ 0.3555
+ ],
+ [
+ 100,
+ 0.3548
+ ]
+ ]
+ }
+ },
+ "3": {
+ "42": {
+ "final_acc": 0.393,
+ "curve": [
+ [
+ 1,
+ 0.277
+ ],
+ [
+ 10,
+ 0.3554
+ ],
+ [
+ 20,
+ 0.3681
+ ],
+ [
+ 30,
+ 0.3841
+ ],
+ [
+ 40,
+ 0.3829
+ ],
+ [
+ 50,
+ 0.3847
+ ],
+ [
+ 60,
+ 0.3885
+ ],
+ [
+ 70,
+ 0.3956
+ ],
+ [
+ 80,
+ 0.3947
+ ],
+ [
+ 90,
+ 0.3916
+ ],
+ [
+ 100,
+ 0.393
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.4074,
+ "curve": [
+ [
+ 1,
+ 0.2905
+ ],
+ [
+ 10,
+ 0.3495
+ ],
+ [
+ 20,
+ 0.3804
+ ],
+ [
+ 30,
+ 0.382
+ ],
+ [
+ 40,
+ 0.3885
+ ],
+ [
+ 50,
+ 0.395
+ ],
+ [
+ 60,
+ 0.3971
+ ],
+ [
+ 70,
+ 0.4049
+ ],
+ [
+ 80,
+ 0.4047
+ ],
+ [
+ 90,
+ 0.4075
+ ],
+ [
+ 100,
+ 0.4074
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3946,
+ "curve": [
+ [
+ 1,
+ 0.2708
+ ],
+ [
+ 10,
+ 0.3511
+ ],
+ [
+ 20,
+ 0.3662
+ ],
+ [
+ 30,
+ 0.3755
+ ],
+ [
+ 40,
+ 0.3818
+ ],
+ [
+ 50,
+ 0.3828
+ ],
+ [
+ 60,
+ 0.3966
+ ],
+ [
+ 70,
+ 0.3939
+ ],
+ [
+ 80,
+ 0.3928
+ ],
+ [
+ 90,
+ 0.3933
+ ],
+ [
+ 100,
+ 0.3946
+ ]
+ ]
+ }
+ },
+ "4": {
+ "42": {
+ "final_acc": 0.3959,
+ "curve": [
+ [
+ 1,
+ 0.2789
+ ],
+ [
+ 10,
+ 0.3498
+ ],
+ [
+ 20,
+ 0.3601
+ ],
+ [
+ 30,
+ 0.371
+ ],
+ [
+ 40,
+ 0.3834
+ ],
+ [
+ 50,
+ 0.3923
+ ],
+ [
+ 60,
+ 0.3912
+ ],
+ [
+ 70,
+ 0.3945
+ ],
+ [
+ 80,
+ 0.3957
+ ],
+ [
+ 90,
+ 0.3944
+ ],
+ [
+ 100,
+ 0.3959
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.4122,
+ "curve": [
+ [
+ 1,
+ 0.2905
+ ],
+ [
+ 10,
+ 0.3596
+ ],
+ [
+ 20,
+ 0.3803
+ ],
+ [
+ 30,
+ 0.3792
+ ],
+ [
+ 40,
+ 0.3955
+ ],
+ [
+ 50,
+ 0.398
+ ],
+ [
+ 60,
+ 0.4071
+ ],
+ [
+ 70,
+ 0.4034
+ ],
+ [
+ 80,
+ 0.4076
+ ],
+ [
+ 90,
+ 0.4115
+ ],
+ [
+ 100,
+ 0.4122
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3987,
+ "curve": [
+ [
+ 1,
+ 0.2713
+ ],
+ [
+ 10,
+ 0.3544
+ ],
+ [
+ 20,
+ 0.3702
+ ],
+ [
+ 30,
+ 0.3799
+ ],
+ [
+ 40,
+ 0.3845
+ ],
+ [
+ 50,
+ 0.3923
+ ],
+ [
+ 60,
+ 0.3992
+ ],
+ [
+ 70,
+ 0.3974
+ ],
+ [
+ 80,
+ 0.399
+ ],
+ [
+ 90,
+ 0.4
+ ],
+ [
+ 100,
+ 0.3987
+ ]
+ ]
+ }
+ }
+ },
+ "dfa": {
+ "0": {
+ "42": {
+ "final_acc": 0.3454,
+ "curve": [
+ [
+ 1,
+ 0.3185
+ ],
+ [
+ 10,
+ 0.337
+ ],
+ [
+ 20,
+ 0.3458
+ ],
+ [
+ 30,
+ 0.3425
+ ],
+ [
+ 40,
+ 0.3419
+ ],
+ [
+ 50,
+ 0.3425
+ ],
+ [
+ 60,
+ 0.342
+ ],
+ [
+ 70,
+ 0.3466
+ ],
+ [
+ 80,
+ 0.3458
+ ],
+ [
+ 90,
+ 0.347
+ ],
+ [
+ 100,
+ 0.3454
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3498,
+ "curve": [
+ [
+ 1,
+ 0.3219
+ ],
+ [
+ 10,
+ 0.3339
+ ],
+ [
+ 20,
+ 0.3453
+ ],
+ [
+ 30,
+ 0.3352
+ ],
+ [
+ 40,
+ 0.3322
+ ],
+ [
+ 50,
+ 0.3291
+ ],
+ [
+ 60,
+ 0.3428
+ ],
+ [
+ 70,
+ 0.3447
+ ],
+ [
+ 80,
+ 0.3465
+ ],
+ [
+ 90,
+ 0.3464
+ ],
+ [
+ 100,
+ 0.3498
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3516,
+ "curve": [
+ [
+ 1,
+ 0.3241
+ ],
+ [
+ 10,
+ 0.3486
+ ],
+ [
+ 20,
+ 0.3396
+ ],
+ [
+ 30,
+ 0.3396
+ ],
+ [
+ 40,
+ 0.3387
+ ],
+ [
+ 50,
+ 0.3456
+ ],
+ [
+ 60,
+ 0.3508
+ ],
+ [
+ 70,
+ 0.3527
+ ],
+ [
+ 80,
+ 0.3498
+ ],
+ [
+ 90,
+ 0.3508
+ ],
+ [
+ 100,
+ 0.3516
+ ]
+ ]
+ }
+ },
+ "1": {
+ "42": {
+ "final_acc": 0.2267,
+ "curve": [
+ [
+ 1,
+ 0.2563
+ ],
+ [
+ 10,
+ 0.258
+ ],
+ [
+ 20,
+ 0.2445
+ ],
+ [
+ 30,
+ 0.2197
+ ],
+ [
+ 40,
+ 0.2229
+ ],
+ [
+ 50,
+ 0.1952
+ ],
+ [
+ 60,
+ 0.2306
+ ],
+ [
+ 70,
+ 0.229
+ ],
+ [
+ 80,
+ 0.2211
+ ],
+ [
+ 90,
+ 0.2215
+ ],
+ [
+ 100,
+ 0.2267
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.2563,
+ "curve": [
+ [
+ 1,
+ 0.2549
+ ],
+ [
+ 10,
+ 0.2505
+ ],
+ [
+ 20,
+ 0.2453
+ ],
+ [
+ 30,
+ 0.2358
+ ],
+ [
+ 40,
+ 0.2499
+ ],
+ [
+ 50,
+ 0.2506
+ ],
+ [
+ 60,
+ 0.2467
+ ],
+ [
+ 70,
+ 0.2513
+ ],
+ [
+ 80,
+ 0.2597
+ ],
+ [
+ 90,
+ 0.2586
+ ],
+ [
+ 100,
+ 0.2563
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.2476,
+ "curve": [
+ [
+ 1,
+ 0.2112
+ ],
+ [
+ 10,
+ 0.2227
+ ],
+ [
+ 20,
+ 0.2397
+ ],
+ [
+ 30,
+ 0.2326
+ ],
+ [
+ 40,
+ 0.2285
+ ],
+ [
+ 50,
+ 0.2176
+ ],
+ [
+ 60,
+ 0.2431
+ ],
+ [
+ 70,
+ 0.2476
+ ],
+ [
+ 80,
+ 0.2493
+ ],
+ [
+ 90,
+ 0.2477
+ ],
+ [
+ 100,
+ 0.2476
+ ]
+ ]
+ }
+ },
+ "2": {
+ "42": {
+ "final_acc": 0.3005,
+ "curve": [
+ [
+ 1,
+ 0.2792
+ ],
+ [
+ 10,
+ 0.2893
+ ],
+ [
+ 20,
+ 0.2978
+ ],
+ [
+ 30,
+ 0.296
+ ],
+ [
+ 40,
+ 0.301
+ ],
+ [
+ 50,
+ 0.3014
+ ],
+ [
+ 60,
+ 0.3005
+ ],
+ [
+ 70,
+ 0.3036
+ ],
+ [
+ 80,
+ 0.2997
+ ],
+ [
+ 90,
+ 0.3005
+ ],
+ [
+ 100,
+ 0.3005
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.2819,
+ "curve": [
+ [
+ 1,
+ 0.2671
+ ],
+ [
+ 10,
+ 0.2947
+ ],
+ [
+ 20,
+ 0.2841
+ ],
+ [
+ 30,
+ 0.2801
+ ],
+ [
+ 40,
+ 0.2819
+ ],
+ [
+ 50,
+ 0.2772
+ ],
+ [
+ 60,
+ 0.2834
+ ],
+ [
+ 70,
+ 0.2876
+ ],
+ [
+ 80,
+ 0.2757
+ ],
+ [
+ 90,
+ 0.2806
+ ],
+ [
+ 100,
+ 0.2819
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.2751,
+ "curve": [
+ [
+ 1,
+ 0.2604
+ ],
+ [
+ 10,
+ 0.2821
+ ],
+ [
+ 20,
+ 0.2784
+ ],
+ [
+ 30,
+ 0.2826
+ ],
+ [
+ 40,
+ 0.2805
+ ],
+ [
+ 50,
+ 0.2675
+ ],
+ [
+ 60,
+ 0.2735
+ ],
+ [
+ 70,
+ 0.2765
+ ],
+ [
+ 80,
+ 0.2735
+ ],
+ [
+ 90,
+ 0.2759
+ ],
+ [
+ 100,
+ 0.2751
+ ]
+ ]
+ }
+ },
+ "3": {
+ "42": {
+ "final_acc": 0.3047,
+ "curve": [
+ [
+ 1,
+ 0.2821
+ ],
+ [
+ 10,
+ 0.2882
+ ],
+ [
+ 20,
+ 0.2921
+ ],
+ [
+ 30,
+ 0.3064
+ ],
+ [
+ 40,
+ 0.3009
+ ],
+ [
+ 50,
+ 0.3044
+ ],
+ [
+ 60,
+ 0.3041
+ ],
+ [
+ 70,
+ 0.3075
+ ],
+ [
+ 80,
+ 0.3064
+ ],
+ [
+ 90,
+ 0.3021
+ ],
+ [
+ 100,
+ 0.3047
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.2906,
+ "curve": [
+ [
+ 1,
+ 0.263
+ ],
+ [
+ 10,
+ 0.291
+ ],
+ [
+ 20,
+ 0.2845
+ ],
+ [
+ 30,
+ 0.2821
+ ],
+ [
+ 40,
+ 0.29
+ ],
+ [
+ 50,
+ 0.2811
+ ],
+ [
+ 60,
+ 0.286
+ ],
+ [
+ 70,
+ 0.291
+ ],
+ [
+ 80,
+ 0.2879
+ ],
+ [
+ 90,
+ 0.291
+ ],
+ [
+ 100,
+ 0.2906
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.2919,
+ "curve": [
+ [
+ 1,
+ 0.2544
+ ],
+ [
+ 10,
+ 0.2841
+ ],
+ [
+ 20,
+ 0.2892
+ ],
+ [
+ 30,
+ 0.2998
+ ],
+ [
+ 40,
+ 0.2891
+ ],
+ [
+ 50,
+ 0.2844
+ ],
+ [
+ 60,
+ 0.2938
+ ],
+ [
+ 70,
+ 0.2928
+ ],
+ [
+ 80,
+ 0.2901
+ ],
+ [
+ 90,
+ 0.2932
+ ],
+ [
+ 100,
+ 0.2919
+ ]
+ ]
+ }
+ },
+ "4": {
+ "42": {
+ "final_acc": 0.3068,
+ "curve": [
+ [
+ 1,
+ 0.2899
+ ],
+ [
+ 10,
+ 0.2873
+ ],
+ [
+ 20,
+ 0.3016
+ ],
+ [
+ 30,
+ 0.3053
+ ],
+ [
+ 40,
+ 0.312
+ ],
+ [
+ 50,
+ 0.3045
+ ],
+ [
+ 60,
+ 0.3071
+ ],
+ [
+ 70,
+ 0.3102
+ ],
+ [
+ 80,
+ 0.308
+ ],
+ [
+ 90,
+ 0.3066
+ ],
+ [
+ 100,
+ 0.3068
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3023,
+ "curve": [
+ [
+ 1,
+ 0.2683
+ ],
+ [
+ 10,
+ 0.2926
+ ],
+ [
+ 20,
+ 0.2861
+ ],
+ [
+ 30,
+ 0.2875
+ ],
+ [
+ 40,
+ 0.2978
+ ],
+ [
+ 50,
+ 0.291
+ ],
+ [
+ 60,
+ 0.2972
+ ],
+ [
+ 70,
+ 0.3011
+ ],
+ [
+ 80,
+ 0.2974
+ ],
+ [
+ 90,
+ 0.3015
+ ],
+ [
+ 100,
+ 0.3023
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.2949,
+ "curve": [
+ [
+ 1,
+ 0.2591
+ ],
+ [
+ 10,
+ 0.2883
+ ],
+ [
+ 20,
+ 0.2948
+ ],
+ [
+ 30,
+ 0.2995
+ ],
+ [
+ 40,
+ 0.2921
+ ],
+ [
+ 50,
+ 0.2956
+ ],
+ [
+ 60,
+ 0.296
+ ],
+ [
+ 70,
+ 0.2943
+ ],
+ [
+ 80,
+ 0.291
+ ],
+ [
+ 90,
+ 0.2955
+ ],
+ [
+ 100,
+ 0.2949
+ ]
+ ]
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/results/depth_ladder/ladder_d512_L2_cifar10.json b/results/depth_ladder/ladder_d512_L2_cifar10.json
new file mode 100644
index 0000000..4a3feff
--- /dev/null
+++ b/results/depth_ladder/ladder_d512_L2_cifar10.json
@@ -0,0 +1,1378 @@
+{
+ "config": {
+ "d_hidden": 512,
+ "num_blocks": 2,
+ "dataset": "cifar10",
+ "methods": [
+ "bp",
+ "fa",
+ "dfa"
+ ],
+ "k_values": [
+ 0,
+ 1,
+ 2
+ ],
+ "seeds": [
+ 42,
+ 123,
+ 456
+ ],
+ "epochs": 100,
+ "lr": 0.001,
+ "wd": 0.01,
+ "batch_size": 128,
+ "gpu": 0,
+ "output_dir": "results/depth_ladder",
+ "num_classes": 10
+ },
+ "results": {
+ "bp": {
+ "0": {
+ "42": {
+ "final_acc": 0.3891,
+ "curve": [
+ [
+ 1,
+ 0.3462
+ ],
+ [
+ 10,
+ 0.3633
+ ],
+ [
+ 20,
+ 0.3635
+ ],
+ [
+ 30,
+ 0.3543
+ ],
+ [
+ 40,
+ 0.3673
+ ],
+ [
+ 50,
+ 0.3633
+ ],
+ [
+ 60,
+ 0.3695
+ ],
+ [
+ 70,
+ 0.3753
+ ],
+ [
+ 80,
+ 0.3858
+ ],
+ [
+ 90,
+ 0.3887
+ ],
+ [
+ 100,
+ 0.3891
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3846,
+ "curve": [
+ [
+ 1,
+ 0.3497
+ ],
+ [
+ 10,
+ 0.3704
+ ],
+ [
+ 20,
+ 0.3698
+ ],
+ [
+ 30,
+ 0.354
+ ],
+ [
+ 40,
+ 0.3505
+ ],
+ [
+ 50,
+ 0.3634
+ ],
+ [
+ 60,
+ 0.3675
+ ],
+ [
+ 70,
+ 0.3739
+ ],
+ [
+ 80,
+ 0.3823
+ ],
+ [
+ 90,
+ 0.3845
+ ],
+ [
+ 100,
+ 0.3846
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3838,
+ "curve": [
+ [
+ 1,
+ 0.3409
+ ],
+ [
+ 10,
+ 0.3578
+ ],
+ [
+ 20,
+ 0.3767
+ ],
+ [
+ 30,
+ 0.3607
+ ],
+ [
+ 40,
+ 0.3551
+ ],
+ [
+ 50,
+ 0.3632
+ ],
+ [
+ 60,
+ 0.3722
+ ],
+ [
+ 70,
+ 0.3704
+ ],
+ [
+ 80,
+ 0.3784
+ ],
+ [
+ 90,
+ 0.3834
+ ],
+ [
+ 100,
+ 0.3838
+ ]
+ ]
+ }
+ },
+ "1": {
+ "42": {
+ "final_acc": 0.5856,
+ "curve": [
+ [
+ 1,
+ 0.3667
+ ],
+ [
+ 10,
+ 0.4836
+ ],
+ [
+ 20,
+ 0.5197
+ ],
+ [
+ 30,
+ 0.5367
+ ],
+ [
+ 40,
+ 0.5444
+ ],
+ [
+ 50,
+ 0.5629
+ ],
+ [
+ 60,
+ 0.5691
+ ],
+ [
+ 70,
+ 0.5779
+ ],
+ [
+ 80,
+ 0.5808
+ ],
+ [
+ 90,
+ 0.5849
+ ],
+ [
+ 100,
+ 0.5856
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.5819,
+ "curve": [
+ [
+ 1,
+ 0.3632
+ ],
+ [
+ 10,
+ 0.4865
+ ],
+ [
+ 20,
+ 0.5175
+ ],
+ [
+ 30,
+ 0.536
+ ],
+ [
+ 40,
+ 0.5466
+ ],
+ [
+ 50,
+ 0.5606
+ ],
+ [
+ 60,
+ 0.5716
+ ],
+ [
+ 70,
+ 0.5749
+ ],
+ [
+ 80,
+ 0.5806
+ ],
+ [
+ 90,
+ 0.5817
+ ],
+ [
+ 100,
+ 0.5819
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.5809,
+ "curve": [
+ [
+ 1,
+ 0.3696
+ ],
+ [
+ 10,
+ 0.4737
+ ],
+ [
+ 20,
+ 0.5199
+ ],
+ [
+ 30,
+ 0.5317
+ ],
+ [
+ 40,
+ 0.5498
+ ],
+ [
+ 50,
+ 0.561
+ ],
+ [
+ 60,
+ 0.5675
+ ],
+ [
+ 70,
+ 0.5767
+ ],
+ [
+ 80,
+ 0.5785
+ ],
+ [
+ 90,
+ 0.5802
+ ],
+ [
+ 100,
+ 0.5809
+ ]
+ ]
+ }
+ },
+ "2": {
+ "42": {
+ "final_acc": 0.6039,
+ "curve": [
+ [
+ 1,
+ 0.379
+ ],
+ [
+ 10,
+ 0.5174
+ ],
+ [
+ 20,
+ 0.5471
+ ],
+ [
+ 30,
+ 0.5712
+ ],
+ [
+ 40,
+ 0.5906
+ ],
+ [
+ 50,
+ 0.5969
+ ],
+ [
+ 60,
+ 0.5977
+ ],
+ [
+ 70,
+ 0.5992
+ ],
+ [
+ 80,
+ 0.6072
+ ],
+ [
+ 90,
+ 0.6037
+ ],
+ [
+ 100,
+ 0.6039
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.602,
+ "curve": [
+ [
+ 1,
+ 0.3732
+ ],
+ [
+ 10,
+ 0.5161
+ ],
+ [
+ 20,
+ 0.5554
+ ],
+ [
+ 30,
+ 0.5756
+ ],
+ [
+ 40,
+ 0.5811
+ ],
+ [
+ 50,
+ 0.5928
+ ],
+ [
+ 60,
+ 0.5965
+ ],
+ [
+ 70,
+ 0.6016
+ ],
+ [
+ 80,
+ 0.6027
+ ],
+ [
+ 90,
+ 0.6007
+ ],
+ [
+ 100,
+ 0.602
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.6045,
+ "curve": [
+ [
+ 1,
+ 0.3768
+ ],
+ [
+ 10,
+ 0.5097
+ ],
+ [
+ 20,
+ 0.5499
+ ],
+ [
+ 30,
+ 0.5773
+ ],
+ [
+ 40,
+ 0.5858
+ ],
+ [
+ 50,
+ 0.5845
+ ],
+ [
+ 60,
+ 0.5934
+ ],
+ [
+ 70,
+ 0.5985
+ ],
+ [
+ 80,
+ 0.6011
+ ],
+ [
+ 90,
+ 0.602
+ ],
+ [
+ 100,
+ 0.6045
+ ]
+ ]
+ }
+ }
+ },
+ "fa": {
+ "0": {
+ "42": {
+ "final_acc": 0.3585,
+ "curve": [
+ [
+ 1,
+ 0.3288
+ ],
+ [
+ 10,
+ 0.3359
+ ],
+ [
+ 20,
+ 0.3336
+ ],
+ [
+ 30,
+ 0.3328
+ ],
+ [
+ 40,
+ 0.3418
+ ],
+ [
+ 50,
+ 0.3504
+ ],
+ [
+ 60,
+ 0.3564
+ ],
+ [
+ 70,
+ 0.3567
+ ],
+ [
+ 80,
+ 0.3543
+ ],
+ [
+ 90,
+ 0.3574
+ ],
+ [
+ 100,
+ 0.3585
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3584,
+ "curve": [
+ [
+ 1,
+ 0.3125
+ ],
+ [
+ 10,
+ 0.3374
+ ],
+ [
+ 20,
+ 0.3364
+ ],
+ [
+ 30,
+ 0.3453
+ ],
+ [
+ 40,
+ 0.3437
+ ],
+ [
+ 50,
+ 0.3522
+ ],
+ [
+ 60,
+ 0.3587
+ ],
+ [
+ 70,
+ 0.355
+ ],
+ [
+ 80,
+ 0.3551
+ ],
+ [
+ 90,
+ 0.3558
+ ],
+ [
+ 100,
+ 0.3584
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.359,
+ "curve": [
+ [
+ 1,
+ 0.318
+ ],
+ [
+ 10,
+ 0.3311
+ ],
+ [
+ 20,
+ 0.3344
+ ],
+ [
+ 30,
+ 0.3533
+ ],
+ [
+ 40,
+ 0.3476
+ ],
+ [
+ 50,
+ 0.3523
+ ],
+ [
+ 60,
+ 0.3455
+ ],
+ [
+ 70,
+ 0.3569
+ ],
+ [
+ 80,
+ 0.3562
+ ],
+ [
+ 90,
+ 0.3583
+ ],
+ [
+ 100,
+ 0.359
+ ]
+ ]
+ }
+ },
+ "1": {
+ "42": {
+ "final_acc": 0.4083,
+ "curve": [
+ [
+ 1,
+ 0.3235
+ ],
+ [
+ 10,
+ 0.373
+ ],
+ [
+ 20,
+ 0.3734
+ ],
+ [
+ 30,
+ 0.3829
+ ],
+ [
+ 40,
+ 0.3916
+ ],
+ [
+ 50,
+ 0.4008
+ ],
+ [
+ 60,
+ 0.4012
+ ],
+ [
+ 70,
+ 0.4015
+ ],
+ [
+ 80,
+ 0.4042
+ ],
+ [
+ 90,
+ 0.4082
+ ],
+ [
+ 100,
+ 0.4083
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.4134,
+ "curve": [
+ [
+ 1,
+ 0.293
+ ],
+ [
+ 10,
+ 0.3662
+ ],
+ [
+ 20,
+ 0.3905
+ ],
+ [
+ 30,
+ 0.4027
+ ],
+ [
+ 40,
+ 0.3948
+ ],
+ [
+ 50,
+ 0.4048
+ ],
+ [
+ 60,
+ 0.4067
+ ],
+ [
+ 70,
+ 0.4094
+ ],
+ [
+ 80,
+ 0.4115
+ ],
+ [
+ 90,
+ 0.4103
+ ],
+ [
+ 100,
+ 0.4134
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.4155,
+ "curve": [
+ [
+ 1,
+ 0.3098
+ ],
+ [
+ 10,
+ 0.3561
+ ],
+ [
+ 20,
+ 0.386
+ ],
+ [
+ 30,
+ 0.3957
+ ],
+ [
+ 40,
+ 0.3907
+ ],
+ [
+ 50,
+ 0.4032
+ ],
+ [
+ 60,
+ 0.4017
+ ],
+ [
+ 70,
+ 0.4125
+ ],
+ [
+ 80,
+ 0.4123
+ ],
+ [
+ 90,
+ 0.4164
+ ],
+ [
+ 100,
+ 0.4155
+ ]
+ ]
+ }
+ },
+ "2": {
+ "42": {
+ "final_acc": 0.3582,
+ "curve": [
+ [
+ 1,
+ 0.3028
+ ],
+ [
+ 10,
+ 0.3585
+ ],
+ [
+ 20,
+ 0.3523
+ ],
+ [
+ 30,
+ 0.3315
+ ],
+ [
+ 40,
+ 0.3191
+ ],
+ [
+ 50,
+ 0.3397
+ ],
+ [
+ 60,
+ 0.3566
+ ],
+ [
+ 70,
+ 0.3527
+ ],
+ [
+ 80,
+ 0.3554
+ ],
+ [
+ 90,
+ 0.3593
+ ],
+ [
+ 100,
+ 0.3582
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3621,
+ "curve": [
+ [
+ 1,
+ 0.2794
+ ],
+ [
+ 10,
+ 0.3627
+ ],
+ [
+ 20,
+ 0.36
+ ],
+ [
+ 30,
+ 0.375
+ ],
+ [
+ 40,
+ 0.3482
+ ],
+ [
+ 50,
+ 0.3679
+ ],
+ [
+ 60,
+ 0.363
+ ],
+ [
+ 70,
+ 0.3643
+ ],
+ [
+ 80,
+ 0.3636
+ ],
+ [
+ 90,
+ 0.3618
+ ],
+ [
+ 100,
+ 0.3621
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3642,
+ "curve": [
+ [
+ 1,
+ 0.3005
+ ],
+ [
+ 10,
+ 0.3573
+ ],
+ [
+ 20,
+ 0.3624
+ ],
+ [
+ 30,
+ 0.3706
+ ],
+ [
+ 40,
+ 0.3529
+ ],
+ [
+ 50,
+ 0.3648
+ ],
+ [
+ 60,
+ 0.3581
+ ],
+ [
+ 70,
+ 0.3645
+ ],
+ [
+ 80,
+ 0.3652
+ ],
+ [
+ 90,
+ 0.3632
+ ],
+ [
+ 100,
+ 0.3642
+ ]
+ ]
+ }
+ }
+ },
+ "dfa": {
+ "0": {
+ "42": {
+ "final_acc": 0.3432,
+ "curve": [
+ [
+ 1,
+ 0.3196
+ ],
+ [
+ 10,
+ 0.3187
+ ],
+ [
+ 20,
+ 0.3369
+ ],
+ [
+ 30,
+ 0.3221
+ ],
+ [
+ 40,
+ 0.3386
+ ],
+ [
+ 50,
+ 0.3401
+ ],
+ [
+ 60,
+ 0.3473
+ ],
+ [
+ 70,
+ 0.3472
+ ],
+ [
+ 80,
+ 0.3426
+ ],
+ [
+ 90,
+ 0.3445
+ ],
+ [
+ 100,
+ 0.3432
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3508,
+ "curve": [
+ [
+ 1,
+ 0.3089
+ ],
+ [
+ 10,
+ 0.318
+ ],
+ [
+ 20,
+ 0.3301
+ ],
+ [
+ 30,
+ 0.3434
+ ],
+ [
+ 40,
+ 0.3386
+ ],
+ [
+ 50,
+ 0.3343
+ ],
+ [
+ 60,
+ 0.3489
+ ],
+ [
+ 70,
+ 0.3458
+ ],
+ [
+ 80,
+ 0.3499
+ ],
+ [
+ 90,
+ 0.3508
+ ],
+ [
+ 100,
+ 0.3508
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.3521,
+ "curve": [
+ [
+ 1,
+ 0.3238
+ ],
+ [
+ 10,
+ 0.3327
+ ],
+ [
+ 20,
+ 0.3395
+ ],
+ [
+ 30,
+ 0.3457
+ ],
+ [
+ 40,
+ 0.3367
+ ],
+ [
+ 50,
+ 0.3496
+ ],
+ [
+ 60,
+ 0.3453
+ ],
+ [
+ 70,
+ 0.3487
+ ],
+ [
+ 80,
+ 0.3491
+ ],
+ [
+ 90,
+ 0.3498
+ ],
+ [
+ 100,
+ 0.3521
+ ]
+ ]
+ }
+ },
+ "1": {
+ "42": {
+ "final_acc": 0.2384,
+ "curve": [
+ [
+ 1,
+ 0.2687
+ ],
+ [
+ 10,
+ 0.2106
+ ],
+ [
+ 20,
+ 0.2293
+ ],
+ [
+ 30,
+ 0.2297
+ ],
+ [
+ 40,
+ 0.2241
+ ],
+ [
+ 50,
+ 0.2318
+ ],
+ [
+ 60,
+ 0.2417
+ ],
+ [
+ 70,
+ 0.2458
+ ],
+ [
+ 80,
+ 0.2463
+ ],
+ [
+ 90,
+ 0.2438
+ ],
+ [
+ 100,
+ 0.2384
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.2097,
+ "curve": [
+ [
+ 1,
+ 0.1958
+ ],
+ [
+ 10,
+ 0.1777
+ ],
+ [
+ 20,
+ 0.222
+ ],
+ [
+ 30,
+ 0.1852
+ ],
+ [
+ 40,
+ 0.2165
+ ],
+ [
+ 50,
+ 0.2095
+ ],
+ [
+ 60,
+ 0.1995
+ ],
+ [
+ 70,
+ 0.2038
+ ],
+ [
+ 80,
+ 0.2068
+ ],
+ [
+ 90,
+ 0.2173
+ ],
+ [
+ 100,
+ 0.2097
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.2295,
+ "curve": [
+ [
+ 1,
+ 0.2118
+ ],
+ [
+ 10,
+ 0.2074
+ ],
+ [
+ 20,
+ 0.1777
+ ],
+ [
+ 30,
+ 0.2043
+ ],
+ [
+ 40,
+ 0.201
+ ],
+ [
+ 50,
+ 0.2087
+ ],
+ [
+ 60,
+ 0.2073
+ ],
+ [
+ 70,
+ 0.2126
+ ],
+ [
+ 80,
+ 0.2202
+ ],
+ [
+ 90,
+ 0.2355
+ ],
+ [
+ 100,
+ 0.2295
+ ]
+ ]
+ }
+ },
+ "2": {
+ "42": {
+ "final_acc": 0.3069,
+ "curve": [
+ [
+ 1,
+ 0.2769
+ ],
+ [
+ 10,
+ 0.2705
+ ],
+ [
+ 20,
+ 0.3
+ ],
+ [
+ 30,
+ 0.2988
+ ],
+ [
+ 40,
+ 0.308
+ ],
+ [
+ 50,
+ 0.2941
+ ],
+ [
+ 60,
+ 0.3025
+ ],
+ [
+ 70,
+ 0.3075
+ ],
+ [
+ 80,
+ 0.307
+ ],
+ [
+ 90,
+ 0.3063
+ ],
+ [
+ 100,
+ 0.3069
+ ]
+ ]
+ },
+ "123": {
+ "final_acc": 0.3025,
+ "curve": [
+ [
+ 1,
+ 0.2582
+ ],
+ [
+ 10,
+ 0.2772
+ ],
+ [
+ 20,
+ 0.2904
+ ],
+ [
+ 30,
+ 0.3072
+ ],
+ [
+ 40,
+ 0.2898
+ ],
+ [
+ 50,
+ 0.2938
+ ],
+ [
+ 60,
+ 0.2892
+ ],
+ [
+ 70,
+ 0.2974
+ ],
+ [
+ 80,
+ 0.297
+ ],
+ [
+ 90,
+ 0.3035
+ ],
+ [
+ 100,
+ 0.3025
+ ]
+ ]
+ },
+ "456": {
+ "final_acc": 0.2963,
+ "curve": [
+ [
+ 1,
+ 0.2794
+ ],
+ [
+ 10,
+ 0.2888
+ ],
+ [
+ 20,
+ 0.2884
+ ],
+ [
+ 30,
+ 0.2901
+ ],
+ [
+ 40,
+ 0.2784
+ ],
+ [
+ 50,
+ 0.2817
+ ],
+ [
+ 60,
+ 0.2983
+ ],
+ [
+ 70,
+ 0.292
+ ],
+ [
+ 80,
+ 0.2904
+ ],
+ [
+ 90,
+ 0.2999
+ ],
+ [
+ 100,
+ 0.2963
+ ]
+ ]
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/run_depth_ladder.sh b/run_depth_ladder.sh
new file mode 100755
index 0000000..e450b47
--- /dev/null
+++ b/run_depth_ladder.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e  # driver for both depth-ladder sweeps; stop at the first failing command so the second sweep never starts after a failed first one
+cd /home/yurenh2/fa  # NOTE(review): hard-coded absolute checkout path — the relative paths below resolve against it; adjust when running elsewhere
+echo "[$(date)] START primary d=256 L=4 ladder"  # timestamped progress marker
+CUDA_VISIBLE_DEVICES=2 python3 experiments/depth_utility_ladder.py \
+ --d_hidden 256 --num_blocks 4 --dataset cifar10 \
+ --methods bp fa dfa --k_values 0 1 2 3 4 --seeds 42 123 456 \
+ --epochs 100 --gpu 0 --output_dir results/depth_ladder
+echo "[$(date)] START secondary d=512 L=2 FA-failure ladder"  # second sweep runs only if the first exited successfully (set -e)
+CUDA_VISIBLE_DEVICES=2 python3 experiments/depth_utility_ladder.py \
+ --d_hidden 512 --num_blocks 2 --dataset cifar10 \
+ --methods bp fa dfa --k_values 0 1 2 --seeds 42 123 456 \
+ --epochs 100 --gpu 0 --output_dir results/depth_ladder
+echo "[$(date)] ALL DONE"  # reached only when both sweeps exited with status 0