diff options
Diffstat (limited to 'experiments')
| -rw-r--r-- | experiments/ep_baseline.py | 3 |
| -rw-r--r-- | experiments/ep_synthetic.py | 4 |
2 files changed, 5 insertions, 2 deletions
diff --git a/experiments/ep_baseline.py b/experiments/ep_baseline.py
index de7d853..7f3d004 100644
--- a/experiments/ep_baseline.py
+++ b/experiments/ep_baseline.py
@@ -206,7 +206,8 @@ def ep_credit_signals(model, x, y, beta, T_nudge, alpha_nudge):
     _, h_free = model(x, return_hidden=True)
     h_nudged = ep_nudged_phase(model, x, y, h_free, beta, T_nudge, alpha_nudge)
     L = model.num_blocks
-    credits = [(h_nudged[l] - h_free[l]) / beta for l in range(L)]
+    # Negate: EP nudge moves h toward lower loss, opposite to BP grad direction
+    credits = [-(h_nudged[l] - h_free[l]) / beta for l in range(L)]
     return credits, h_free, h_nudged


diff --git a/experiments/ep_synthetic.py b/experiments/ep_synthetic.py
index 7daecde..a2f24df 100644
--- a/experiments/ep_synthetic.py
+++ b/experiments/ep_synthetic.py
@@ -115,7 +115,9 @@ def compute_diagnostics(model, teacher, dev, d, C, L, beta=0.5, T_nudge=20, alph
     gammas,rhos=[],[]
     with torch.no_grad():_,hi=model(x,return_hidden=True)
     for l in range(L):
-        a_ep=(h_nudge[l+1].detach()-h_free[l+1])/beta
+        # EP nudge moves h toward lower loss, so (h_nudge - h_free) points opposite to BP grad.
+        # Negate to align with BP gradient convention (pointing toward loss increase).
+        a_ep=-(h_nudge[l+1].detach()-h_free[l+1])/beta
         gammas.append(cosine_similarity_batch(a_ep,bp[l+1]))
         def mk(sl):
             def f(h):
