From 2aaabd9a95386bf9f274cb9907ac6a5306171759 Mon Sep 17 00:00:00 2001
From: YurenHao0426
Date: Thu, 2 Apr 2026 23:34:12 -0500
Subject: =?UTF-8?q?Fix=20EP=20credit=20sign:=20negate=20(h=5Fnudge=20-=20h?=
 =?UTF-8?q?=5Ffree)/=CE=B2=20to=20align=20with=20BP=20grad=20direction?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

EP nudge moves h toward lower loss (opposite to BP grad which points toward
loss increase). Without negation, Gamma is negative and rho is -0.25.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 experiments/ep_baseline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'experiments/ep_baseline.py')

diff --git a/experiments/ep_baseline.py b/experiments/ep_baseline.py
index de7d853..7f3d004 100644
--- a/experiments/ep_baseline.py
+++ b/experiments/ep_baseline.py
@@ -206,7 +206,8 @@ def ep_credit_signals(model, x, y, beta, T_nudge, alpha_nudge):
     _, h_free = model(x, return_hidden=True)
     h_nudged = ep_nudged_phase(model, x, y, h_free, beta, T_nudge, alpha_nudge)
     L = model.num_blocks
-    credits = [(h_nudged[l] - h_free[l]) / beta for l in range(L)]
+    # Negate: EP nudge moves h toward lower loss, opposite to BP grad direction
+    credits = [-(h_nudged[l] - h_free[l]) / beta for l in range(L)]
     return credits, h_free, h_nudged


-- 
cgit v1.2.3