From 112c5d354f36d6ea6e8049cf1aeaebeb9944aa02 Mon Sep 17 00:00:00 2001
From: YurenHao0426
Date: Fri, 10 Apr 2026 14:50:22 -0500
Subject: Fix two bugs: PEFT cleanup model corruption and K=16 OOM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug 1: PEFTBaseline.cleanup() corrupted wrapper.model after LoRA unload,
causing 'Qwen2Model has no attribute prepare_inputs_for_generation' for
subsequent methods. Fix: save a reference to the original model before
wrapping, and restore it directly in cleanup() instead of relying on
unload().

Bug 2: fit_theta OOMs at K=16 due to large logit chunks (128 × 151936
vocab). Fix: reduce CHUNK_SIZE from 128 to 32 (~4x less memory per
chunk).

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 adapt/fit_theta.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'adapt/fit_theta.py')

diff --git a/adapt/fit_theta.py b/adapt/fit_theta.py
index f5b047b..e8bff28 100644
--- a/adapt/fit_theta.py
+++ b/adapt/fit_theta.py
@@ -7,7 +7,8 @@ import torch
 import torch.nn.functional as F
 
 # Maximum chunk size for logit computation to avoid OOM
-CHUNK_SIZE = 128
+# Reduced from 128 to 32 to handle K=16 (longer sequences)
+CHUNK_SIZE = 32
 
 
 def _chunked_ce_kl(h_prime, h_base, lm_w, lm_bias, y, beta):
--
cgit v1.2.3
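
For context on Bug 1, whose code lies outside this filtered diff, here is a
minimal sketch of the save-and-restore pattern the message describes. The
PEFTBaseline name, the wrapper.model attribute, and cleanup() come from the
commit message; everything else (the setup() method, the _original_model
attribute, the LoRA config values) is a hypothetical illustration, using
only real peft API calls.

from peft import LoraConfig, get_peft_model

class PEFTBaseline:
    def setup(self, wrapper):
        self.wrapper = wrapper
        # Save a reference to the original model BEFORE wrapping it, so
        # cleanup() can restore it without trusting unload() to hand back
        # an intact module tree.
        self._original_model = wrapper.model
        lora = LoraConfig(r=8, lora_alpha=16,
                          target_modules=["q_proj", "v_proj"])
        wrapper.model = get_peft_model(wrapper.model, lora)

    def cleanup(self):
        # Restore the saved reference directly. Relying on unload() left a
        # bare Qwen2Model behind, which lacks generation plumbing such as
        # prepare_inputs_for_generation and broke every subsequent method.
        self.wrapper.model = self._original_model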
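
The diff shows only the signature of _chunked_ce_kl. The sketch below is an
assumption about its shape, not the actual body: it takes lm_w/lm_bias to be
the LM head's (vocab, hidden) weight and bias, y to be token targets, and
beta to weight a KL term against the base model's distribution. What it does
show concretely is why CHUNK_SIZE bounds peak memory: each iteration
materializes only a (CHUNK_SIZE, 151936) logit block, so dropping 128 to 32
shrinks that block ~4x.

import torch.nn.functional as F

CHUNK_SIZE = 32

def _chunked_ce_kl(h_prime, h_base, lm_w, lm_bias, y, beta):
    """Cross-entropy on adapted hidden states plus a beta-weighted KL
    penalty toward the base model, computed CHUNK_SIZE tokens at a time."""
    total = h_prime.new_zeros(())
    n = h_prime.shape[0]
    for start in range(0, n, CHUNK_SIZE):
        end = min(start + CHUNK_SIZE, n)
        # Only these two (chunk, vocab) blocks are live at once; the full
        # (n, vocab) logit matrix is never materialized.
        logits_p = F.linear(h_prime[start:end], lm_w, lm_bias)
        logits_b = F.linear(h_base[start:end], lm_w, lm_bias)
        ce = F.cross_entropy(logits_p, y[start:end], reduction="sum")
        kl = F.kl_div(F.log_softmax(logits_p, dim=-1),
                      F.log_softmax(logits_b, dim=-1),
                      log_target=True, reduction="sum")
        total = total + ce + beta * kl
    return total / n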