From 112c5d354f36d6ea6e8049cf1aeaebeb9944aa02 Mon Sep 17 00:00:00 2001
From: YurenHao0426
Date: Fri, 10 Apr 2026 14:50:22 -0500
Subject: Fix two bugs: PEFT cleanup model corruption and K=16 OOM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug 1: PEFTBaseline.cleanup() corrupted wrapper.model after LoRA unload,
causing 'Qwen2Model has no attribute prepare_inputs_for_generation' for
subsequent methods. Fix: save a reference to the original model before
wrapping, and restore it directly in cleanup() instead of relying on
unload().

Bug 2: fit_theta OOM at K=16 due to large logit chunks (128 × 151936
vocab). Fix: reduce CHUNK_SIZE from 128 to 32 (~4x less memory per
chunk).

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 baselines/peft_baseline.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'baselines')

diff --git a/baselines/peft_baseline.py b/baselines/peft_baseline.py
index af1b71c..9d3b03a 100644
--- a/baselines/peft_baseline.py
+++ b/baselines/peft_baseline.py
@@ -87,6 +87,8 @@ class PEFTBaseline:
     def __init__(self, wrapper, peft_config):
         self.wrapper = wrapper
         self.device = wrapper.device
+        # Save reference to original model BEFORE wrapping
+        self._original_model = wrapper.model
         self.peft_model = get_peft_model(wrapper.model, peft_config)
         self.n_params = sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad)
 
@@ -214,14 +216,7 @@ class PEFTBaseline:
 
     def cleanup(self):
         """Remove adapter and restore wrapper.model to the original base model."""
-        try:
-            base_model = self.peft_model.unload()
-            self.wrapper.model = base_model
-        except (AttributeError, NotImplementedError):
-            # unload() not supported for PromptTuning/PrefixTuning
-            # Access base model directly
-            self.wrapper.model = self.peft_model.base_model
-            if hasattr(self.wrapper.model, 'model'):
-                self.wrapper.model = self.wrapper.model.model
+        # Always restore the saved original model reference — safe for all PEFT types
+        self.wrapper.model = self._original_model
        del self.peft_model
         torch.cuda.empty_cache()
--
cgit v1.2.3
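
A minimal standalone sketch of the bug-1 failure mode, for anyone hitting the same AttributeError. It assumes a Qwen2-family checkpoint ("Qwen/Qwen2-0.5B" is an illustrative choice, not necessarily the model this repo uses) and a transformers version where GenerationMixin is no longer mixed into every PreTrainedModel, which the quoted error message implies:

# Illustrative repro of the bug-1 symptom; standalone, not this repo's code.
from transformers import AutoModelForCausalLM

# The *ForCausalLM wrapper carries GenerationMixin; the inner Qwen2Model
# (model.model) does not, in the transformers version the error implies.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B")

print(hasattr(model, "prepare_inputs_for_generation"))        # True: CausalLM wrapper
print(hasattr(model.model, "prepare_inputs_for_generation"))  # False, per the
                                                              # commit's error message

# The old cleanup() fallback could leave the bare inner transformer in
# wrapper.model, so the next baseline's generate() call raised the quoted
# AttributeError. Saving the original CausalLM reference before
# get_peft_model() and restoring it in cleanup() never touches unload().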
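
The fit_theta change itself is filtered out of the diff above (cgit view limited to 'baselines'), so here is a hedged sketch of the kind of chunked log-prob loop the commit message describes. Only CHUNK_SIZE and the 151936-token vocab come from the message; chunked_token_logprobs, the .model/.lm_head attribute layout, and the no-grad scoring setting are assumptions:

# Sketch of a CHUNK_SIZE-bounded log-prob loop; names and shapes are
# illustrative, not the repository's fit_theta implementation.
import torch

CHUNK_SIZE = 32  # was 128; only 32 x 151936 logits live per slice, ~4x less

@torch.no_grad()
def chunked_token_logprobs(model, input_ids):
    """Per-token log-probs without materializing the full [seq, vocab] logits."""
    hidden = model.model(input_ids).last_hidden_state    # [B, seq, hidden]
    seq_len = hidden.size(1)
    out = []
    # Slice the sequence dimension so at most a [B, CHUNK_SIZE, vocab]
    # logit tensor exists at any point.
    for start in range(0, seq_len - 1, CHUNK_SIZE):
        end = min(start + CHUNK_SIZE, seq_len - 1)
        logits = model.lm_head(hidden[:, start:end])     # the big tensor
        targets = input_ids[:, start + 1:end + 1]        # next-token labels
        logprobs = torch.log_softmax(logits.float(), dim=-1)
        out.append(logprobs.gather(-1, targets.unsqueeze(-1)).squeeze(-1))
        del logits, logprobs                             # free before next slice
    return torch.cat(out, dim=1)                         # [B, seq - 1]

At K=16 samples per prompt the old 128-position chunks multiplied into several 128 x 151936 float tensors in flight at once; shrinking the chunk to 32 cuts that peak roughly 4x, matching the commit message's estimate.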