diff options
 adapt/fit_theta.py         |  3 ++-
 baselines/peft_baseline.py | 13 ++++---------
2 files changed, 6 insertions, 10 deletions
diff --git a/adapt/fit_theta.py b/adapt/fit_theta.py
index f5b047b..e8bff28 100644
--- a/adapt/fit_theta.py
+++ b/adapt/fit_theta.py
@@ -7,7 +7,8 @@
 import torch
 import torch.nn.functional as F
 
 # Maximum chunk size for logit computation to avoid OOM
-CHUNK_SIZE = 128
+# Reduced from 128 to 32 to handle K=16 (longer sequences)
+CHUNK_SIZE = 32
 
 
 def _chunked_ce_kl(h_prime, h_base, lm_w, lm_bias, y, beta):
diff --git a/baselines/peft_baseline.py b/baselines/peft_baseline.py
index af1b71c..9d3b03a 100644
--- a/baselines/peft_baseline.py
+++ b/baselines/peft_baseline.py
@@ -87,6 +87,8 @@ class PEFTBaseline:
     def __init__(self, wrapper, peft_config):
         self.wrapper = wrapper
         self.device = wrapper.device
+        # Save reference to original model BEFORE wrapping
+        self._original_model = wrapper.model
         self.peft_model = get_peft_model(wrapper.model, peft_config)
         self.n_params = sum(p.numel() for p in self.peft_model.parameters()
                             if p.requires_grad)
@@ -214,14 +216,7 @@ class PEFTBaseline:
 
     def cleanup(self):
         """Remove adapter and restore wrapper.model to the original base model."""
-        try:
-            base_model = self.peft_model.unload()
-            self.wrapper.model = base_model
-        except (AttributeError, NotImplementedError):
-            # unload() not supported for PromptTuning/PrefixTuning
-            # Access base model directly
-            self.wrapper.model = self.peft_model.base_model
-            if hasattr(self.wrapper.model, 'model'):
-                self.wrapper.model = self.wrapper.model.model
+        # Always restore the saved original model reference — safe for all PEFT types
+        self.wrapper.model = self._original_model
         del self.peft_model
         torch.cuda.empty_cache()
