-rw-r--r--  adapt/fit_theta.py          |  3 ++-
-rw-r--r--  baselines/peft_baseline.py  | 13 ++++---------
2 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/adapt/fit_theta.py b/adapt/fit_theta.py
index f5b047b..e8bff28 100644
--- a/adapt/fit_theta.py
+++ b/adapt/fit_theta.py
@@ -7,7 +7,8 @@ import torch
 import torch.nn.functional as F
 
 # Maximum chunk size for logit computation to avoid OOM
-CHUNK_SIZE = 128
+# Reduced from 128 to 32 to handle K=16 (longer sequences)
+CHUNK_SIZE = 32
 
 
 def _chunked_ce_kl(h_prime, h_base, lm_w, lm_bias, y, beta):
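For context, CHUNK_SIZE bounds how many token positions have their full-vocabulary logits materialized at once inside _chunked_ce_kl. The diff shows only the signature and the constant, so the following is a minimal sketch of what such a chunked CE+KL loss could look like; the body, tensor shapes, and KL direction are assumptions, not code from this repository.

import torch
import torch.nn.functional as F

CHUNK_SIZE = 32  # positions per chunk; reduced so longer K=16 sequences fit in memory

def _chunked_ce_kl(h_prime, h_base, lm_w, lm_bias, y, beta):
    # Sketch: cross-entropy on the adapted hidden states plus a beta-weighted
    # KL(adapted || base) penalty, computed CHUNK_SIZE positions at a time so
    # the full [n_positions, vocab] logit matrices are never built at once.
    n = h_prime.shape[0]
    total = h_prime.new_zeros(())
    for start in range(0, n, CHUNK_SIZE):
        end = min(start + CHUNK_SIZE, n)
        logits_prime = F.linear(h_prime[start:end], lm_w, lm_bias)   # [chunk, vocab]
        logits_base = F.linear(h_base[start:end], lm_w, lm_bias).detach()
        ce = F.cross_entropy(logits_prime, y[start:end], reduction="sum")
        kl = F.kl_div(
            F.log_softmax(logits_base, dim=-1),   # input (log q)
            F.log_softmax(logits_prime, dim=-1),  # target (log p): KL(p || q)
            log_target=True,
            reduction="sum",
        )
        total = total + ce + beta * kl
    return total / n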
diff --git a/baselines/peft_baseline.py b/baselines/peft_baseline.py
index af1b71c..9d3b03a 100644
--- a/baselines/peft_baseline.py
+++ b/baselines/peft_baseline.py
@@ -87,6 +87,8 @@ class PEFTBaseline:
     def __init__(self, wrapper, peft_config):
         self.wrapper = wrapper
         self.device = wrapper.device
+        # Save reference to original model BEFORE wrapping
+        self._original_model = wrapper.model
         self.peft_model = get_peft_model(wrapper.model, peft_config)
         self.n_params = sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad)
 
@@ -214,14 +216,7 @@ class PEFTBaseline:
 
     def cleanup(self):
         """Remove adapter and restore wrapper.model to the original base model."""
-        try:
-            base_model = self.peft_model.unload()
-            self.wrapper.model = base_model
-        except (AttributeError, NotImplementedError):
-            # unload() not supported for PromptTuning/PrefixTuning
-            # Access base model directly
-            self.wrapper.model = self.peft_model.base_model
-            if hasattr(self.wrapper.model, 'model'):
-                self.wrapper.model = self.wrapper.model.model
+        # Always restore the saved original model reference; safe for all PEFT types
+        self.wrapper.model = self._original_model
         del self.peft_model
         torch.cuda.empty_cache()
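Taken together, the two hunks replace the unload()-based restore with a reference saved before get_peft_model wraps the model, since prompt-learning adapters (PromptTuning/PrefixTuning) raise on unload(). A minimal usage sketch under assumed names: `wrapper` (exposing .model and .device as above), run_baseline, and the LoRA hyperparameters are illustrative, not from this repository.

from peft import LoraConfig

def run_baseline(wrapper, evaluate):
    # Hypothetical driver: build the adapter, evaluate, then always restore
    # wrapper.model to the exact object it pointed at before wrapping.
    config = LoraConfig(r=8, target_modules=["q_proj", "v_proj"])  # illustrative values
    baseline = PEFTBaseline(wrapper, config)
    try:
        return evaluate(baseline.peft_model)
    finally:
        baseline.cleanup()  # no unload() needed, so prompt-learning configs work too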