# files/train_snntorch.py

"""
Training script for snnTorch-based deep SNNs with Lyapunov regularization.

Usage:
    # Baseline (no Lyapunov)
    python files/train_snntorch.py --hidden 256 128 --epochs 10

    # With Lyapunov regularization
    python files/train_snntorch.py --hidden 256 128 --epochs 10 --lyapunov --lambda_reg 0.1

    # Recurrent model
    python files/train_snntorch.py --model recurrent --hidden 256 --epochs 10 --lyapunov
"""

import os
import sys
import json
import csv

# Make the parent of this file's directory importable so that the absolute
# ``files.*`` imports below resolve when the script is launched directly.
# ``abspath`` matters: if ``__file__`` is a bare relative name (script started
# from its own directory on interpreters that do not absolutise it),
# ``dirname`` would return "" and the wrong entry would be put on ``sys.path``.
_HERE = os.path.dirname(os.path.abspath(__file__))
_ROOT = os.path.dirname(_HERE)
if _ROOT not in sys.path:
    sys.path.insert(0, _ROOT)

import argparse
import time
from typing import List, Optional

import torch
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm

from files.data_io.dataset_loader import get_dataloader
from files.models.snn_snntorch import create_snn


def _prepare_run_dir(base_dir: str) -> str:
    """Create and return a fresh, timestamped run directory under ``base_dir``.

    The directory is named ``YYYYmmdd-HHMMSS``. Because the timestamp only has
    one-second resolution, two runs started within the same second would
    otherwise share one directory and interleave their output files. To avoid
    that, the directory is created exclusively and, on a name collision, a
    numeric suffix (``-1``, ``-2``, ...) is appended until creation succeeds.

    Args:
        base_dir: Parent directory; created (with parents) if missing.

    Returns:
        Path of the newly created directory.

    Raises:
        OSError: If the directory cannot be created for a reason other than
            the name already existing (e.g. ``base_dir`` is a regular file).
    """
    stamp = time.strftime("%Y%m%d-%H%M%S")
    candidate = os.path.join(base_dir, stamp)
    attempt = 0
    while True:
        try:
            # No exist_ok: the mkdir of the leaf doubles as an atomic
            # "is this name free?" check, also across processes.
            os.makedirs(candidate)
        except FileExistsError:
            attempt += 1
            candidate = os.path.join(base_dir, f"{stamp}-{attempt}")
        else:
            return candidate


def _append_metrics(csv_path: str, row: dict):
    """Append one metrics row to ``csv_path``, keeping rows aligned to the header.

    Rows written to the same file may carry different key sets (for example a
    per-batch row without validation columns followed by a per-epoch row that
    has them). The header already on disk is therefore treated as the source
    of truth:

    * new or empty file: the header is taken from ``row`` and written first;
    * ``row`` only uses known columns: it is appended under the existing
      header, with columns it lacks left empty;
    * ``row`` introduces new columns: the file is rewritten once with the
      widened header (new columns appended on the right, earlier rows padded
      with empty cells) and then atomically swapped into place.

    Args:
        csv_path: Path of the CSV file to create or extend.
        row: Mapping of column name to value for the new row.
    """
    # EAFP: a missing file simply means there is no header yet.
    try:
        with open(csv_path, "r", newline="") as f:
            header = next(csv.reader(f), [])
    except FileNotFoundError:
        header = []

    if not header:
        with open(csv_path, "a", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=list(row.keys()))
            writer.writeheader()
            writer.writerow(row)
        return

    new_columns = [key for key in row if key not in header]
    if not new_columns:
        with open(csv_path, "a", newline="") as f:
            # Columns absent from `row` are filled with restval ("").
            csv.DictWriter(f, fieldnames=header).writerow(row)
        return

    # Schema grew: rewrite the file under the widened header so that every
    # line has the same number of cells.
    with open(csv_path, "r", newline="") as f:
        previous_rows = list(csv.DictReader(f))

    tmp_path = csv_path + ".tmp"
    with open(tmp_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=header + new_columns)
        writer.writeheader()
        writer.writerows(previous_rows)
        writer.writerow(row)
    os.replace(tmp_path, csv_path)


def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Build the command-line parser and parse the training options.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses;
            passing an explicit list allows programmatic use.

    Returns:
        The populated ``argparse.Namespace``. The progress-bar switch is
        stored as ``no_progress`` and is accepted both as ``--no-progress``
        and, matching the other underscore-style flags, as ``--no_progress``.
    """
    p = argparse.ArgumentParser(
        description="Train deep SNN with snnTorch and optional Lyapunov regularization"
    )

    # Model architecture
    p.add_argument(
        "--model", type=str, default="feedforward", choices=["feedforward", "recurrent"],
        help="Model type: 'feedforward' (LIF) or 'recurrent' (RSynaptic)"
    )
    p.add_argument(
        "--hidden", type=int, nargs="+", default=[256],
        help="Hidden layer sizes (e.g., --hidden 256 128 for 2 layers)"
    )
    p.add_argument("--classes", type=int, default=20, help="Number of output classes")
    p.add_argument("--beta", type=float, default=0.9, help="Membrane decay (beta)")
    p.add_argument("--threshold", type=float, default=1.0, help="Firing threshold")
    p.add_argument("--dropout", type=float, default=0.0, help="Dropout between layers")
    p.add_argument(
        "--surrogate_slope", type=float, default=25.0,
        help="Slope for fast_sigmoid surrogate gradient"
    )

    # Recurrent-specific (only for --model recurrent)
    p.add_argument("--alpha", type=float, default=0.9, help="Synaptic current decay (recurrent only)")

    # Training
    p.add_argument("--epochs", type=int, default=10)
    p.add_argument("--lr", type=float, default=1e-3)
    p.add_argument("--weight_decay", type=float, default=0.0, help="L2 regularization")
    p.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu")
    p.add_argument("--cfg", type=str, default="data_io/configs/shd.yaml", help="Dataset config")

    # Lyapunov regularization
    p.add_argument("--lyapunov", action="store_true", help="Enable Lyapunov regularization")
    p.add_argument("--lambda_reg", type=float, default=0.1, help="Lyapunov penalty weight")
    p.add_argument("--lambda_target", type=float, default=0.0, help="Target Lyapunov exponent")
    p.add_argument("--lyap_eps", type=float, default=1e-4, help="Perturbation magnitude")
    p.add_argument(
        "--lyap_layers", type=int, nargs="*", default=None,
        help="Which layers to measure (default: all). E.g., --lyap_layers 0 1"
    )

    # Output
    p.add_argument("--out_dir", type=str, default="runs/snntorch", help="Output directory")
    p.add_argument("--log_batches", action="store_true", help="Log per-batch metrics")
    # Both spellings are accepted; dest is pinned so callers keep reading
    # ``args.no_progress`` regardless of which one was used.
    p.add_argument(
        "--no-progress", "--no_progress", dest="no_progress", action="store_true",
        help="Disable progress bar"
    )
    p.add_argument("--save_model", action="store_true", help="Save model checkpoint")

    return p.parse_args(argv)


def train_one_epoch(
    model: nn.Module,
    loader,
    optimizer: optim.Optimizer,
    device: torch.device,
    ce_loss: nn.Module,
    lyapunov: bool,
    lambda_reg: float,
    lambda_target: float,
    lyap_eps: float,
    lyap_layers: Optional[List[int]],
    progress: bool,
    run_dir: Optional[str] = None,
    epoch_idx: Optional[int] = None,
    log_batches: bool = False,
):
    """Run one optimisation pass over ``loader`` and return epoch averages.

    For every batch the model is called as
    ``model(x, compute_lyapunov=..., lyap_eps=..., lyap_layers=...)`` and must
    return ``(logits, lyap_est)``. When ``lyapunov`` is set and ``lyap_est``
    is not ``None``, the loss is
    ``ce + lambda_reg * (lyap_est - lambda_target) ** 2``; otherwise it is the
    plain cross-entropy. ``lyap_est`` has to be a single-element tensor since
    it is read with ``.item()``.

    Args:
        model: Network to train (put into ``train()`` mode here).
        loader: Iterable yielding ``(x, y)`` batches.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the batches are moved to.
        ce_loss: Classification loss applied to ``(logits, y)``.
        lyapunov: Whether to request and penalise the Lyapunov estimate.
        lambda_reg: Weight of the Lyapunov penalty.
        lambda_target: Target value for the Lyapunov estimate.
        lyap_eps: Forwarded to the model as ``lyap_eps``.
        lyap_layers: Forwarded to the model as ``lyap_layers``.
        progress: Show a tqdm progress bar with running statistics.
        run_dir: Directory holding ``metrics.csv`` for per-batch logging.
        epoch_idx: Epoch number recorded in per-batch rows.
        log_batches: Write one CSV row per batch; only effective when both
            ``run_dir`` and ``epoch_idx`` are given.

    Returns:
        ``(avg_loss, acc, avg_lyap)``: sample-weighted mean loss, accuracy
        over all seen samples, and the mean of the per-batch Lyapunov
        estimates (``None`` if none were collected).
    """
    model.train()
    total = 0
    correct = 0
    running_loss = 0.0
    lyap_vals = []

    write_batches = log_batches and run_dir is not None and epoch_idx is not None
    iterator = tqdm(loader, desc="train", leave=False, dynamic_ncols=True) if progress else loader

    for bidx, (x, y) in enumerate(iterator):
        x = x.to(device)  # (B, T, D)
        y = y.to(device)
        batch_size = x.size(0)

        optimizer.zero_grad(set_to_none=True)

        logits, lyap_est = model(
            x,
            compute_lyapunov=lyapunov,
            lyap_eps=lyap_eps,
            lyap_layers=lyap_layers,
        )

        ce = ce_loss(logits, y)

        use_lyap = lyapunov and lyap_est is not None
        if use_lyap:
            # Penalize deviation from target Lyapunov exponent
            reg = (lyap_est - lambda_target) ** 2
            loss = ce + lambda_reg * reg
        else:
            loss = ce

        loss.backward()
        optimizer.step()

        # Each .item() forces a host/device synchronisation, so every scalar
        # is pulled off the device exactly once per batch and then reused.
        loss_val = loss.item()
        lyap_val = lyap_est.detach().item() if use_lyap else float("nan")
        batch_correct = (logits.argmax(dim=1) == y).sum().item()

        if use_lyap:
            lyap_vals.append(lyap_val)
        running_loss += loss_val * batch_size
        correct += batch_correct
        total += batch_size

        if write_batches:
            _append_metrics(
                os.path.join(run_dir, "metrics.csv"),
                {
                    "step": "batch",
                    "epoch": int(epoch_idx),
                    "batch": int(bidx),
                    "loss": float(loss_val),
                    "acc": float(batch_correct / max(batch_size, 1)),
                    "lyap": float(lyap_val),
                    "time_sec": 0.0,
                },
            )

        if progress:
            seen = max(total, 1)  # same guard as the epoch-level averages
            postfix = {"loss": f"{running_loss / seen:.4f}", "acc": f"{correct / seen:.3f}"}
            if lyap_vals:
                postfix["lyap"] = f"{sum(lyap_vals) / len(lyap_vals):.4f}"
            iterator.set_postfix(postfix)

    avg_loss = running_loss / max(total, 1)
    acc = correct / max(total, 1)
    avg_lyap = sum(lyap_vals) / len(lyap_vals) if lyap_vals else None
    return avg_loss, acc, avg_lyap


@torch.no_grad()
def evaluate(
    model: nn.Module,
    loader,
    device: torch.device,
    ce_loss: nn.Module,
    progress: bool,
):
    """Score ``model`` on ``loader`` without gradient tracking.

    The model is switched to ``eval()`` mode and called as
    ``model(x, compute_lyapunov=False)``; only the first element of the
    returned pair (the logits) is used.

    Args:
        model: Network to evaluate.
        loader: Iterable yielding ``(x, y)`` batches.
        device: Device the batches are moved to.
        ce_loss: Loss applied to ``(logits, y)``.
        progress: Wrap the loader in a tqdm progress bar.

    Returns:
        ``(avg_loss, acc)``: sample-weighted mean loss and accuracy over all
        samples seen; both are 0 when the loader yields nothing.
    """
    model.eval()

    batches = loader
    if progress:
        batches = tqdm(loader, desc="eval", leave=False, dynamic_ncols=True)

    n_seen = 0
    n_correct = 0
    loss_sum = 0.0
    for inputs, targets in batches:
        inputs = inputs.to(device)
        targets = targets.to(device)
        n_batch = inputs.size(0)

        logits, _ = model(inputs, compute_lyapunov=False)

        # Weight by batch size so a short final batch does not skew the mean.
        loss_sum += ce_loss(logits, targets).item() * n_batch
        n_correct += (logits.argmax(dim=1) == targets).sum().item()
        n_seen += n_batch

    denom = max(n_seen, 1)
    return loss_sum / denom, n_correct / denom


def main():
    """Script entry point: parse options, build the model, train and log.

    Steps, in order:

    1. Parse CLI arguments and create a timestamped run directory; the
       arguments are dumped to ``args.json`` inside it.
    2. Build train/validation loaders from ``--cfg`` and read the sequence
       length and input width from the shape of the first training batch.
    3. Create the feedforward or recurrent SNN via ``create_snn``.
    4. For each epoch: train, evaluate, print a summary line and append an
       ``"epoch"`` row to ``metrics.csv``.
    5. With ``--save_model``, write ``best_model.pt`` whenever validation
       accuracy improves and ``final_model.pt`` after the last epoch.

    The best validation accuracy is tracked on every run, so the closing
    summary is correct even when no checkpoint is being saved.
    """
    args = parse_args()
    device = torch.device(args.device)

    # Prepare output directory
    run_dir = _prepare_run_dir(args.out_dir)
    with open(os.path.join(run_dir, "args.json"), "w") as f:
        json.dump(vars(args), f, indent=2)

    # Load data
    train_loader, val_loader = get_dataloader(args.cfg)

    # Infer dimensions from data; only the input shape of the first batch is
    # needed, its labels are ignored.
    xb, _ = next(iter(train_loader))
    _, T, D = xb.shape
    C = args.classes

    print(f"Data: T={T}, D={D}, classes={C}")
    print(f"Model: {args.model}, hidden={args.hidden}")

    # Create model (snntorch is imported lazily, only once a model is built)
    from snntorch import surrogate
    spike_grad = surrogate.fast_sigmoid(slope=args.surrogate_slope)

    model_kwargs = dict(
        input_dim=D,
        hidden_dims=args.hidden,
        num_classes=C,
        beta=args.beta,
        threshold=args.threshold,
        spike_grad=spike_grad,
    )
    if args.model == "feedforward":
        # Dropout is only forwarded to the feedforward variant.
        model = create_snn(model_type="feedforward", dropout=args.dropout, **model_kwargs)
    else:  # recurrent
        # The synaptic decay (alpha) is only forwarded to the recurrent variant.
        model = create_snn(model_type="recurrent", alpha=args.alpha, **model_kwargs)

    model = model.to(device)
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Parameters: {num_params:,}")

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    ce_loss = nn.CrossEntropyLoss()

    print(f"\nTraining on {device} | lyapunov={args.lyapunov} λ_reg={args.lambda_reg} λ_target={args.lambda_target}")
    print(f"Output: {run_dir}\n")

    best_val_acc = 0.0
    metrics_path = os.path.join(run_dir, "metrics.csv")
    show_progress = not args.no_progress

    for epoch in range(1, args.epochs + 1):
        t0 = time.time()

        tr_loss, tr_acc, tr_lyap = train_one_epoch(
            model=model,
            loader=train_loader,
            optimizer=optimizer,
            device=device,
            ce_loss=ce_loss,
            lyapunov=args.lyapunov,
            lambda_reg=args.lambda_reg,
            lambda_target=args.lambda_target,
            lyap_eps=args.lyap_eps,
            lyap_layers=args.lyap_layers,
            progress=show_progress,
            run_dir=run_dir,
            epoch_idx=epoch,
            log_batches=args.log_batches,
        )

        val_loss, val_acc = evaluate(
            model=model,
            loader=val_loader,
            device=device,
            ce_loss=ce_loss,
            progress=show_progress,
        )

        dt = time.time() - t0
        lyap_str = f" lyap={tr_lyap:.4f}" if tr_lyap is not None else ""

        print(
            f"[Epoch {epoch:3d}] "
            f"train_loss={tr_loss:.4f} train_acc={tr_acc:.3f}{lyap_str} | "
            f"val_loss={val_loss:.4f} val_acc={val_acc:.3f} ({dt:.1f}s)"
        )

        _append_metrics(
            metrics_path,
            {
                "step": "epoch",
                "epoch": int(epoch),
                "batch": -1,
                "loss": float(tr_loss),
                "acc": float(tr_acc),
                "val_loss": float(val_loss),
                "val_acc": float(val_acc),
                "lyap": float(tr_lyap) if tr_lyap is not None else float("nan"),
                "time_sec": float(dt),
            },
        )

        # Track the best score unconditionally so the final report is right
        # even without --save_model; only the checkpoint write is optional.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            if args.save_model:
                torch.save(model.state_dict(), os.path.join(run_dir, "best_model.pt"))

    print(f"\nTraining complete. Best val_acc: {best_val_acc:.3f}")
    if args.save_model:
        torch.save(model.state_dict(), os.path.join(run_dir, "final_model.pt"))
        print(f"Model saved to {run_dir}")


if __name__ == "__main__":
    main()