| field | value | timestamp |
|---|---|---|
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-13 23:49:05 -0600 |
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-13 23:49:05 -0600 |
| commit | cd99d6b874d9d09b3bb87b8485cc787885af71f1 (patch) | |
| tree | 59a233959932ca0e4f12f196275e07fcf443b33f /scripts | |
init commit
Diffstat (limited to 'scripts')
31 files changed, 1792 insertions, 0 deletions
diff --git a/scripts/run_adaptive_exp.sbatch b/scripts/run_adaptive_exp.sbatch new file mode 100644 index 0000000..e72fe34 --- /dev/null +++ b/scripts/run_adaptive_exp.sbatch @@ -0,0 +1,56 @@ +#!/bin/bash +#SBATCH --job-name=snn_adapt_exp +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=12:00:00 +#SBATCH --output=runs/slurm_logs/%j_adaptive_exp.out +#SBATCH --error=runs/slurm_logs/%j_adaptive_exp.err + +# ============================================================ +# Experiment: Adaptive Exponential Regularization +# ============================================================ +# Penalty = (exp(excess) - 1) * excess where excess = max(0, λ - threshold) +# This gives: +# - Zero penalty when λ < threshold +# - Exponential growth for λ > threshold (very gentle near threshold, explosive when chaotic) +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data runs/adaptive_exp + +echo "============================================================" +echo "ADAPTIVE EXPONENTIAL Regularization" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type adaptive_exp \ + --lyap_threshold 2.0 \ + --warmup_epochs 10 \ + --data_dir ./data \ + --out_dir runs/adaptive_exp \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_adaptive_linear.sbatch b/scripts/run_adaptive_linear.sbatch new file mode 100644 index 0000000..45cf160 --- /dev/null +++ b/scripts/run_adaptive_linear.sbatch @@ -0,0 +1,56 @@ +#!/bin/bash +#SBATCH --job-name=snn_adapt_lin +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=12:00:00 +#SBATCH --output=runs/slurm_logs/%j_adaptive_linear.out +#SBATCH --error=runs/slurm_logs/%j_adaptive_linear.err + +# ============================================================ +# Experiment: Adaptive Linear Regularization +# ============================================================ +# Penalty = excess³ where excess = max(0, λ - threshold) +# This gives: +# - Zero penalty when λ < threshold +# - Cubic growth for λ > threshold (gentle near threshold, strong when chaotic) +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data runs/adaptive_linear + +echo "============================================================" +echo "ADAPTIVE LINEAR (Cubic) Regularization" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total 
--format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type adaptive_linear \ + --lyap_threshold 2.0 \ + --warmup_epochs 10 \ + --data_dir ./data \ + --out_dir runs/adaptive_linear \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_adaptive_sigmoid.sbatch b/scripts/run_adaptive_sigmoid.sbatch new file mode 100644 index 0000000..b28f5a8 --- /dev/null +++ b/scripts/run_adaptive_sigmoid.sbatch @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=snn_adapt_sig +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=12:00:00 +#SBATCH --output=runs/slurm_logs/%j_adaptive_sigmoid.out +#SBATCH --error=runs/slurm_logs/%j_adaptive_sigmoid.err + +# ============================================================ +# Experiment: Adaptive Sigmoid Regularization +# ============================================================ +# Penalty = sigmoid((λ - threshold) / T) * (λ - target)² +# This gives: +# - Smooth transition around threshold (not hard cutoff) +# - ~0 penalty when λ << threshold +# - Full penalty when λ >> threshold +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data runs/adaptive_sigmoid + +echo "============================================================" +echo "ADAPTIVE SIGMOID Regularization" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type adaptive_sigmoid \ + --lyap_threshold 2.0 \ + --warmup_epochs 10 \ + --data_dir ./data \ + --out_dir runs/adaptive_sigmoid \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_cifar10.sbatch b/scripts/run_cifar10.sbatch new file mode 100644 index 0000000..e4e1df9 --- /dev/null +++ b/scripts/run_cifar10.sbatch @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=snn_cifar10 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=08:00:00 +#SBATCH --output=runs/slurm_logs/%j_cifar10.out +#SBATCH --error=runs/slurm_logs/%j_cifar10.err + +# ============================================================ +# CIFAR-10 Rate-Coded Experiment +# ============================================================ +# Challenge: +# - 3072 input dimensions (32x32x3 flattened) +# - Requires hierarchical feature learning +# - Deep networks essential for good accuracy +# +# 
This is the hardest benchmark - real image classification +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "CIFAR-10 Rate-Coded Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# CIFAR-10 with deeper networks (needs more capacity) +python files/experiments/benchmark_experiment.py \ + --dataset cifar10 \ + --depths 4 6 8 10 12 \ + --hidden_dim 256 \ + --epochs 50 \ + --batch_size 64 \ + --lr 0.0005 \ + --T 100 \ + --lambda_reg 0.5 \ + --lambda_target -0.2 \ + --data_dir ./data \ + --out_dir runs/benchmark \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_cifar10_conv.sbatch b/scripts/run_cifar10_conv.sbatch new file mode 100644 index 0000000..7d40074 --- /dev/null +++ b/scripts/run_cifar10_conv.sbatch @@ -0,0 +1,52 @@ +#!/bin/bash +#SBATCH --job-name=snn_cifar10_conv +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_cifar10_conv.out +#SBATCH --error=runs/slurm_logs/%j_cifar10_conv.err + +# ============================================================ +# CIFAR-10 Conv-SNN Experiment (Proper Architecture) +# ============================================================ +# Uses convolutional SNN that preserves spatial structure: +# Image (3,32,32) → Rate Encoding → Conv-LIF-Pool → FC → Output +# +# Tests whether Lyapunov regularization helps deeper Conv-SNNs +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "CIFAR-10 Conv-SNN Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/cifar10_conv_experiment.py \ + --model simple \ + --T 25 \ + --epochs 50 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --data_dir ./data \ + --out_dir runs/cifar10_conv \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_depth_experiment.sbatch b/scripts/run_depth_experiment.sbatch new file mode 100644 index 0000000..0e03d82 --- /dev/null +++ b/scripts/run_depth_experiment.sbatch @@ -0,0 +1,95 @@ +#!/bin/bash +#SBATCH --job-name=snn_depth_exp +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=4 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=32G +#SBATCH --time=02:00:00 
+#SBATCH --output=runs/slurm_logs/%j_depth_exp.out +#SBATCH --error=runs/slurm_logs/%j_depth_exp.err + +# ============================================================ +# Depth Comparison Experiment: Vanilla vs Lyapunov SNN +# ============================================================ +# Compares training stability across network depths (1-8 layers) +# Hypothesis: Lyapunov regularization enables training of deeper networks +# +# Usage: +# sbatch scripts/run_depth_experiment.sbatch +# +# Or with custom parameters: +# sbatch --export=EPOCHS=50,DEPTHS="1 2 4 6 8" scripts/run_depth_experiment.sbatch +# ============================================================ + +set -e + +# Default parameters (can be overridden via --export) +EPOCHS=${EPOCHS:-30} +DEPTHS=${DEPTHS:-"1 2 3 4 6"} +HIDDEN_DIM=${HIDDEN_DIM:-128} +LAMBDA_REG=${LAMBDA_REG:-0.1} +LR=${LR:-0.001} +SEED=${SEED:-42} +USE_SYNTHETIC=${USE_SYNTHETIC:-true} + +# Project directory +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +# Create log directory +mkdir -p runs/slurm_logs + +# Print job info +echo "============================================================" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURM_NODELIST" +echo "Start time: $(date)" +echo "============================================================" +echo "Configuration:" +echo " EPOCHS: $EPOCHS" +echo " DEPTHS: $DEPTHS" +echo " HIDDEN_DIM: $HIDDEN_DIM" +echo " LAMBDA_REG: $LAMBDA_REG" +echo " LR: $LR" +echo " USE_SYNTHETIC: $USE_SYNTHETIC" +echo "============================================================" + +# Check GPU +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# Build command +CMD="python files/experiments/depth_comparison.py" +CMD="$CMD --epochs $EPOCHS" +CMD="$CMD --depths $DEPTHS" +CMD="$CMD --hidden_dim $HIDDEN_DIM" +CMD="$CMD --lambda_reg $LAMBDA_REG" +CMD="$CMD --lr $LR" +CMD="$CMD --seed $SEED" +CMD="$CMD --out_dir runs/depth_comparison" +CMD="$CMD --device cuda" + +if [ "$USE_SYNTHETIC" = true ]; then + CMD="$CMD --synthetic" +fi + +echo "Running: $CMD" +echo "============================================================" + +# Run experiment +$CMD + +# Generate plots if results exist +LATEST_RUN=$(ls -td runs/depth_comparison/*/ 2>/dev/null | head -1) +if [ -n "$LATEST_RUN" ]; then + echo "============================================================" + echo "Generating plots for: $LATEST_RUN" + python files/experiments/plot_depth_comparison.py --results_dir "$LATEST_RUN" +fi + +echo "============================================================" +echo "Job finished: $(date)" +echo "============================================================" diff --git a/scripts/run_depth_experiment_shd.sbatch b/scripts/run_depth_experiment_shd.sbatch new file mode 100644 index 0000000..060bbe1 --- /dev/null +++ b/scripts/run_depth_experiment_shd.sbatch @@ -0,0 +1,65 @@ +#!/bin/bash +#SBATCH --job-name=snn_depth_shd +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=04:00:00 +#SBATCH --output=runs/slurm_logs/%j_depth_shd.out +#SBATCH --error=runs/slurm_logs/%j_depth_shd.err + +# ============================================================ +# Depth Comparison Experiment with SHD Dataset +# ============================================================ +# Full experiment with real neuromorphic data (Spiking 
Heidelberg Digits) +# +# Usage: +# sbatch scripts/run_depth_experiment_shd.sbatch +# ============================================================ + +set -e + +EPOCHS=${EPOCHS:-50} +DEPTHS=${DEPTHS:-"1 2 3 4 6 8"} +HIDDEN_DIM=${HIDDEN_DIM:-256} +LAMBDA_REG=${LAMBDA_REG:-0.1} +LR=${LR:-0.001} +SEED=${SEED:-42} + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs + +echo "============================================================" +echo "SHD Depth Comparison Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_comparison.py \ + --epochs $EPOCHS \ + --depths $DEPTHS \ + --hidden_dim $HIDDEN_DIM \ + --lambda_reg $LAMBDA_REG \ + --lr $LR \ + --seed $SEED \ + --cfg data_io/configs/shd.yaml \ + --out_dir runs/depth_comparison_shd \ + --device cuda + +# Generate plots +LATEST_RUN=$(ls -td runs/depth_comparison_shd/*/ 2>/dev/null | head -1) +if [ -n "$LATEST_RUN" ]; then + echo "Generating plots..." + python files/experiments/plot_depth_comparison.py --results_dir "$LATEST_RUN" +fi + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_depth_scaling_asymmetric.sbatch b/scripts/run_depth_scaling_asymmetric.sbatch new file mode 100644 index 0000000..51a6191 --- /dev/null +++ b/scripts/run_depth_scaling_asymmetric.sbatch @@ -0,0 +1,62 @@ +#!/bin/bash +#SBATCH --job-name=snn_asymm +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_asymm.out +#SBATCH --error=runs/slurm_logs/%j_asymm.err + +# ============================================================ +# Asymmetric Lyapunov Regularization Experiment +# ============================================================ +# Hypothesis: Using asymmetric penalty will balance between +# preventing chaos and allowing learning. +# +# Asymmetric loss: +# - Strong penalty for chaos (lambda > 0): relu(lambda)^2 +# - Weak penalty for collapse (lambda < -1): 0.1 * relu(-lambda-1)^2 +# +# This allows dynamics in the "sweet spot" of slightly negative +# Lyapunov exponents (stable but not dead). 
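
For illustration only, a minimal sketch of the asymmetric penalty described in the comment above; the actual implementation is whatever `--reg_type asymmetric` selects inside `files/experiments/depth_scaling_benchmark.py`, which is not shown in this commit.

```python
import torch

def asymmetric_penalty(lyap: torch.Tensor) -> torch.Tensor:
    """Asymmetric Lyapunov penalty, per the comment above (sketch, not the repo code).

    Strong quadratic push against chaos (lambda > 0), weak push against
    collapse (lambda < -1); the band [-1, 0] is left unpenalized.
    """
    chaos = torch.relu(lyap) ** 2                   # relu(lambda)^2
    collapse = 0.1 * torch.relu(-lyap - 1.0) ** 2   # 0.1 * relu(-lambda - 1)^2
    return chaos + collapse
```
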
+# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "ASYMMETRIC Lyapunov Regularization" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# Test depths: 4, 8, 12, 16 conv layers +# Using asymmetric loss + 20 epoch warmup +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type asymmetric \ + --warmup_epochs 20 \ + --data_dir ./data \ + --out_dir runs/depth_scaling_asymm \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_depth_scaling_cifar10.sbatch b/scripts/run_depth_scaling_cifar10.sbatch new file mode 100644 index 0000000..28fbf06 --- /dev/null +++ b/scripts/run_depth_scaling_cifar10.sbatch @@ -0,0 +1,56 @@ +#!/bin/bash +#SBATCH --job-name=snn_cifar10_depth +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_cifar10_depth.out +#SBATCH --error=runs/slurm_logs/%j_cifar10_depth.err + +# ============================================================ +# CIFAR-10 Depth Scaling Benchmark (Baseline) +# ============================================================ +# CIFAR-10 is easier (10 classes) - used as baseline comparison. +# +# Reference from literature: +# - Spiking VGG 7 layers: ~88% accuracy +# - Spiking VGG 13 layers: ~91.6% accuracy +# +# This experiment validates our implementation against known results. 
+# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "CIFAR-10 Depth Scaling Benchmark" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar10 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 100 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --data_dir ./data \ + --out_dir runs/depth_scaling \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_depth_scaling_cifar100.sbatch b/scripts/run_depth_scaling_cifar100.sbatch new file mode 100644 index 0000000..79b63da --- /dev/null +++ b/scripts/run_depth_scaling_cifar100.sbatch @@ -0,0 +1,61 @@ +#!/bin/bash +#SBATCH --job-name=snn_cifar100_depth +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_cifar100_depth.out +#SBATCH --error=runs/slurm_logs/%j_cifar100_depth.err + +# ============================================================ +# CIFAR-100 Depth Scaling Benchmark +# ============================================================ +# KEY EXPERIMENT: Show that deep SNNs outperform shallow ones +# when trained with Lyapunov regularization. 
+# +# CIFAR-100 (100 classes) is ideal because: +# - Complex enough that shallow networks plateau +# - Deep networks can learn richer representations +# - Standard benchmark with known baselines +# +# Expected results: +# - Shallow (4 layers): Similar for both methods +# - Deep (16 layers): Vanilla fails/plateaus, Lyapunov succeeds +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "CIFAR-100 Depth Scaling Benchmark" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# Test depths: 4, 8, 12, 16, 20 conv layers +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 20 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --data_dir ./data \ + --out_dir runs/depth_scaling \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_depth_scaling_hinge.sbatch b/scripts/run_depth_scaling_hinge.sbatch new file mode 100644 index 0000000..8ca3112 --- /dev/null +++ b/scripts/run_depth_scaling_hinge.sbatch @@ -0,0 +1,61 @@ +#!/bin/bash +#SBATCH --job-name=snn_hinge +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_hinge.out +#SBATCH --error=runs/slurm_logs/%j_hinge.err + +# ============================================================ +# Hinge Loss Lyapunov Regularization Experiment +# ============================================================ +# Hypothesis: Using hinge loss (only penalize chaos, not stability) +# will allow the network to learn while still preventing chaotic +# dynamics. 
+# +# Hinge loss: max(0, lambda)^2 +# - Only penalizes positive Lyapunov (chaos) +# - Allows negative Lyapunov (stable dynamics) without penalty +# - Combined with warmup to let network start learning first +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "HINGE LOSS Lyapunov Regularization" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# Test depths: 4, 8, 12, 16 conv layers +# Using hinge loss + 20 epoch warmup +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type hinge \ + --warmup_epochs 20 \ + --data_dir ./data \ + --out_dir runs/depth_scaling_hinge \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_diffonly_benchmark.sbatch b/scripts/run_diffonly_benchmark.sbatch new file mode 100644 index 0000000..19fdbba --- /dev/null +++ b/scripts/run_diffonly_benchmark.sbatch @@ -0,0 +1,33 @@ +#!/bin/bash +#SBATCH --job-name=lyap_bench +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=4 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=32G +#SBATCH --time=00:30:00 +#SBATCH --output=runs/slurm_logs/%j_diffonly_benchmark.out +#SBATCH --error=runs/slurm_logs/%j_diffonly_benchmark.err + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs + +echo "============================================================" +echo "Lyapunov Diff-Only Storage Benchmark" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/lyapunov_diffonly_benchmark.py + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_extreme3.sbatch b/scripts/run_extreme3.sbatch new file mode 100644 index 0000000..7e6524e --- /dev/null +++ b/scripts/run_extreme3.sbatch @@ -0,0 +1,55 @@ +#!/bin/bash +#SBATCH --job-name=snn_ext3 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_extreme3.out +#SBATCH --error=runs/slurm_logs/%j_extreme3.err + +# ============================================================ +# Experiment: Higher Threshold (lambda > 3.0) +# ============================================================ +# Since extreme with threshold=2.0 worked at depth 4 (47.5%), +# let's try threshold=3.0 to allow even more chaos before +# intervening. 
This might help deeper networks. +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "EXTREME THRESHOLD 3.0 Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type extreme \ + --lyap_threshold 3.0 \ + --warmup_epochs 10 \ + --data_dir ./data \ + --out_dir runs/depth_scaling_extreme3 \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_extreme3_long.sbatch b/scripts/run_extreme3_long.sbatch new file mode 100644 index 0000000..dc87c0a --- /dev/null +++ b/scripts/run_extreme3_long.sbatch @@ -0,0 +1,52 @@ +#!/bin/bash +#SBATCH --job-name=snn_ext3_long +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=12:00:00 +#SBATCH --output=runs/slurm_logs/%j_extreme3_long.out +#SBATCH --error=runs/slurm_logs/%j_extreme3_long.err + +# ============================================================ +# Experiment: Extreme threshold 3.0 (WINNER from round 2) +# Extended time limit for full depth sweep +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data runs/extreme3_long + +echo "============================================================" +echo "EXTREME THRESHOLD 3.0 (Extended)" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type extreme \ + --lyap_threshold 3.0 \ + --warmup_epochs 10 \ + --data_dir ./data \ + --out_dir runs/extreme3_long \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_extreme4.sbatch b/scripts/run_extreme4.sbatch new file mode 100644 index 0000000..e3cdeab --- /dev/null +++ b/scripts/run_extreme4.sbatch @@ -0,0 +1,54 @@ +#!/bin/bash +#SBATCH --job-name=snn_ext4 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_extreme4.out +#SBATCH --error=runs/slurm_logs/%j_extreme4.err + +# 
============================================================ +# Experiment: Higher Threshold (lambda > 4.0) +# ============================================================ +# Even more permissive threshold. Only penalize truly +# explosive dynamics. May allow deeper networks to learn. +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "EXTREME THRESHOLD 4.0 Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type extreme \ + --lyap_threshold 4.0 \ + --warmup_epochs 10 \ + --data_dir ./data \ + --out_dir runs/depth_scaling_extreme4 \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_extreme_penalty.sbatch b/scripts/run_extreme_penalty.sbatch new file mode 100644 index 0000000..0008797 --- /dev/null +++ b/scripts/run_extreme_penalty.sbatch @@ -0,0 +1,58 @@ +#!/bin/bash +#SBATCH --job-name=snn_extreme +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_extreme.out +#SBATCH --error=runs/slurm_logs/%j_extreme.err + +# ============================================================ +# Experiment 4: Extreme-Only Penalty (lambda > 2.0) +# ============================================================ +# Hypothesis: Instead of trying to push lambda toward 0 or -0.1, +# only penalize when dynamics become EXTREMELY chaotic (lambda > 2). 
+# +# Extreme reg_type: +# - penalty = max(0, lambda - 2.0)^2 +# - No penalty when lambda <= 2.0 +# - Allows moderate chaos while preventing explosion +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "EXTREME-ONLY PENALTY Experiment (lambda > 2.0)" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --reg_type extreme \ + --warmup_epochs 10 \ + --data_dir ./data \ + --out_dir runs/depth_scaling_extreme \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_grid_search.sbatch b/scripts/run_grid_search.sbatch new file mode 100644 index 0000000..07e9a74 --- /dev/null +++ b/scripts/run_grid_search.sbatch @@ -0,0 +1,62 @@ +#!/bin/bash +#SBATCH --job-name=snn_grid_search +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_grid_search.out +#SBATCH --error=runs/slurm_logs/%j_grid_search.err + +# ============================================================ +# Hyperparameter Grid Search for Lyapunov-Regularized SNN +# ============================================================ +# Uses CIFAR-10 (real data) with rate encoding +# Includes warmup for λ_reg to avoid killing learning early +# +# Grid: +# - Depths: 4, 6, 8, 10 +# - λ_reg: 0.01, 0.05, 0.1, 0.2, 0.3 +# - λ_target: 0.0, -0.05, -0.1, -0.2 +# +# Total: 4 depths × 5 λ_reg × 4 λ_target = 80 configurations +# Estimated time: ~4 min/config × 80 = ~5-6 hours +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs + +echo "============================================================" +echo "Hyperparameter Grid Search" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# Using CIFAR-10 (real data) instead of synthetic +# Reduced grid: 4 depths × 5 λ_reg × 4 λ_target = 80 configs +# With warmup to avoid killing learning early +python files/experiments/hyperparameter_grid_search.py \ + --depths 4 6 8 10 \ + --lambda_regs 0.01 0.05 0.1 0.2 0.3 \ + --lambda_targets 0.0 -0.05 -0.1 -0.2 \ + --hidden_dim 256 \ + --epochs 15 \ + --batch_size 128 \ + --T 8 \ + --lr 0.001 \ + --data_dir ./data \ + --out_dir runs/grid_search_cifar10 \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" 
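
For reference, a minimal sketch of the "extreme" penalty that the `--reg_type extreme` runs in this commit describe (run_extreme_penalty, run_extreme3, run_extreme4, run_weak_extreme); the threshold corresponds to `--lyap_threshold` (2.0 by default here, 3.0 and 4.0 in the later variants). This is an illustration of the formula in the comments, not the code in `depth_scaling_benchmark.py`.

```python
import torch

def extreme_penalty(lyap: torch.Tensor, threshold: float = 2.0) -> torch.Tensor:
    """Penalize only extreme chaos: max(0, lambda - threshold)^2.

    Zero penalty while lambda <= threshold, quadratic growth beyond it;
    `threshold` plays the role of the --lyap_threshold flag.
    """
    return torch.relu(lyap - threshold) ** 2
```
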
diff --git a/scripts/run_posthoc.sbatch b/scripts/run_posthoc.sbatch new file mode 100644 index 0000000..ce81918 --- /dev/null +++ b/scripts/run_posthoc.sbatch @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH --job-name=snn_posthoc +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_posthoc.out +#SBATCH --error=runs/slurm_logs/%j_posthoc.err + +# ============================================================ +# Experiment: Post-hoc Lyapunov Fine-tuning +# ============================================================ +# Strategy: +# 1. Train vanilla network for 100 epochs (learn features) +# 2. Fine-tune with Lyapunov regularization for 50 epochs +# +# This allows the network to learn first, then we stabilize +# the dynamics without fighting chaotic initialization. +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data runs/posthoc_finetune + +echo "============================================================" +echo "POST-HOC FINE-TUNING Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/posthoc_finetune.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --pretrain_epochs 100 \ + --finetune_epochs 50 \ + --batch_size 128 \ + --lr 0.001 \ + --finetune_lr 0.0001 \ + --lambda_reg 0.1 \ + --lambda_target -0.1 \ + --reg_type extreme \ + --lyap_threshold 2.0 \ + --data_dir ./data \ + --out_dir runs/posthoc_finetune \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_scaled_grid_d12.sbatch b/scripts/run_scaled_grid_d12.sbatch new file mode 100644 index 0000000..eff5342 --- /dev/null +++ b/scripts/run_scaled_grid_d12.sbatch @@ -0,0 +1,42 @@ +#!/bin/bash +#SBATCH --job-name=scaled_d12 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=44:00:00 +#SBATCH --output=runs/slurm_logs/%j_scaled_grid_d12.out +#SBATCH --error=runs/slurm_logs/%j_scaled_grid_d12.err + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs runs/scaled_grid data + +echo "============================================================" +echo "SCALED REGULARIZATION GRID SEARCH - DEPTH 12" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +echo "Grid: λ_reg=[0.0005, 0.001, 0.002, 0.005] × reg_type=[mult_linear, mult_log]" +echo "Total: 8 experiments" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/scaled_reg_grid_search.py \ + --depth 12 \ + --epochs 100 \ + --batch_size 128 \ + --lr 0.001 \ + --data_dir ./data \ + 
--out_dir ./runs/scaled_grid + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_scaled_grid_d4.sbatch b/scripts/run_scaled_grid_d4.sbatch new file mode 100644 index 0000000..7629804 --- /dev/null +++ b/scripts/run_scaled_grid_d4.sbatch @@ -0,0 +1,42 @@ +#!/bin/bash +#SBATCH --job-name=scaled_d4 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=12:00:00 +#SBATCH --output=runs/slurm_logs/%j_scaled_grid_d4.out +#SBATCH --error=runs/slurm_logs/%j_scaled_grid_d4.err + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs runs/scaled_grid data + +echo "============================================================" +echo "SCALED REGULARIZATION GRID SEARCH - DEPTH 4" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +echo "Grid: λ_reg=[0.0005, 0.001, 0.002, 0.005] × reg_type=[mult_linear, mult_log]" +echo "Total: 8 experiments" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/scaled_reg_grid_search.py \ + --depth 4 \ + --epochs 100 \ + --batch_size 128 \ + --lr 0.001 \ + --data_dir ./data \ + --out_dir ./runs/scaled_grid + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_scaled_grid_d8.sbatch b/scripts/run_scaled_grid_d8.sbatch new file mode 100644 index 0000000..77d480f --- /dev/null +++ b/scripts/run_scaled_grid_d8.sbatch @@ -0,0 +1,42 @@ +#!/bin/bash +#SBATCH --job-name=scaled_d8 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=28:00:00 +#SBATCH --output=runs/slurm_logs/%j_scaled_grid_d8.out +#SBATCH --error=runs/slurm_logs/%j_scaled_grid_d8.err + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs runs/scaled_grid data + +echo "============================================================" +echo "SCALED REGULARIZATION GRID SEARCH - DEPTH 8" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +echo "Grid: λ_reg=[0.0005, 0.001, 0.002, 0.005] × reg_type=[mult_linear, mult_log]" +echo "Total: 8 experiments" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/scaled_reg_grid_search.py \ + --depth 8 \ + --epochs 100 \ + --batch_size 128 \ + --lr 0.001 \ + --data_dir ./data \ + --out_dir ./runs/scaled_grid + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_smnist.sbatch b/scripts/run_smnist.sbatch new file mode 100644 index 
0000000..2913410 --- /dev/null +++ b/scripts/run_smnist.sbatch @@ -0,0 +1,56 @@ +#!/bin/bash +#SBATCH --job-name=snn_smnist +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_smnist.out +#SBATCH --error=runs/slurm_logs/%j_smnist.err + +# ============================================================ +# Sequential MNIST Experiment +# ============================================================ +# This is a HARD benchmark: +# - 784 timesteps (one pixel at a time) +# - Requires long-range temporal memory +# - Shallow networks struggle, deep networks needed +# +# Expected: Lyapunov regularization significantly helps at depth 6+ +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "Sequential MNIST Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# Sequential MNIST with depths up to 10 +python files/experiments/benchmark_experiment.py \ + --dataset smnist \ + --depths 2 4 6 8 10 \ + --hidden_dim 128 \ + --epochs 30 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.3 \ + --lambda_target -0.1 \ + --data_dir ./data \ + --out_dir runs/benchmark \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_speedup_benchmark.sbatch b/scripts/run_speedup_benchmark.sbatch new file mode 100644 index 0000000..8dd9dd9 --- /dev/null +++ b/scripts/run_speedup_benchmark.sbatch @@ -0,0 +1,49 @@ +#!/bin/bash +#SBATCH --job-name=lyap_speedup +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=4 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=32G +#SBATCH --time=01:00:00 +#SBATCH --output=runs/slurm_logs/%j_speedup.out +#SBATCH --error=runs/slurm_logs/%j_speedup.err + +# ============================================================ +# Lyapunov Computation Speedup Benchmark +# ============================================================ +# Tests different optimization approaches: +# - Baseline: Current sequential implementation +# - Approach A: Trajectory-as-batch (P=2) +# - Approach B: Global-norm divergence + single-scale renorm +# - Approach C: torch.compile +# - Combined: All optimizations together +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs + +echo "============================================================" +echo "Lyapunov Speedup Benchmark" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# Run main benchmark + scaling tests +python 
files/experiments/lyapunov_speedup_benchmark.py \ + --batch_size 64 \ + --T 4 \ + --hidden_dims 64 128 256 \ + --scaling + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_stable_init.sbatch b/scripts/run_stable_init.sbatch new file mode 100644 index 0000000..eefe6bb --- /dev/null +++ b/scripts/run_stable_init.sbatch @@ -0,0 +1,60 @@ +#!/bin/bash +#SBATCH --job-name=snn_stable +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_stable_init.out +#SBATCH --error=runs/slurm_logs/%j_stable_init.err + +# ============================================================ +# Experiment 3: Stability-Aware Initialization +# ============================================================ +# Hypothesis: The network starts chaotic (lambda~2-3) because of +# standard Kaiming initialization. Using smaller weights from the +# start should produce more stable dynamics. +# +# Stable init strategy: +# - Scale down weights by 0.5 +# - Use orthogonal init for linear layers (preserves gradient norm) +# - Should produce lambda closer to 0 initially +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "STABLE INITIALIZATION Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.1 \ + --lambda_target -0.1 \ + --reg_type squared \ + --warmup_epochs 20 \ + --stable_init \ + --data_dir ./data \ + --out_dir runs/depth_scaling_stable_init \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_target_1.sbatch b/scripts/run_target_1.sbatch new file mode 100644 index 0000000..ce6bc7d --- /dev/null +++ b/scripts/run_target_1.sbatch @@ -0,0 +1,58 @@ +#!/bin/bash +#SBATCH --job-name=snn_target1 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_target1.out +#SBATCH --error=runs/slurm_logs/%j_target1.err + +# ============================================================ +# Experiment 2: Smarter Target (lambda_target=1.0) +# ============================================================ +# Hypothesis: Targeting lambda=-0.1 (edge of chaos) is too aggressive +# when the network naturally starts at lambda~2-3 (chaotic). 
+# +# With lambda_target=1.0: +# - We aim to REDUCE chaos, not eliminate it +# - Penalty: (lambda - 1.0)^2 is smaller when lambda~2 +# - Allows some chaos while preventing extreme instability +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "SMARTER TARGET Experiment (lambda_target=1.0)" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.1 \ + --lambda_target 1.0 \ + --reg_type squared \ + --warmup_epochs 20 \ + --data_dir ./data \ + --out_dir runs/depth_scaling_target1 \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_weak_extreme.sbatch b/scripts/run_weak_extreme.sbatch new file mode 100644 index 0000000..49e56c7 --- /dev/null +++ b/scripts/run_weak_extreme.sbatch @@ -0,0 +1,58 @@ +#!/bin/bash +#SBATCH --job-name=snn_wkext +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_weak_extreme.out +#SBATCH --error=runs/slurm_logs/%j_weak_extreme.err + +# ============================================================ +# Experiment: Weak Reg + Extreme Threshold +# ============================================================ +# Combine both approaches: +# - Very weak regularization (lambda_reg=0.01) +# - Only penalize extreme chaos (lambda > 3.0) +# +# This should give the network maximum freedom to learn while +# providing a safety net against explosive dynamics. 
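
The weak-reg + extreme-threshold script above combines three knobs: a small weight (`--lambda_reg 0.01`), a high threshold (`--lyap_threshold 3.0`), and a warmup (`--warmup_epochs 20`). A hedged sketch of how such an objective is typically assembled is shown below; the exact warmup schedule used by `depth_scaling_benchmark.py` is not visible in this commit, so a hard on/off gate after warmup is assumed.

```python
import torch
import torch.nn.functional as F

def total_loss(logits, targets, lyap, epoch,
               lambda_reg=0.01, threshold=3.0, warmup_epochs=20):
    """CE plus a weak, thresholded Lyapunov penalty, disabled during warmup.

    The warmup gate (zeroing the penalty for the first N epochs) is an
    assumption for illustration; only the flags themselves appear in the script.
    """
    ce = F.cross_entropy(logits, targets)
    if epoch < warmup_epochs:
        return ce
    penalty = torch.relu(lyap - threshold) ** 2   # "extreme" penalty
    return ce + lambda_reg * penalty
```
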
+# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "WEAK REG + EXTREME Experiment" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.01 \ + --lambda_target -0.1 \ + --reg_type extreme \ + --lyap_threshold 3.0 \ + --warmup_epochs 20 \ + --data_dir ./data \ + --out_dir runs/depth_scaling_weak_extreme \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/run_weak_reg.sbatch b/scripts/run_weak_reg.sbatch new file mode 100644 index 0000000..2444d80 --- /dev/null +++ b/scripts/run_weak_reg.sbatch @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=snn_weak_reg +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=64G +#SBATCH --time=48:00:00 +#SBATCH --output=runs/slurm_logs/%j_weak_reg.out +#SBATCH --error=runs/slurm_logs/%j_weak_reg.err + +# ============================================================ +# Experiment 1: Weaker Regularization (lambda_reg=0.01) +# ============================================================ +# Hypothesis: The current lambda_reg=0.3 is too strong, causing +# the Lyapunov penalty to dominate the CE loss and prevent learning. 
+# +# With lambda_reg=0.01: +# - Penalty contribution: 0.01 * 4 = 0.04 (vs CE ~4.6) +# - Network can still learn while gently being regularized +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs data + +echo "============================================================" +echo "WEAK REGULARIZATION Experiment (lambda_reg=0.01)" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "Start: $(date)" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +python files/experiments/depth_scaling_benchmark.py \ + --dataset cifar100 \ + --depths 4 8 12 16 \ + --T 4 \ + --epochs 150 \ + --batch_size 128 \ + --lr 0.001 \ + --lambda_reg 0.01 \ + --lambda_target -0.1 \ + --reg_type squared \ + --warmup_epochs 20 \ + --data_dir ./data \ + --out_dir runs/depth_scaling_weak_reg \ + --device cuda + +echo "============================================================" +echo "Finished: $(date)" +echo "============================================================" diff --git a/scripts/submit.sh b/scripts/submit.sh new file mode 100755 index 0000000..e70d685 --- /dev/null +++ b/scripts/submit.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# ============================================================ +# Helper script to submit SLURM jobs +# ============================================================ +# +# Usage: +# ./scripts/submit.sh test # Quick interactive test +# ./scripts/submit.sh synthetic # Full synthetic experiment +# ./scripts/submit.sh shd # Full SHD experiment +# ./scripts/submit.sh custom # Show custom submission example +# ============================================================ + +set -e +cd "$(dirname "$0")/.." + +case "$1" in + test) + echo "Submitting quick test job (interactive partition)..." + sbatch scripts/test_interactive.sbatch + ;; + synthetic) + echo "Submitting synthetic data experiment..." + sbatch scripts/run_depth_experiment.sbatch + ;; + shd) + echo "Submitting SHD experiment..." 
+ sbatch scripts/run_depth_experiment_shd.sbatch + ;; + custom) + echo "Custom submission example:" + echo "" + echo " # Override parameters:" + echo " sbatch --export=EPOCHS=100,DEPTHS=\"1 2 4 8\",LAMBDA_REG=0.2 scripts/run_depth_experiment.sbatch" + echo "" + echo " # Use different partition:" + echo " sbatch --partition=gpuA100x4 scripts/run_depth_experiment.sbatch" + echo "" + echo " # Use H200 GPUs for faster training:" + echo " sbatch --partition=gpuH200x8 scripts/run_depth_experiment_shd.sbatch" + ;; + status) + echo "Your current jobs:" + squeue -u $USER + ;; + *) + echo "Usage: $0 {test|synthetic|shd|custom|status}" + echo "" + echo " test - Quick test on interactive partition (~5 min)" + echo " synthetic - Full experiment with synthetic data (~2 hours)" + echo " shd - Full experiment with SHD dataset (~4 hours)" + echo " custom - Show custom submission examples" + echo " status - Show your current jobs" + exit 1 + ;; +esac diff --git a/scripts/test_interactive.sbatch b/scripts/test_interactive.sbatch new file mode 100644 index 0000000..e056417 --- /dev/null +++ b/scripts/test_interactive.sbatch @@ -0,0 +1,102 @@ +#!/bin/bash +#SBATCH --job-name=snn_test +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA40x4-interactive +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=4 +#SBATCH --gpus-per-node=1 +#SBATCH --mem=16G +#SBATCH --time=00:30:00 +#SBATCH --output=runs/slurm_logs/%j_test.out +#SBATCH --error=runs/slurm_logs/%j_test.err + +# ============================================================ +# Quick Test: Verify snnTorch model and Lyapunov computation +# ============================================================ +# Use interactive partition for fast turnaround +# +# Usage: +# sbatch scripts/test_interactive.sbatch +# ============================================================ + +set -e + +PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training" +cd "$PROJECT_DIR" + +mkdir -p runs/slurm_logs + +echo "============================================================" +echo "Quick Test: SNN with Lyapunov Regularization" +echo "Job ID: $SLURM_JOB_ID | Node: $SLURM_NODELIST" +echo "============================================================" +nvidia-smi --query-gpu=name,memory.total --format=csv,noheader +echo "============================================================" + +# Test 1: Model creation and forward pass +echo "Test 1: Model and Lyapunov computation..." +python -c " +import torch +import sys +sys.path.insert(0, '.') +from files.models.snn_snntorch import LyapunovSNN +from files.analysis.stability_monitor import StabilityMonitor + +device = torch.device('cuda') +model = LyapunovSNN(input_dim=100, hidden_dims=[128, 64], num_classes=10).to(device) +x = torch.randn(8, 50, 100, device=device) + +logits, lyap, recordings = model(x, compute_lyapunov=True, record_states=True) +print(f' Logits shape: {logits.shape}') +print(f' Lyapunov exponent: {lyap.item():.4f}') +print(f' Spikes shape: {recordings[\"spikes\"].shape}') +print(' PASSED') +" + +# Test 2: Training loop +echo "" +echo "Test 2: Training loop with Lyapunov regularization..." 
+python -c "
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import sys
+sys.path.insert(0, '.')
+from files.models.snn_snntorch import LyapunovSNN
+
+device = torch.device('cuda')
+model = LyapunovSNN(input_dim=100, hidden_dims=[128, 64, 32], num_classes=10).to(device)
+optimizer = optim.Adam(model.parameters(), lr=1e-3)
+ce_loss = nn.CrossEntropyLoss()
+
+x = torch.randn(16, 50, 100, device=device)
+y = torch.randint(0, 10, (16,), device=device)
+
+for step in range(5):
+    optimizer.zero_grad()
+    logits, lyap, _ = model(x, compute_lyapunov=True, record_states=False)
+    loss = ce_loss(logits, y) + 0.1 * (lyap - 0.0) ** 2
+    loss.backward()
+    optimizer.step()
+    print(f'  Step {step+1}: loss={loss.item():.4f}, lyap={lyap.item():.4f}')
+
+print('  PASSED')
+"
+
+# Test 3: Depth comparison (minimal)
+echo ""
+echo "Test 3: Depth comparison (2 epochs, depths 1,2,4)..."
+python files/experiments/depth_comparison.py \
+    --synthetic \
+    --epochs 2 \
+    --depths 1 2 4 \
+    --hidden_dim 64 \
+    --out_dir runs/test_output \
+    --device cuda \
+    --no-progress
+
+echo ""
+echo "============================================================"
+echo "All tests PASSED"
+echo "============================================================"
diff --git a/scripts/test_optimized.sbatch b/scripts/test_optimized.sbatch
new file mode 100644
index 0000000..bc86f33
--- /dev/null
+++ b/scripts/test_optimized.sbatch
@@ -0,0 +1,24 @@
+#!/bin/bash
+#SBATCH --job-name=test_opt
+#SBATCH --account=bfqt-delta-gpu
+#SBATCH --partition=gpuA40x4
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --gpus-per-node=1
+#SBATCH --mem=16G
+#SBATCH --time=00:10:00
+#SBATCH --output=runs/slurm_logs/%j_test_opt.out
+#SBATCH --error=runs/slurm_logs/%j_test_opt.err
+
+set -e
+
+PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/snn-training"
+cd "$PROJECT_DIR"
+
+mkdir -p runs/slurm_logs
+
+echo "Testing optimized SpikingVGG forward..."
+python scripts/test_optimized_forward.py
+
+echo "Done!"
diff --git a/scripts/test_optimized_forward.py b/scripts/test_optimized_forward.py
new file mode 100644
index 0000000..71f8923
--- /dev/null
+++ b/scripts/test_optimized_forward.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+"""Quick test to verify the optimized SpikingVGG forward works correctly.
+
+Tests:
+1. Forward pass without Lyapunov
+2. Forward pass with Lyapunov (global renorm)
+3. Backward pass with gradients
+4. NaN-free gradient check
+5. Multiple training steps (training simulation)
+"""
+
+import sys
+sys.path.insert(0, '/projects/bfqt/users/yurenh2/ml-projects/snn-training')
+
+import torch
+import torch.nn as nn
+
+from files.experiments.depth_scaling_benchmark import SpikingVGG
+
+def test_forward():
+    """Test that the forward pass works with and without Lyapunov computation."""
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print(f"Testing on device: {device}")
+    print("Using: Global delta + Global renorm (Option 1 - textbook LE)")
+
+    # Create model
+    model = SpikingVGG(
+        in_channels=3,
+        num_classes=10,
+        base_channels=64,
+        num_stages=3,
+        blocks_per_stage=2,
+        T=4,
+    ).to(device)
+
+    print(f"Model depth: {model.depth} conv layers")
+    print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")
+
+    # Test input
+    B = 8
+    x = torch.randn(B, 3, 32, 32, device=device)
+    y = torch.randint(0, 10, (B,), device=device)
+
+    # Test 1: Forward without Lyapunov
+    print("\n[Test 1] Forward without Lyapunov...")
+    logits, lyap, _ = model(x, compute_lyapunov=False)
+    assert logits.shape == (B, 10), f"Expected (B, 10), got {logits.shape}"
+    assert lyap is None, "Expected lyap to be None"
+    print(f"  Logits shape: {logits.shape} ✓")
+
+    # Test 2: Forward with Lyapunov
+    print("\n[Test 2] Forward with Lyapunov...")
+    logits, lyap, _ = model(x, compute_lyapunov=True)
+    assert logits.shape == (B, 10), f"Expected (B, 10), got {logits.shape}"
+    assert lyap is not None, "Expected lyap to be a tensor"
+    assert isinstance(lyap.item(), float), "Expected lyap to be a scalar"
+    print(f"  Logits shape: {logits.shape} ✓")
+    print(f"  Lyapunov exponent: {lyap.item():.4f} ✓")
+
+    # Test 3: Backward pass
+    print("\n[Test 3] Backward pass...")
+    criterion = nn.CrossEntropyLoss()
+    loss = criterion(logits, y) + 0.3 * (lyap ** 2)
+    loss.backward()
+
+    grad_norm = sum(p.grad.norm().item()**2 for p in model.parameters() if p.grad is not None)**0.5
+    print(f"  Loss: {loss.item():.4f} ✓")
+    print(f"  Gradient norm: {grad_norm:.4f} ✓")
+
+    # Test 4: Check gradients are not NaN
+    has_nan = any(torch.isnan(p.grad).any() for p in model.parameters() if p.grad is not None)
+    assert not has_nan, "Found NaN in gradients!"
+    print("\n[Test 4] No NaN gradients ✓")
+
+    # Test 5: Multiple forward-backward passes (training simulation)
+    print("\n[Test 5] Multiple training steps...")
+    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+
+    for step in range(5):
+        optimizer.zero_grad()
+        x_batch = torch.randn(B, 3, 32, 32, device=device)
+        y_batch = torch.randint(0, 10, (B,), device=device)
+
+        logits, lyap, _ = model(x_batch, compute_lyapunov=True)
+        loss = criterion(logits, y_batch) + 0.3 * (lyap ** 2)
+        loss.backward()
+        optimizer.step()
+
+        print(f"  Step {step+1}: loss={loss.item():.4f}, λ={lyap.item():.4f}")
+
+    print("\n" + "="*50)
+    print("ALL TESTS PASSED!")
+    print("="*50)
+
+if __name__ == "__main__":
+    test_forward()
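
The "Global delta + Global renorm (Option 1 - textbook LE)" banner printed by test_optimized_forward.py refers to the textbook recipe for estimating the largest Lyapunov exponent: perturb the full state once, evolve the reference and perturbed trajectories in parallel, and after every step record the log growth of their separation and renormalize the perturbation back to a fixed norm. The SpikingVGG internals that implement this live in files/experiments/depth_scaling_benchmark.py and are not part of this diff; the sketch below only illustrates the generic recipe, and lyapunov_estimate, step_fn and the toy map are illustrative names rather than repository code.

    import torch

    def lyapunov_estimate(step_fn, x0, T, eps=1e-3):
        """Largest-Lyapunov-exponent estimate via one global perturbation + global renorm."""
        delta = torch.randn_like(x0)
        delta = eps * delta / delta.norm()           # single global perturbation of norm eps
        x, x_pert = x0, x0 + delta

        log_growth = []
        for _ in range(T):
            x, x_pert = step_fn(x), step_fn(x_pert)  # evolve both trajectories one step
            diff = x_pert - x
            dist = diff.norm().clamp_min(1e-12)      # global (whole-state) separation
            log_growth.append(torch.log(dist / eps))
            x_pert = x + diff * (eps / dist)         # renormalize back to norm eps

        return torch.stack(log_growth).mean()        # mean log growth per step ≈ λ

    # Toy usage on a contractive random map (λ expected to be negative):
    W = 0.5 * torch.randn(64, 64) / 64 ** 0.5
    print(lyapunov_estimate(lambda s: torch.tanh(s @ W), torch.randn(64), T=100).item())

Renormalizing after every step is what keeps the estimate well defined over long horizons: without it the separation either saturates in the chaotic case or underflows to zero in the stable one.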

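The magnitude argument in the weak-regularization sbatch script above ("0.01 * 4 = 0.04 vs CE ~4.6") can be made concrete with a short sketch. It assumes the benchmark combines cross-entropy with a squared penalty lambda_reg * (λ - λ_target)², which is what the --reg_type squared, --lambda_reg 0.01 and --lambda_target -0.1 flags suggest; the actual implementation is in files/experiments/depth_scaling_benchmark.py and is not shown here, and weak_reg_loss is an illustrative helper rather than a repository function.

    import math

    def weak_reg_loss(ce, lyap, lambda_reg=0.01, lambda_target=-0.1):
        """Cross-entropy plus a squared Lyapunov penalty (illustrative sketch only)."""
        return ce + lambda_reg * (lyap - lambda_target) ** 2

    ce = math.log(100)              # untrained CIFAR-100 classifier: CE ≈ ln(100) ≈ 4.61
    lyap = 1.9                      # a fairly chaotic network: (1.9 - (-0.1))**2 = 4.0
    print(weak_reg_loss(ce, lyap))  # ≈ 4.645, i.e. the penalty adds only 0.01 * 4 = 0.04

With lambda_reg this small the cross-entropy term dominates early training by two orders of magnitude, which matches the comment that the network can still learn while being gently regularized.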