From 93d77b197d457b1fdfa7341ecd59fc460b20d6b1 Mon Sep 17 00:00:00 2001
From: YurenHao0426 <blackhao0426@gmail.com>
Date: Mon, 9 Feb 2026 11:23:15 -0600
Subject: =?UTF-8?q?Fix=20init=20state:=20add=20logit=5Fbias=20so=20A?=
 =?UTF-8?q?=E2=89=881=20at=20init=20(dense=20connectivity)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add learnable logit_bias=15.0 to PredictorMLP, so σ(15/τ_init) ≈ 0.95
  at init, reproducing dense connectivity instead of random A≈0.25
- Fix dtype mismatch: cast A to model dtype (bfloat16) in DAGFormerOLMo.forward
- Fix YAML lr parsing: add type coercion in TrainConfig.from_yaml
- Fix device mismatch: call self.to(device) in StructurePredictor.__init__
- Run training with python -u for unbuffered SLURM output; set TOKENIZERS_PARALLELISM=false
- Delete stale eval_cache.pt (built with buggy MLP input code)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 scripts/slurm_train.sh | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'scripts')

diff --git a/scripts/slurm_train.sh b/scripts/slurm_train.sh
index 6b283ea..e1df687 100644
--- a/scripts/slurm_train.sh
+++ b/scripts/slurm_train.sh
@@ -1,18 +1,9 @@
 #!/bin/bash
-#SBATCH --partition=gpuA40x4
-#SBATCH --account=bfqt-delta-gpu
-#SBATCH --nodes=1
-#SBATCH --gpus-per-node=1
-#SBATCH --time=02:00:00
-#SBATCH --mem=64g
-#SBATCH --job-name=dagformer-sanity
-#SBATCH --output=logs/sanity_%j.out
-#SBATCH --error=logs/sanity_%j.err
-
 export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache
 export TRANSFORMERS_CACHE=/projects/bfqt/users/yurenh2/hf_cache/transformers
 export HF_HUB_CACHE=/projects/bfqt/users/yurenh2/hf_cache/hub
 export HF_DATASETS_CACHE=/projects/bfqt/users/yurenh2/hf_cache/datasets
+export TOKENIZERS_PARALLELISM=false
 
 export PYTHONPATH=/projects/bfqt/users/yurenh2/ml-projects/DAGFormer:$PYTHONPATH
 export PATH=$HOME/.local/bin:$PATH
@@ -27,4 +18,4 @@ echo "GPU: $(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader)"
 echo ""
 
 echo "=== Starting training ==="
-python3 scripts/train.py --config configs/sanity_check.yaml
+python3 -u scripts/train.py --config configs/sanity_check.yaml
-- 
cgit v1.2.3