# Architecture config for the transformer baseline model.
# `name` values use a `module.path@ClassName` form; presumably the project's
# loader resolves them to Python classes -- confirm against the loader.
name: recursive_reasoning.transformers_baseline@Model_ACTV2

# Loss head wrapped around the model.
# NOTE(review): nesting was reconstructed from a flattened source line;
# confirm that `loss_type` belongs under `loss` rather than at top level.
loss:
  name: losses@ACTLossHead
  loss_type: stablemax_cross_entropy

# Halting settings.
halt_exploration_prob: 0.1
halt_max_steps: 16

H_cycles: 1  # kept for compatibility
H_layers: 8

# Transformer dimensions.
# NOTE(review): 512 is not an integer multiple of 12; confirm the model
# accepts a hidden_size that num_heads does not divide evenly.
hidden_size: 512
num_heads: 12
expansion: 4

# OmegaConf-style relative interpolation: takes the value of the sibling
# `hidden_size` key in this same mapping.
puzzle_emb_ndim: ${.hidden_size}

pos_encodings: rope