{ "42": { "dfa": { "log": { "train_loss": [ 2.0261129596710203, 1.965970752182007, 1.9461470681762696, 1.931545509109497, 1.9184623413085937, 1.9065282035064697, 1.902437851715088, 1.895544966506958, 1.8950473132705687, 1.889725124206543, 1.8855420764541626, 1.880997481918335, 1.8747885485839844, 1.8749551737976073, 1.873095685119629, 1.8711490287017822, 1.8692577294540405, 1.865205725440979, 1.8637552904891967, 1.8656678380966187, 1.8631304251480103, 1.8602397272491455, 1.8614968471908568, 1.860624254989624, 1.8599734017944336, 1.8587864357757569, 1.854469165649414, 1.856863537902832, 1.8563300177001953, 1.8545278675079346 ], "train_acc": [ 0.26118, 0.28972, 0.30092, 0.3046, 0.30976, 0.31652, 0.31542, 0.31824, 0.3199, 0.32334, 0.32476, 0.32794, 0.32848, 0.32864, 0.32798, 0.33074, 0.33078, 0.33602, 0.33204, 0.33176, 0.33496, 0.33462, 0.33306, 0.33528, 0.33536, 0.33594, 0.33782, 0.33676, 0.33434, 0.33972 ], "test_acc": [ 0.3121, 0.3285, 0.3222, 0.32, 0.3344, 0.3365, 0.3411, 0.3402, 0.3386, 0.3497, 0.3454, 0.3599, 0.3558, 0.3517, 0.3509, 0.3522, 0.3602, 0.3593, 0.3563, 0.3609, 0.3561, 0.3571, 0.3611, 0.3599, 0.3629, 0.3627, 0.3615, 0.3611, 0.3613, 0.3617 ] }, "diagnostics": { "bp_cosine": [ 0.4050517678260803, -0.013029102236032486, 0.03315805643796921, -0.013960793614387512 ], "perturbation_rho": [ 0.12510338425636292, -0.006976948119699955, -0.015925724059343338, 0.011312087066471577 ], "nudging": { "0.001": [ -7.806112989783287e-06, -6.705522537231445e-08, -3.022141754627228e-07, 8.172355592250824e-08 ], "0.003": [ -2.3321015760302544e-05, -2.1257437765598297e-07, -9.727664291858673e-07, 2.891756594181061e-07 ], "0.01": [ -7.774354889988899e-05, -6.752088665962219e-07, -3.1027011573314667e-06, 1.0223593562841415e-06 ] }, "hidden_norms_per_layer": [ 10956.861328125, 18143.607421875, 21060.630859375, 23016.607421875, 24491.1171875 ], "bp_grad_norms_per_layer": [ 7.272880793607328e-06, 4.709177119366359e-06, 4.678456662077224e-06, 4.567728865367826e-06, 4.537882432487095e-06 ] }, "drift": { "embed.weight": 89.9981502906406, "embed.bias": 98.48191085272376, "blocks.0.ln.weight": 0.9167992472648621, "blocks.0.w1.weight": 10.6815522190785, "blocks.0.w1.bias": 7.828161751177281, "blocks.0.w2.weight": 38.808210617134726, "blocks.1.ln.weight": 0.9728876352310181, "blocks.1.w1.weight": 10.963974441809082, "blocks.1.w1.bias": 5.055703734088147, "blocks.1.w2.weight": 35.990717171803034, "blocks.2.ln.weight": 0.9287830591201782, "blocks.2.w1.weight": 11.93235751064561, "blocks.2.w1.bias": 5.599607529376271, "blocks.2.w2.weight": 43.70271229681688, "blocks.3.ln.weight": 0.9019887447357178, "blocks.3.w1.weight": 11.73703214757272, "blocks.3.w1.bias": 4.954978948196952, "blocks.3.w2.weight": 39.60266222485616, "out_ln.weight": 0.1381552666425705, "out_head.weight": 1.6594692518127092, "out_head.bias": 1.1554215734743463 } } }, "123": { "dfa": { "log": { "train_loss": [ 2.0199886853790283, 1.9583829278564453, 1.9399597728729248, 1.9226381231689453, 1.9109827480697632, 1.9015012389755248, 1.8941318827056886, 1.890287815246582, 1.8868233013153075, 1.8811580016708374, 1.8821455502319335, 1.879702535018921, 1.8747499548721314, 1.8705888650894165, 1.8737954347991943, 1.8678274332427978, 1.861484291419983, 1.8633367358398438, 1.864215413131714, 1.8632674326324463, 1.862329735069275, 1.863224948348999, 1.8597282915878295, 1.8569441051864624, 1.8570497719955443, 1.8558143846893311, 1.8551305682373047, 1.8570148430633544, 1.8583727548217774, 1.8555032357025147 ], "train_acc": [ 0.26116, 0.29306, 0.30176, 0.30766, 0.31666, 0.32172, 0.32304, 0.32316, 0.32528, 0.32762, 0.32708, 0.32632, 0.32938, 0.33256, 0.33252, 0.3339, 0.33636, 0.33564, 0.3344, 0.33576, 0.33646, 0.33592, 0.33694, 0.33802, 0.33924, 0.33946, 0.34026, 0.33802, 0.33666, 0.34056 ], "test_acc": [ 0.3089, 0.333, 0.3314, 0.3446, 0.3445, 0.3441, 0.3434, 0.3491, 0.3456, 0.3411, 0.3524, 0.3535, 0.3493, 0.3523, 0.3599, 0.3569, 0.3643, 0.3577, 0.3539, 0.3635, 0.3616, 0.3633, 0.3622, 0.3652, 0.3634, 0.3637, 0.3622, 0.3614, 0.3636, 0.3637 ] }, "diagnostics": { "bp_cosine": [ 0.4339396357536316, -0.10015994310379028, -0.017887549474835396, 0.040999654680490494 ], "perturbation_rho": [ 0.19203181564807892, -0.038277432322502136, 0.011904019862413406, 0.005113246850669384 ], "nudging": { "0.001": [ -1.0719290003180504e-05, 1.0670628398656845e-06, 8.614733815193176e-08, -5.248002707958221e-07 ], "0.003": [ -3.2095471397042274e-05, 3.2514799386262894e-06, 3.771856427192688e-07, -1.496635377407074e-06 ], "0.01": [ -0.0001069442369043827, 1.088087446987629e-05, 1.3296958059072495e-06, -4.966510459780693e-06 ] }, "hidden_norms_per_layer": [ 11421.578125, 15917.556640625, 17952.0625, 20818.056640625, 22884.724609375 ], "bp_grad_norms_per_layer": [ 8.24415656097699e-06, 5.0456683311495e-06, 5.334758498065639e-06, 5.458482974063372e-06, 4.856112354900688e-06 ] }, "drift": { "embed.weight": 86.77573804339026, "embed.bias": 84.55079592528128, "blocks.0.ln.weight": 0.873725414276123, "blocks.0.w1.weight": 10.119984774778466, "blocks.0.w1.bias": 8.290345155770092, "blocks.0.w2.weight": 38.8465868516302, "blocks.1.ln.weight": 0.8992828726768494, "blocks.1.w1.weight": 10.725943451191046, "blocks.1.w1.bias": 6.0767567772252775, "blocks.1.w2.weight": 34.06827682802752, "blocks.2.ln.weight": 0.8814435601234436, "blocks.2.w1.weight": 11.093024893301187, "blocks.2.w1.bias": 5.85466310572377, "blocks.2.w2.weight": 39.17455488648176, "blocks.3.ln.weight": 0.864281952381134, "blocks.3.w1.weight": 11.522907264038254, "blocks.3.w1.bias": 5.830217292197888, "blocks.3.w2.weight": 44.7400774921075, "out_ln.weight": 0.14324577152729034, "out_head.weight": 1.7398795739866528, "out_head.bias": 1.603565887046315 } } }, "456": { "dfa": { "log": { "train_loss": [ 2.0270494400787356, 1.9626524370956422, 1.9387351146697998, 1.9183588446426392, 1.9022310787582397, 1.8986060205841064, 1.889561926574707, 1.8879889819335938, 1.879786368637085, 1.8794735382461547, 1.8745473431777955, 1.8756215816497803, 1.871411202316284, 1.871703953819275, 1.8679741104507446, 1.8662110556793212, 1.8646783707427979, 1.8631719284820556, 1.8607554779052735, 1.8609626446151732, 1.8584174155426025, 1.856995953140259, 1.8584875451660157, 1.8541203423309327, 1.8528442667388916, 1.8555702542114259, 1.854158247909546, 1.8552892169570923, 1.8528213509750366, 1.853795170211792 ], "train_acc": [ 0.25994, 0.28998, 0.30092, 0.30864, 0.31382, 0.31866, 0.32452, 0.32284, 0.32676, 0.32548, 0.32872, 0.32832, 0.33044, 0.32716, 0.32984, 0.33082, 0.33492, 0.33234, 0.33334, 0.3352, 0.33426, 0.33602, 0.33428, 0.33574, 0.33652, 0.33572, 0.33734, 0.33416, 0.33524, 0.33676 ], "test_acc": [ 0.303, 0.3213, 0.3218, 0.3366, 0.3324, 0.3414, 0.3426, 0.3355, 0.3523, 0.357, 0.3577, 0.3603, 0.3571, 0.3588, 0.3587, 0.3621, 0.3612, 0.3582, 0.3596, 0.3601, 0.3617, 0.3598, 0.3607, 0.3598, 0.3627, 0.3609, 0.3619, 0.3611, 0.3612, 0.3614 ] }, "diagnostics": { "bp_cosine": [ 0.42865899205207825, -0.05478518456220627, 0.038068704307079315, -0.056306466460227966 ], "perturbation_rho": [ 0.20215260982513428, 0.011766819283366203, 0.013697815127670765, -0.005670403130352497 ], "nudging": { "0.001": [ -9.712064638733864e-06, 4.707835614681244e-07, -3.8545113056898117e-07, 4.920875653624535e-07 ], "0.003": [ -2.92585464194417e-05, 1.3945391401648521e-06, -1.1923257261514664e-06, 1.539476215839386e-06 ], "0.01": [ -9.755697101354599e-05, 4.76837158203125e-06, -4.0211016312241554e-06, 5.003763362765312e-06 ] }, "hidden_norms_per_layer": [ 10800.751953125, 18185.798828125, 21568.359375, 23534.529296875, 25767.765625 ], "bp_grad_norms_per_layer": [ 8.111597708193585e-06, 4.7823082240938675e-06, 4.949825324729318e-06, 4.701300895249005e-06, 4.799476755579235e-06 ] }, "drift": { "embed.weight": 91.29796969081434, "embed.bias": 132.64457736863756, "blocks.0.ln.weight": 0.9450518488883972, "blocks.0.w1.weight": 11.023232822335661, "blocks.0.w1.bias": 7.565836772828061, "blocks.0.w2.weight": 40.72087672870546, "blocks.1.ln.weight": 0.9085477590560913, "blocks.1.w1.weight": 10.945316270784486, "blocks.1.w1.bias": 4.626647858456354, "blocks.1.w2.weight": 34.33902143396201, "blocks.2.ln.weight": 0.900463879108429, "blocks.2.w1.weight": 11.644326056575537, "blocks.2.w1.bias": 5.303598149511605, "blocks.2.w2.weight": 40.633521007369026, "blocks.3.ln.weight": 0.898414671421051, "blocks.3.w1.weight": 11.992557831950368, "blocks.3.w1.bias": 4.96772779247091, "blocks.3.w2.weight": 42.20788924018908, "out_ln.weight": 0.14031164348125458, "out_head.weight": 1.830198193189751, "out_head.bias": 1.3683500872345247 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42, 123, 456 ], "gpu": 0, "output_dir": "results/dfa_canonical_lam1e-4_30ep", "methods": [ "dfa" ], "random_targets": false, "penalty_lam": 0.0001, "num_classes": 10 } }