diff options
Diffstat (limited to 'results/dfa_canonical_lam1e-4_30ep/results_cifar10.json')
| -rw-r--r-- | results/dfa_canonical_lam1e-4_30ep/results_cifar10.json | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/results/dfa_canonical_lam1e-4_30ep/results_cifar10.json b/results/dfa_canonical_lam1e-4_30ep/results_cifar10.json new file mode 100644 index 0000000..c5480c9 --- /dev/null +++ b/results/dfa_canonical_lam1e-4_30ep/results_cifar10.json @@ -0,0 +1,549 @@ +{ + "42": { + "dfa": { + "log": { + "train_loss": [ + 2.0261129596710203, + 1.965970752182007, + 1.9461470681762696, + 1.931545509109497, + 1.9184623413085937, + 1.9065282035064697, + 1.902437851715088, + 1.895544966506958, + 1.8950473132705687, + 1.889725124206543, + 1.8855420764541626, + 1.880997481918335, + 1.8747885485839844, + 1.8749551737976073, + 1.873095685119629, + 1.8711490287017822, + 1.8692577294540405, + 1.865205725440979, + 1.8637552904891967, + 1.8656678380966187, + 1.8631304251480103, + 1.8602397272491455, + 1.8614968471908568, + 1.860624254989624, + 1.8599734017944336, + 1.8587864357757569, + 1.854469165649414, + 1.856863537902832, + 1.8563300177001953, + 1.8545278675079346 + ], + "train_acc": [ + 0.26118, + 0.28972, + 0.30092, + 0.3046, + 0.30976, + 0.31652, + 0.31542, + 0.31824, + 0.3199, + 0.32334, + 0.32476, + 0.32794, + 0.32848, + 0.32864, + 0.32798, + 0.33074, + 0.33078, + 0.33602, + 0.33204, + 0.33176, + 0.33496, + 0.33462, + 0.33306, + 0.33528, + 0.33536, + 0.33594, + 0.33782, + 0.33676, + 0.33434, + 0.33972 + ], + "test_acc": [ + 0.3121, + 0.3285, + 0.3222, + 0.32, + 0.3344, + 0.3365, + 0.3411, + 0.3402, + 0.3386, + 0.3497, + 0.3454, + 0.3599, + 0.3558, + 0.3517, + 0.3509, + 0.3522, + 0.3602, + 0.3593, + 0.3563, + 0.3609, + 0.3561, + 0.3571, + 0.3611, + 0.3599, + 0.3629, + 0.3627, + 0.3615, + 0.3611, + 0.3613, + 0.3617 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4050517678260803, + -0.013029102236032486, + 0.03315805643796921, + -0.013960793614387512 + ], + "perturbation_rho": [ + 0.12510338425636292, + -0.006976948119699955, + -0.015925724059343338, + 0.011312087066471577 + ], + "nudging": { + "0.001": [ + -7.806112989783287e-06, + -6.705522537231445e-08, + -3.022141754627228e-07, + 8.172355592250824e-08 + ], + "0.003": [ + -2.3321015760302544e-05, + -2.1257437765598297e-07, + -9.727664291858673e-07, + 2.891756594181061e-07 + ], + "0.01": [ + -7.774354889988899e-05, + -6.752088665962219e-07, + -3.1027011573314667e-06, + 1.0223593562841415e-06 + ] + }, + "hidden_norms_per_layer": [ + 10956.861328125, + 18143.607421875, + 21060.630859375, + 23016.607421875, + 24491.1171875 + ], + "bp_grad_norms_per_layer": [ + 7.272880793607328e-06, + 4.709177119366359e-06, + 4.678456662077224e-06, + 4.567728865367826e-06, + 4.537882432487095e-06 + ] + }, + "drift": { + "embed.weight": 89.9981502906406, + "embed.bias": 98.48191085272376, + "blocks.0.ln.weight": 0.9167992472648621, + "blocks.0.w1.weight": 10.6815522190785, + "blocks.0.w1.bias": 7.828161751177281, + "blocks.0.w2.weight": 38.808210617134726, + "blocks.1.ln.weight": 0.9728876352310181, + "blocks.1.w1.weight": 10.963974441809082, + "blocks.1.w1.bias": 5.055703734088147, + "blocks.1.w2.weight": 35.990717171803034, + "blocks.2.ln.weight": 0.9287830591201782, + "blocks.2.w1.weight": 11.93235751064561, + "blocks.2.w1.bias": 5.599607529376271, + "blocks.2.w2.weight": 43.70271229681688, + "blocks.3.ln.weight": 0.9019887447357178, + "blocks.3.w1.weight": 11.73703214757272, + "blocks.3.w1.bias": 4.954978948196952, + "blocks.3.w2.weight": 39.60266222485616, + "out_ln.weight": 0.1381552666425705, + "out_head.weight": 1.6594692518127092, + "out_head.bias": 1.1554215734743463 + } + } + }, + "123": { + "dfa": { + "log": { + "train_loss": [ + 2.0199886853790283, + 1.9583829278564453, + 1.9399597728729248, + 1.9226381231689453, + 1.9109827480697632, + 1.9015012389755248, + 1.8941318827056886, + 1.890287815246582, + 1.8868233013153075, + 1.8811580016708374, + 1.8821455502319335, + 1.879702535018921, + 1.8747499548721314, + 1.8705888650894165, + 1.8737954347991943, + 1.8678274332427978, + 1.861484291419983, + 1.8633367358398438, + 1.864215413131714, + 1.8632674326324463, + 1.862329735069275, + 1.863224948348999, + 1.8597282915878295, + 1.8569441051864624, + 1.8570497719955443, + 1.8558143846893311, + 1.8551305682373047, + 1.8570148430633544, + 1.8583727548217774, + 1.8555032357025147 + ], + "train_acc": [ + 0.26116, + 0.29306, + 0.30176, + 0.30766, + 0.31666, + 0.32172, + 0.32304, + 0.32316, + 0.32528, + 0.32762, + 0.32708, + 0.32632, + 0.32938, + 0.33256, + 0.33252, + 0.3339, + 0.33636, + 0.33564, + 0.3344, + 0.33576, + 0.33646, + 0.33592, + 0.33694, + 0.33802, + 0.33924, + 0.33946, + 0.34026, + 0.33802, + 0.33666, + 0.34056 + ], + "test_acc": [ + 0.3089, + 0.333, + 0.3314, + 0.3446, + 0.3445, + 0.3441, + 0.3434, + 0.3491, + 0.3456, + 0.3411, + 0.3524, + 0.3535, + 0.3493, + 0.3523, + 0.3599, + 0.3569, + 0.3643, + 0.3577, + 0.3539, + 0.3635, + 0.3616, + 0.3633, + 0.3622, + 0.3652, + 0.3634, + 0.3637, + 0.3622, + 0.3614, + 0.3636, + 0.3637 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4339396357536316, + -0.10015994310379028, + -0.017887549474835396, + 0.040999654680490494 + ], + "perturbation_rho": [ + 0.19203181564807892, + -0.038277432322502136, + 0.011904019862413406, + 0.005113246850669384 + ], + "nudging": { + "0.001": [ + -1.0719290003180504e-05, + 1.0670628398656845e-06, + 8.614733815193176e-08, + -5.248002707958221e-07 + ], + "0.003": [ + -3.2095471397042274e-05, + 3.2514799386262894e-06, + 3.771856427192688e-07, + -1.496635377407074e-06 + ], + "0.01": [ + -0.0001069442369043827, + 1.088087446987629e-05, + 1.3296958059072495e-06, + -4.966510459780693e-06 + ] + }, + "hidden_norms_per_layer": [ + 11421.578125, + 15917.556640625, + 17952.0625, + 20818.056640625, + 22884.724609375 + ], + "bp_grad_norms_per_layer": [ + 8.24415656097699e-06, + 5.0456683311495e-06, + 5.334758498065639e-06, + 5.458482974063372e-06, + 4.856112354900688e-06 + ] + }, + "drift": { + "embed.weight": 86.77573804339026, + "embed.bias": 84.55079592528128, + "blocks.0.ln.weight": 0.873725414276123, + "blocks.0.w1.weight": 10.119984774778466, + "blocks.0.w1.bias": 8.290345155770092, + "blocks.0.w2.weight": 38.8465868516302, + "blocks.1.ln.weight": 0.8992828726768494, + "blocks.1.w1.weight": 10.725943451191046, + "blocks.1.w1.bias": 6.0767567772252775, + "blocks.1.w2.weight": 34.06827682802752, + "blocks.2.ln.weight": 0.8814435601234436, + "blocks.2.w1.weight": 11.093024893301187, + "blocks.2.w1.bias": 5.85466310572377, + "blocks.2.w2.weight": 39.17455488648176, + "blocks.3.ln.weight": 0.864281952381134, + "blocks.3.w1.weight": 11.522907264038254, + "blocks.3.w1.bias": 5.830217292197888, + "blocks.3.w2.weight": 44.7400774921075, + "out_ln.weight": 0.14324577152729034, + "out_head.weight": 1.7398795739866528, + "out_head.bias": 1.603565887046315 + } + } + }, + "456": { + "dfa": { + "log": { + "train_loss": [ + 2.0270494400787356, + 1.9626524370956422, + 1.9387351146697998, + 1.9183588446426392, + 1.9022310787582397, + 1.8986060205841064, + 1.889561926574707, + 1.8879889819335938, + 1.879786368637085, + 1.8794735382461547, + 1.8745473431777955, + 1.8756215816497803, + 1.871411202316284, + 1.871703953819275, + 1.8679741104507446, + 1.8662110556793212, + 1.8646783707427979, + 1.8631719284820556, + 1.8607554779052735, + 1.8609626446151732, + 1.8584174155426025, + 1.856995953140259, + 1.8584875451660157, + 1.8541203423309327, + 1.8528442667388916, + 1.8555702542114259, + 1.854158247909546, + 1.8552892169570923, + 1.8528213509750366, + 1.853795170211792 + ], + "train_acc": [ + 0.25994, + 0.28998, + 0.30092, + 0.30864, + 0.31382, + 0.31866, + 0.32452, + 0.32284, + 0.32676, + 0.32548, + 0.32872, + 0.32832, + 0.33044, + 0.32716, + 0.32984, + 0.33082, + 0.33492, + 0.33234, + 0.33334, + 0.3352, + 0.33426, + 0.33602, + 0.33428, + 0.33574, + 0.33652, + 0.33572, + 0.33734, + 0.33416, + 0.33524, + 0.33676 + ], + "test_acc": [ + 0.303, + 0.3213, + 0.3218, + 0.3366, + 0.3324, + 0.3414, + 0.3426, + 0.3355, + 0.3523, + 0.357, + 0.3577, + 0.3603, + 0.3571, + 0.3588, + 0.3587, + 0.3621, + 0.3612, + 0.3582, + 0.3596, + 0.3601, + 0.3617, + 0.3598, + 0.3607, + 0.3598, + 0.3627, + 0.3609, + 0.3619, + 0.3611, + 0.3612, + 0.3614 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.42865899205207825, + -0.05478518456220627, + 0.038068704307079315, + -0.056306466460227966 + ], + "perturbation_rho": [ + 0.20215260982513428, + 0.011766819283366203, + 0.013697815127670765, + -0.005670403130352497 + ], + "nudging": { + "0.001": [ + -9.712064638733864e-06, + 4.707835614681244e-07, + -3.8545113056898117e-07, + 4.920875653624535e-07 + ], + "0.003": [ + -2.92585464194417e-05, + 1.3945391401648521e-06, + -1.1923257261514664e-06, + 1.539476215839386e-06 + ], + "0.01": [ + -9.755697101354599e-05, + 4.76837158203125e-06, + -4.0211016312241554e-06, + 5.003763362765312e-06 + ] + }, + "hidden_norms_per_layer": [ + 10800.751953125, + 18185.798828125, + 21568.359375, + 23534.529296875, + 25767.765625 + ], + "bp_grad_norms_per_layer": [ + 8.111597708193585e-06, + 4.7823082240938675e-06, + 4.949825324729318e-06, + 4.701300895249005e-06, + 4.799476755579235e-06 + ] + }, + "drift": { + "embed.weight": 91.29796969081434, + "embed.bias": 132.64457736863756, + "blocks.0.ln.weight": 0.9450518488883972, + "blocks.0.w1.weight": 11.023232822335661, + "blocks.0.w1.bias": 7.565836772828061, + "blocks.0.w2.weight": 40.72087672870546, + "blocks.1.ln.weight": 0.9085477590560913, + "blocks.1.w1.weight": 10.945316270784486, + "blocks.1.w1.bias": 4.626647858456354, + "blocks.1.w2.weight": 34.33902143396201, + "blocks.2.ln.weight": 0.900463879108429, + "blocks.2.w1.weight": 11.644326056575537, + "blocks.2.w1.bias": 5.303598149511605, + "blocks.2.w2.weight": 40.633521007369026, + "blocks.3.ln.weight": 0.898414671421051, + "blocks.3.w1.weight": 11.992557831950368, + "blocks.3.w1.bias": 4.96772779247091, + "blocks.3.w2.weight": 42.20788924018908, + "out_ln.weight": 0.14031164348125458, + "out_head.weight": 1.830198193189751, + "out_head.bias": 1.3683500872345247 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 4, + "batch_size": 128, + "epochs": 30, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 42, + 123, + 456 + ], + "gpu": 0, + "output_dir": "results/dfa_canonical_lam1e-4_30ep", + "methods": [ + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0001, + "num_classes": 10 + } +}
\ No newline at end of file |
