summaryrefslogtreecommitdiff
path: root/results/dfa_canonical_lam1e-4_30ep/results_cifar10.json
diff options
context:
space:
mode:
Diffstat (limited to 'results/dfa_canonical_lam1e-4_30ep/results_cifar10.json')
-rw-r--r--results/dfa_canonical_lam1e-4_30ep/results_cifar10.json549
1 files changed, 549 insertions, 0 deletions
diff --git a/results/dfa_canonical_lam1e-4_30ep/results_cifar10.json b/results/dfa_canonical_lam1e-4_30ep/results_cifar10.json
new file mode 100644
index 0000000..c5480c9
--- /dev/null
+++ b/results/dfa_canonical_lam1e-4_30ep/results_cifar10.json
@@ -0,0 +1,549 @@
+{
+ "42": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0261129596710203,
+ 1.965970752182007,
+ 1.9461470681762696,
+ 1.931545509109497,
+ 1.9184623413085937,
+ 1.9065282035064697,
+ 1.902437851715088,
+ 1.895544966506958,
+ 1.8950473132705687,
+ 1.889725124206543,
+ 1.8855420764541626,
+ 1.880997481918335,
+ 1.8747885485839844,
+ 1.8749551737976073,
+ 1.873095685119629,
+ 1.8711490287017822,
+ 1.8692577294540405,
+ 1.865205725440979,
+ 1.8637552904891967,
+ 1.8656678380966187,
+ 1.8631304251480103,
+ 1.8602397272491455,
+ 1.8614968471908568,
+ 1.860624254989624,
+ 1.8599734017944336,
+ 1.8587864357757569,
+ 1.854469165649414,
+ 1.856863537902832,
+ 1.8563300177001953,
+ 1.8545278675079346
+ ],
+ "train_acc": [
+ 0.26118,
+ 0.28972,
+ 0.30092,
+ 0.3046,
+ 0.30976,
+ 0.31652,
+ 0.31542,
+ 0.31824,
+ 0.3199,
+ 0.32334,
+ 0.32476,
+ 0.32794,
+ 0.32848,
+ 0.32864,
+ 0.32798,
+ 0.33074,
+ 0.33078,
+ 0.33602,
+ 0.33204,
+ 0.33176,
+ 0.33496,
+ 0.33462,
+ 0.33306,
+ 0.33528,
+ 0.33536,
+ 0.33594,
+ 0.33782,
+ 0.33676,
+ 0.33434,
+ 0.33972
+ ],
+ "test_acc": [
+ 0.3121,
+ 0.3285,
+ 0.3222,
+ 0.32,
+ 0.3344,
+ 0.3365,
+ 0.3411,
+ 0.3402,
+ 0.3386,
+ 0.3497,
+ 0.3454,
+ 0.3599,
+ 0.3558,
+ 0.3517,
+ 0.3509,
+ 0.3522,
+ 0.3602,
+ 0.3593,
+ 0.3563,
+ 0.3609,
+ 0.3561,
+ 0.3571,
+ 0.3611,
+ 0.3599,
+ 0.3629,
+ 0.3627,
+ 0.3615,
+ 0.3611,
+ 0.3613,
+ 0.3617
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.4050517678260803,
+ -0.013029102236032486,
+ 0.03315805643796921,
+ -0.013960793614387512
+ ],
+ "perturbation_rho": [
+ 0.12510338425636292,
+ -0.006976948119699955,
+ -0.015925724059343338,
+ 0.011312087066471577
+ ],
+ "nudging": {
+ "0.001": [
+ -7.806112989783287e-06,
+ -6.705522537231445e-08,
+ -3.022141754627228e-07,
+ 8.172355592250824e-08
+ ],
+ "0.003": [
+ -2.3321015760302544e-05,
+ -2.1257437765598297e-07,
+ -9.727664291858673e-07,
+ 2.891756594181061e-07
+ ],
+ "0.01": [
+ -7.774354889988899e-05,
+ -6.752088665962219e-07,
+ -3.1027011573314667e-06,
+ 1.0223593562841415e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 10956.861328125,
+ 18143.607421875,
+ 21060.630859375,
+ 23016.607421875,
+ 24491.1171875
+ ],
+ "bp_grad_norms_per_layer": [
+ 7.272880793607328e-06,
+ 4.709177119366359e-06,
+ 4.678456662077224e-06,
+ 4.567728865367826e-06,
+ 4.537882432487095e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 89.9981502906406,
+ "embed.bias": 98.48191085272376,
+ "blocks.0.ln.weight": 0.9167992472648621,
+ "blocks.0.w1.weight": 10.6815522190785,
+ "blocks.0.w1.bias": 7.828161751177281,
+ "blocks.0.w2.weight": 38.808210617134726,
+ "blocks.1.ln.weight": 0.9728876352310181,
+ "blocks.1.w1.weight": 10.963974441809082,
+ "blocks.1.w1.bias": 5.055703734088147,
+ "blocks.1.w2.weight": 35.990717171803034,
+ "blocks.2.ln.weight": 0.9287830591201782,
+ "blocks.2.w1.weight": 11.93235751064561,
+ "blocks.2.w1.bias": 5.599607529376271,
+ "blocks.2.w2.weight": 43.70271229681688,
+ "blocks.3.ln.weight": 0.9019887447357178,
+ "blocks.3.w1.weight": 11.73703214757272,
+ "blocks.3.w1.bias": 4.954978948196952,
+ "blocks.3.w2.weight": 39.60266222485616,
+ "out_ln.weight": 0.1381552666425705,
+ "out_head.weight": 1.6594692518127092,
+ "out_head.bias": 1.1554215734743463
+ }
+ }
+ },
+ "123": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0199886853790283,
+ 1.9583829278564453,
+ 1.9399597728729248,
+ 1.9226381231689453,
+ 1.9109827480697632,
+ 1.9015012389755248,
+ 1.8941318827056886,
+ 1.890287815246582,
+ 1.8868233013153075,
+ 1.8811580016708374,
+ 1.8821455502319335,
+ 1.879702535018921,
+ 1.8747499548721314,
+ 1.8705888650894165,
+ 1.8737954347991943,
+ 1.8678274332427978,
+ 1.861484291419983,
+ 1.8633367358398438,
+ 1.864215413131714,
+ 1.8632674326324463,
+ 1.862329735069275,
+ 1.863224948348999,
+ 1.8597282915878295,
+ 1.8569441051864624,
+ 1.8570497719955443,
+ 1.8558143846893311,
+ 1.8551305682373047,
+ 1.8570148430633544,
+ 1.8583727548217774,
+ 1.8555032357025147
+ ],
+ "train_acc": [
+ 0.26116,
+ 0.29306,
+ 0.30176,
+ 0.30766,
+ 0.31666,
+ 0.32172,
+ 0.32304,
+ 0.32316,
+ 0.32528,
+ 0.32762,
+ 0.32708,
+ 0.32632,
+ 0.32938,
+ 0.33256,
+ 0.33252,
+ 0.3339,
+ 0.33636,
+ 0.33564,
+ 0.3344,
+ 0.33576,
+ 0.33646,
+ 0.33592,
+ 0.33694,
+ 0.33802,
+ 0.33924,
+ 0.33946,
+ 0.34026,
+ 0.33802,
+ 0.33666,
+ 0.34056
+ ],
+ "test_acc": [
+ 0.3089,
+ 0.333,
+ 0.3314,
+ 0.3446,
+ 0.3445,
+ 0.3441,
+ 0.3434,
+ 0.3491,
+ 0.3456,
+ 0.3411,
+ 0.3524,
+ 0.3535,
+ 0.3493,
+ 0.3523,
+ 0.3599,
+ 0.3569,
+ 0.3643,
+ 0.3577,
+ 0.3539,
+ 0.3635,
+ 0.3616,
+ 0.3633,
+ 0.3622,
+ 0.3652,
+ 0.3634,
+ 0.3637,
+ 0.3622,
+ 0.3614,
+ 0.3636,
+ 0.3637
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.4339396357536316,
+ -0.10015994310379028,
+ -0.017887549474835396,
+ 0.040999654680490494
+ ],
+ "perturbation_rho": [
+ 0.19203181564807892,
+ -0.038277432322502136,
+ 0.011904019862413406,
+ 0.005113246850669384
+ ],
+ "nudging": {
+ "0.001": [
+ -1.0719290003180504e-05,
+ 1.0670628398656845e-06,
+ 8.614733815193176e-08,
+ -5.248002707958221e-07
+ ],
+ "0.003": [
+ -3.2095471397042274e-05,
+ 3.2514799386262894e-06,
+ 3.771856427192688e-07,
+ -1.496635377407074e-06
+ ],
+ "0.01": [
+ -0.0001069442369043827,
+ 1.088087446987629e-05,
+ 1.3296958059072495e-06,
+ -4.966510459780693e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 11421.578125,
+ 15917.556640625,
+ 17952.0625,
+ 20818.056640625,
+ 22884.724609375
+ ],
+ "bp_grad_norms_per_layer": [
+ 8.24415656097699e-06,
+ 5.0456683311495e-06,
+ 5.334758498065639e-06,
+ 5.458482974063372e-06,
+ 4.856112354900688e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 86.77573804339026,
+ "embed.bias": 84.55079592528128,
+ "blocks.0.ln.weight": 0.873725414276123,
+ "blocks.0.w1.weight": 10.119984774778466,
+ "blocks.0.w1.bias": 8.290345155770092,
+ "blocks.0.w2.weight": 38.8465868516302,
+ "blocks.1.ln.weight": 0.8992828726768494,
+ "blocks.1.w1.weight": 10.725943451191046,
+ "blocks.1.w1.bias": 6.0767567772252775,
+ "blocks.1.w2.weight": 34.06827682802752,
+ "blocks.2.ln.weight": 0.8814435601234436,
+ "blocks.2.w1.weight": 11.093024893301187,
+ "blocks.2.w1.bias": 5.85466310572377,
+ "blocks.2.w2.weight": 39.17455488648176,
+ "blocks.3.ln.weight": 0.864281952381134,
+ "blocks.3.w1.weight": 11.522907264038254,
+ "blocks.3.w1.bias": 5.830217292197888,
+ "blocks.3.w2.weight": 44.7400774921075,
+ "out_ln.weight": 0.14324577152729034,
+ "out_head.weight": 1.7398795739866528,
+ "out_head.bias": 1.603565887046315
+ }
+ }
+ },
+ "456": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0270494400787356,
+ 1.9626524370956422,
+ 1.9387351146697998,
+ 1.9183588446426392,
+ 1.9022310787582397,
+ 1.8986060205841064,
+ 1.889561926574707,
+ 1.8879889819335938,
+ 1.879786368637085,
+ 1.8794735382461547,
+ 1.8745473431777955,
+ 1.8756215816497803,
+ 1.871411202316284,
+ 1.871703953819275,
+ 1.8679741104507446,
+ 1.8662110556793212,
+ 1.8646783707427979,
+ 1.8631719284820556,
+ 1.8607554779052735,
+ 1.8609626446151732,
+ 1.8584174155426025,
+ 1.856995953140259,
+ 1.8584875451660157,
+ 1.8541203423309327,
+ 1.8528442667388916,
+ 1.8555702542114259,
+ 1.854158247909546,
+ 1.8552892169570923,
+ 1.8528213509750366,
+ 1.853795170211792
+ ],
+ "train_acc": [
+ 0.25994,
+ 0.28998,
+ 0.30092,
+ 0.30864,
+ 0.31382,
+ 0.31866,
+ 0.32452,
+ 0.32284,
+ 0.32676,
+ 0.32548,
+ 0.32872,
+ 0.32832,
+ 0.33044,
+ 0.32716,
+ 0.32984,
+ 0.33082,
+ 0.33492,
+ 0.33234,
+ 0.33334,
+ 0.3352,
+ 0.33426,
+ 0.33602,
+ 0.33428,
+ 0.33574,
+ 0.33652,
+ 0.33572,
+ 0.33734,
+ 0.33416,
+ 0.33524,
+ 0.33676
+ ],
+ "test_acc": [
+ 0.303,
+ 0.3213,
+ 0.3218,
+ 0.3366,
+ 0.3324,
+ 0.3414,
+ 0.3426,
+ 0.3355,
+ 0.3523,
+ 0.357,
+ 0.3577,
+ 0.3603,
+ 0.3571,
+ 0.3588,
+ 0.3587,
+ 0.3621,
+ 0.3612,
+ 0.3582,
+ 0.3596,
+ 0.3601,
+ 0.3617,
+ 0.3598,
+ 0.3607,
+ 0.3598,
+ 0.3627,
+ 0.3609,
+ 0.3619,
+ 0.3611,
+ 0.3612,
+ 0.3614
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.42865899205207825,
+ -0.05478518456220627,
+ 0.038068704307079315,
+ -0.056306466460227966
+ ],
+ "perturbation_rho": [
+ 0.20215260982513428,
+ 0.011766819283366203,
+ 0.013697815127670765,
+ -0.005670403130352497
+ ],
+ "nudging": {
+ "0.001": [
+ -9.712064638733864e-06,
+ 4.707835614681244e-07,
+ -3.8545113056898117e-07,
+ 4.920875653624535e-07
+ ],
+ "0.003": [
+ -2.92585464194417e-05,
+ 1.3945391401648521e-06,
+ -1.1923257261514664e-06,
+ 1.539476215839386e-06
+ ],
+ "0.01": [
+ -9.755697101354599e-05,
+ 4.76837158203125e-06,
+ -4.0211016312241554e-06,
+ 5.003763362765312e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 10800.751953125,
+ 18185.798828125,
+ 21568.359375,
+ 23534.529296875,
+ 25767.765625
+ ],
+ "bp_grad_norms_per_layer": [
+ 8.111597708193585e-06,
+ 4.7823082240938675e-06,
+ 4.949825324729318e-06,
+ 4.701300895249005e-06,
+ 4.799476755579235e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 91.29796969081434,
+ "embed.bias": 132.64457736863756,
+ "blocks.0.ln.weight": 0.9450518488883972,
+ "blocks.0.w1.weight": 11.023232822335661,
+ "blocks.0.w1.bias": 7.565836772828061,
+ "blocks.0.w2.weight": 40.72087672870546,
+ "blocks.1.ln.weight": 0.9085477590560913,
+ "blocks.1.w1.weight": 10.945316270784486,
+ "blocks.1.w1.bias": 4.626647858456354,
+ "blocks.1.w2.weight": 34.33902143396201,
+ "blocks.2.ln.weight": 0.900463879108429,
+ "blocks.2.w1.weight": 11.644326056575537,
+ "blocks.2.w1.bias": 5.303598149511605,
+ "blocks.2.w2.weight": 40.633521007369026,
+ "blocks.3.ln.weight": 0.898414671421051,
+ "blocks.3.w1.weight": 11.992557831950368,
+ "blocks.3.w1.bias": 4.96772779247091,
+ "blocks.3.w2.weight": 42.20788924018908,
+ "out_ln.weight": 0.14031164348125458,
+ "out_head.weight": 1.830198193189751,
+ "out_head.bias": 1.3683500872345247
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 256,
+ "num_blocks": 4,
+ "batch_size": 128,
+ "epochs": 30,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 42,
+ 123,
+ 456
+ ],
+ "gpu": 0,
+ "output_dir": "results/dfa_canonical_lam1e-4_30ep",
+ "methods": [
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0001,
+ "num_classes": 10
+ }
+} \ No newline at end of file