{ "42": { "state_bridge": { "log": { "train_loss": [ 2.0474753201293945, 1.916932412071228, 1.8463650513076781, 1.8093244620132447, 1.7900167614746094, 1.770258097190857, 1.7575064269256593, 1.745316464805603, 1.7256561163330078, 1.710018780593872, 1.6960305325698852, 1.6879396067047119, 1.6709380169677734, 1.6666312908172607, 1.6575450637435913, 1.6553003553009034, 1.6458853662872315, 1.6390605908966065, 1.633693081512451, 1.6213745572280884, 1.6122850603866576, 1.6089301138305665, 1.5997764694976806, 1.6013678260421753, 1.6002555493545532, 1.5937991617202758, 1.5923587594223023, 1.5893851773071288, 1.5910608068466185, 1.5901478890609742 ], "train_acc": [ 0.24662, 0.30632, 0.34066, 0.35354, 0.3627, 0.37132, 0.37842, 0.38224, 0.39068, 0.39572, 0.39934, 0.40442, 0.41028, 0.41562, 0.4161, 0.41766, 0.41936, 0.42446, 0.42536, 0.43068, 0.43248, 0.43428, 0.43778, 0.44052, 0.43932, 0.4404, 0.44258, 0.44194, 0.44336, 0.44466 ], "test_acc": [ 0.273, 0.3524, 0.3676, 0.3756, 0.3906, 0.3879, 0.3819, 0.3966, 0.4068, 0.4162, 0.4183, 0.4298, 0.4321, 0.4389, 0.4217, 0.4379, 0.4363, 0.4406, 0.4425, 0.4463, 0.4538, 0.4566, 0.4496, 0.4453, 0.4486, 0.4567, 0.4575, 0.4563, 0.4565, 0.4564 ], "state_pred_error": [ 0.24535170847892762, 0.1636493504667282, 0.13540883115291596, 0.12653046741724014, 0.12561693742990493, 0.11761465485811233, 0.10871879986047744, 0.10335472867965698, 0.09837688980817795, 0.091632712829113, 0.0877949278664589, 0.08455404979228974, 0.08431078619241715, 0.07790626471281052, 0.0775115798163414, 0.07608327221155167, 0.07628034708738327, 0.07240497077941895, 0.07071494008302688, 0.06996692018032075, 0.06466230425357819, 0.0624802718091011, 0.058281462930440904, 0.055862715678215026, 0.05389112324714661, 0.05099856609106064, 0.04935575147509575, 0.04708607808470726, 0.04531161543607712, 0.04388566305398941 ] }, "diagnostics": { "bp_cosine": [ 0.42546752095222473, 0.36250773072242737, 0.31346678733825684, 0.2606384754180908 ], "perturbation_rho": [ 0.4038287401199341, 0.44145163893699646, 0.40815383195877075, 0.3253956735134125 ], "nudging": { "0.001": [ -0.0003594870213419199, -0.00023786397650837898, -0.0001701684668660164, -0.00012595904991030693 ], "0.003": [ -0.0010785695631057024, -0.0007136135827749968, -0.0005104630254209042, -0.00037785875611007214 ], "0.01": [ -0.0035966814029961824, -0.0023783869110047817, -0.0017011994495987892, -0.0012592736165970564 ] }, "hidden_norms_per_layer": [ 243.4176788330078, 251.25033569335938, 264.15234375, 281.2754821777344, 301.9956359863281 ], "bp_grad_norms_per_layer": [ 0.00025976746110245585, 0.00023080054961610585, 0.0002087712928187102, 0.0001913418382173404, 0.00017467232828494161 ] }, "drift": { "embed.weight": 7.101831683055874, "embed.bias": 26.55614727350677, "blocks.0.ln.weight": 0.13280124962329865, "blocks.0.w1.weight": 3.0818043823960677, "blocks.0.w1.bias": 6.322237223125591, "blocks.0.w2.weight": 14.259096230097354, "blocks.1.ln.weight": 0.11403189599514008, "blocks.1.w1.weight": 3.033562757580805, "blocks.1.w1.bias": 6.850830368282527, "blocks.1.w2.weight": 13.651065232090788, "blocks.2.ln.weight": 0.11589953303337097, "blocks.2.w1.weight": 3.2263993788002137, "blocks.2.w1.bias": 6.9524991530621545, "blocks.2.w2.weight": 14.671537599689291, "blocks.3.ln.weight": 0.12724463641643524, "blocks.3.w1.weight": 3.3413554165823025, "blocks.3.w1.bias": 7.662419316475547, "blocks.3.w2.weight": 14.679376824812177, "out_ln.weight": 0.21895238757133484, "out_head.weight": 1.1940135791700324, "out_head.bias": 2.222106740649061 } }, "credit_bridge": { "log": { "train_loss": [ 1.9963660776519776, 1.9372394968414306, 1.9241013562393188, 1.9278872109222411, 1.9258392727661133, 1.9213631185913085, 1.93245830078125, 1.9319513151550294, 1.9178282093048096, 1.9111720166015624, 1.9015198223876952, 1.8905008841705322, 1.8903594969940185, 1.8827447821807861, 1.8827315840911865, 1.8831548281478883, 1.8796609592437745, 1.8760715311050415, 1.874497896347046, 1.8719314217758178, 1.8737228869628906, 1.8702389026260375, 1.869253684387207, 1.8723084658813476, 1.8726538021087646, 1.866847327041626, 1.8720124649047851, 1.8690419888305665, 1.8700805447769164, 1.8736286736679078 ], "train_acc": [ 0.2772, 0.30878, 0.3143, 0.31462, 0.31578, 0.31758, 0.31572, 0.31458, 0.31894, 0.31912, 0.3223, 0.32764, 0.32844, 0.33476, 0.3338, 0.33586, 0.33744, 0.3355, 0.33672, 0.33632, 0.33976, 0.33918, 0.34024, 0.33654, 0.33996, 0.34146, 0.33952, 0.34062, 0.34076, 0.34376 ], "test_acc": [ 0.3299, 0.342, 0.3458, 0.3319, 0.3513, 0.3494, 0.3535, 0.3477, 0.3438, 0.3456, 0.3492, 0.3432, 0.3486, 0.3392, 0.3444, 0.3448, 0.3444, 0.3525, 0.3618, 0.3546, 0.3556, 0.3595, 0.3577, 0.3559, 0.3598, 0.3621, 0.3612, 0.3592, 0.3584, 0.3596 ], "value_loss": [ 0.666878916387558, 0.18937870089530945, 0.11881086151123046, 0.08265157832622529, 0.06769442874908448, 0.06252522818088531, 0.09614691172599793, 0.07957154913902283, 0.054948721503019334, 0.05558138638615608, 0.05110657853126526, 0.046604735120534896, 0.04109812472939491, 0.03985793245315552, 0.03339640676736832, 0.03308486737549305, 0.030077795300483705, 0.025715462546348572, 0.022470404601693153, 0.020539281535744667, 0.018625647016167642, 0.018597659103274346, 0.016686187164783477, 0.016902890469133854, 0.016258136838972568, 0.012850605883747339, 0.011790002823770046, 0.011391780233085156, 0.014738336679339409, 0.014527755738198757 ] }, "diagnostics": { "bp_cosine": [ 0.6566677093505859, 0.675422191619873, 0.6851179599761963, 0.6924338936805725 ], "perturbation_rho": [ 0.4113081693649292, 0.4249998927116394, 0.5678677558898926, 0.5010733604431152 ], "nudging": { "0.001": [ -5.1190661906730384e-05, -4.658541001845151e-05, -4.432544665178284e-05, -4.257681575836614e-05 ], "0.003": [ -0.00015361575060524046, -0.00013976145419292152, -0.0001330113154835999, -0.00012775413051713258 ], "0.01": [ -0.0005118446424603462, -0.0004658599500544369, -0.00044331286335363984, -0.00042594311526045203 ] }, "hidden_norms_per_layer": [ 5362.49755859375, 5384.90576171875, 5393.5771484375, 5410.9482421875, 5431.09033203125 ], "bp_grad_norms_per_layer": [ 2.3212431187857874e-05, 2.1681269572582096e-05, 2.1051570001873188e-05, 2.0064975615241565e-05, 1.878892544482369e-05 ] }, "drift": { "embed.weight": 44.5671938207566, "embed.bias": 58.96553186802584, "blocks.0.ln.weight": 0.29142487049102783, "blocks.0.w1.weight": 3.8377129757749735, "blocks.0.w1.bias": 6.352571784709687, "blocks.0.w2.weight": 18.724100714811932, "blocks.1.ln.weight": 0.2817494571208954, "blocks.1.w1.weight": 3.743751843873518, "blocks.1.w1.bias": 7.155651981858119, "blocks.1.w2.weight": 18.70848603498537, "blocks.2.ln.weight": 0.28254690766334534, "blocks.2.w1.weight": 3.7939537187971424, "blocks.2.w1.bias": 6.996760215683633, "blocks.2.w2.weight": 19.26348418356461, "blocks.3.ln.weight": 0.27882981300354004, "blocks.3.w1.weight": 3.869420244575826, "blocks.3.w1.bias": 7.142781074581749, "blocks.3.w2.weight": 20.36433189495994, "out_ln.weight": 0.09818978607654572, "out_head.weight": 2.0291022420933946, "out_head.bias": 2.1241749027066383 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42 ], "gpu": 0, "output_dir": "results/round38_sbcb_penalty_30ep", "methods": [ "state_bridge", "credit_bridge" ], "random_targets": false, "penalty_lam": 0.01, "num_classes": 10 } }