{ "a0.0_L2_s42": { "bp": { "test_acc": 0.9445, "mean_bp_cosine": 0.8363544940948486, "mean_rho": 0.9999966025352478, "mean_nudge_001": -0.014867921359837055, "mean_nudge_003": -0.038927674293518066, "mean_nudge_01": -0.08314211666584015, "bp_cosine_per_layer": [ 0.841028094291687, 0.8316808938980103 ], "rho_per_layer": [ 0.9999988079071045, 0.9999943971633911 ], "nudge_per_layer": [ -0.10806188732385635, -0.058222346007823944 ] }, "dfa": { "test_acc": 0.8905, "mean_bp_cosine": 0.028207830036990345, "mean_rho": -0.013231039047241211, "mean_nudge_001": -0.0004544301191344857, "mean_nudge_003": -0.0012695252080447972, "mean_nudge_01": -0.003105040406808257, "bp_cosine_per_layer": [ -0.002617625752463937, 0.059033285826444626 ], "rho_per_layer": [ -0.03406350687146187, 0.007601428776979446 ], "nudge_per_layer": [ -0.0010169181041419506, -0.005193162709474564 ] }, "state_bridge": { "test_acc": 0.825, "mean_bp_cosine": 0.5880793333053589, "mean_rho": 0.44639749079942703, "mean_nudge_001": -0.01224998920224607, "mean_nudge_003": -0.035131572745740414, "mean_nudge_01": -0.10013834200799465, "bp_cosine_per_layer": [ 0.8860390186309814, 0.29011964797973633 ], "rho_per_layer": [ 0.729070782661438, 0.16372419893741608 ], "nudge_per_layer": [ -0.17900359630584717, -0.021273087710142136 ], "state_pred_error_per_layer": [ 33793.6875, 17287.951171875 ], "mean_state_pred_error": 25540.8193359375 }, "credit_bridge": { "test_acc": 0.656, "mean_bp_cosine": 0.10665101930499077, "mean_rho": 0.23030418902635574, "mean_nudge_001": -0.006580323912203312, "mean_nudge_003": -0.019663945073261857, "mean_nudge_01": -0.06460809707641602, "bp_cosine_per_layer": [ 0.10150224715471268, 0.11179979145526886 ], "rho_per_layer": [ 0.24922937154769897, 0.2113790065050125 ], "nudge_per_layer": [ -0.10681234300136566, -0.02240385115146637 ], "final_value_loss": 0.5207448813438416, "final_term_loss": 0.20168962478637695, "final_bridge_loss": 0.10483724861145019, "final_tgrad_loss": 0.21421800775527955 } }, "a0.0_L8_s42": { "bp": { "test_acc": 0.948, "mean_bp_cosine": 0.6927967146039009, "mean_rho": 0.9999185726046562, "mean_nudge_001": -0.007844092877348885, "mean_nudge_003": -0.021484591998159885, "mean_nudge_01": -0.053326028981246054, "bp_cosine_per_layer": [ 0.7277177572250366, 0.7186546921730042, 0.7086087465286255, 0.6982603073120117, 0.686427116394043, 0.6760239601135254, 0.6675269603729248, 0.6591541767120361 ], "rho_per_layer": [ 0.999999463558197, 0.999997615814209, 0.9999939203262329, 0.9999786019325256, 0.9999496340751648, 0.99991774559021, 0.9998468160629272, 0.9996647834777832 ], "nudge_per_layer": [ -0.14217889308929443, -0.10304947197437286, -0.06661910563707352, -0.04270746558904648, -0.028325699269771576, -0.019565371796488762, -0.013980223797261715, -0.010182000696659088 ] }, "dfa": { "test_acc": 0.9465, "mean_bp_cosine": 0.012617309403140098, "mean_rho": 0.13837066129781306, "mean_nudge_001": -0.016161460865987465, "mean_nudge_003": -0.04494552121468587, "mean_nudge_01": -0.12098311571025988, "bp_cosine_per_layer": [ -0.003863303456455469, 0.017664283514022827, 0.01661803014576435, 0.018178246915340424, 0.015868376940488815, 0.014126626774668694, 0.014091677032411098, 0.008254537358880043 ], "rho_per_layer": [ -0.019435672089457512, 0.2941593527793884, 0.18103283643722534, 0.20650765299797058, 0.18732143938541412, 0.13352911174297333, 0.15822581946849823, -0.03437525033950806 ], "nudge_per_layer": [ 0.4985465407371521, -0.834585428237915, -0.4260305166244507, -0.15289700031280518, -0.03924320265650749, -0.009715870022773743, -0.002982086967676878, -0.0009573615971021354 ] }, "state_bridge": { "test_acc": 0.167, "mean_bp_cosine": -0.03321670075220595, "mean_rho": -3.657315392047167e-05, "mean_nudge_001": -146456576.0, "mean_nudge_003": -383148544.0, "mean_nudge_01": -1178039808.0, "bp_cosine_per_layer": [ 0.09916997700929642, -0.0925145074725151, -0.12530964612960815, -0.051633648574352264, -0.0899374783039093, -0.005518035963177681, 1.1753130820579827e-05, -2.0197142021061154e-06 ], "rho_per_layer": [ 0.0036257803440093994, -0.0018100417219102383, 0.0009338338859379292, -0.0030421577394008636, 0.0, 0.0, 0.0, 0.0 ], "nudge_per_layer": [ -9628057600.0, 254652416.0, -49807360.0, -1126400.0, 20480.0, 0.0, 0.0, 0.0 ], "state_pred_error_per_layer": [ 3.788521752616758e+33, 3.788521752616758e+33, 3.788521752616758e+33, 3.788521752616758e+33, 3.788521752616758e+33, 3.788521752616758e+33, 3.788521752616758e+33, 3.788521752616758e+33 ], "mean_state_pred_error": 3.788521752616758e+33 }, "credit_bridge": { "test_acc": 0.0965, "mean_bp_cosine": 0.10048330621793866, "mean_rho": -0.022956646920647472, "mean_nudge_001": -59174496.0, "mean_nudge_003": -195268096.0, "mean_nudge_01": -642210232.0, "bp_cosine_per_layer": [ 0.517741858959198, 0.33828628063201904, 0.4149113893508911, 0.23560978472232819, -0.15021789073944092, -0.1476898193359375, -0.3642715811729431, -0.040503572672605515 ], "rho_per_layer": [ -0.004317115992307663, 9.15133859962225e-05, 0.0010522708762437105, 0.0034065949730575085, -0.18388643860816956, 0.0, 0.0, 0.0 ], "nudge_per_layer": [ -5081367040.0, -74964928.0, 31930368.0, -13346304.0, 66048.0, 0.0, 0.0, 0.0 ], "final_value_loss": 7.270963528184324e+22, "final_term_loss": 7.270963401002671e+22, "final_bridge_loss": 1784726332749172.0, "final_tgrad_loss": 0.6689293840408325 } }, "a0.5_L2_s42": { "bp": { "test_acc": 0.8905, "mean_bp_cosine": 0.8927535116672516, "mean_rho": 0.9999973773956299, "mean_nudge_001": -0.017791217658668756, "mean_nudge_003": -0.04914894513785839, "mean_nudge_01": -0.12422022968530655, "bp_cosine_per_layer": [ 0.8970257639884949, 0.8884812593460083 ], "rho_per_layer": [ 0.9999991655349731, 0.9999955892562866 ], "nudge_per_layer": [ -0.16577741503715515, -0.08266304433345795 ] }, "dfa": { "test_acc": 0.8565, "mean_bp_cosine": 0.032235472928732634, "mean_rho": -0.0014879778027534485, "mean_nudge_001": -0.00016316675464622676, "mean_nudge_003": -0.00043464876944199204, "mean_nudge_01": -0.0008175973780453205, "bp_cosine_per_layer": [ -0.009920955635607243, 0.07439190149307251 ], "rho_per_layer": [ -0.054706037044525146, 0.05173008143901825 ], "nudge_per_layer": [ 0.004189381375908852, -0.005824576131999493 ] }, "state_bridge": { "test_acc": 0.825, "mean_bp_cosine": 0.645551860332489, "mean_rho": 0.5210425555706024, "mean_nudge_001": -0.012871089624240994, "mean_nudge_003": -0.03697468154132366, "mean_nudge_01": -0.10600546188652515, "bp_cosine_per_layer": [ 0.8819085955619812, 0.4091951251029968 ], "rho_per_layer": [ 0.761555552482605, 0.28052955865859985 ], "nudge_per_layer": [ -0.18185698986053467, -0.03015393391251564 ], "state_pred_error_per_layer": [ 7353.158203125, 2627.8740234375 ], "mean_state_pred_error": 4990.51611328125 }, "credit_bridge": { "test_acc": 0.692, "mean_bp_cosine": 0.1708241105079651, "mean_rho": 0.3076810836791992, "mean_nudge_001": -0.0058031873777508736, "mean_nudge_003": -0.017323508858680725, "mean_nudge_01": -0.05672750808298588, "bp_cosine_per_layer": [ 0.14308685064315796, 0.19856137037277222 ], "rho_per_layer": [ 0.26277071237564087, 0.35259145498275757 ], "nudge_per_layer": [ -0.08629482984542847, -0.02716018632054329 ], "final_value_loss": 0.28658444921970366, "final_term_loss": 0.040962481904029846, "final_bridge_loss": 0.0227132670879364, "final_tgrad_loss": 0.22290869793891907 } }, "a0.5_L8_s42": { "bp": { "test_acc": 0.8825, "mean_bp_cosine": 0.7004428058862686, "mean_rho": 0.9999219477176666, "mean_nudge_001": -0.013686691803741269, "mean_nudge_003": -0.03912358166417107, "mean_nudge_01": -0.11077304021455348, "bp_cosine_per_layer": [ 0.7438379526138306, 0.7323452234268188, 0.723551869392395, 0.7129155397415161, 0.6892737746238708, 0.6759694814682007, 0.6671421527862549, 0.658506453037262 ], "rho_per_layer": [ 0.9999985694885254, 0.9999985694885254, 0.9999951720237732, 0.9999872446060181, 0.9999734163284302, 0.9999377727508545, 0.9998385310173035, 0.9996463060379028 ], "nudge_per_layer": [ -0.3025854229927063, -0.20860256254673004, -0.13888010382652283, -0.09112250804901123, -0.0598360076546669, -0.039668694138526917, -0.026830831542611122, -0.018658190965652466 ] }, "dfa": { "test_acc": 0.8805, "mean_bp_cosine": 0.030201979679986835, "mean_rho": 0.14581625070422888, "mean_nudge_001": -0.007218341752377455, "mean_nudge_003": -0.021398383076302707, "mean_nudge_01": -0.06869508739328012, "bp_cosine_per_layer": [ -0.0057187266647815704, 0.03526674211025238, 0.03511466830968857, 0.043710630387067795, 0.04474484175443649, 0.03394380211830139, 0.03243564814329147, 0.022118231281638145 ], "rho_per_layer": [ -0.08895743638277054, 0.2165505439043045, 0.14878079295158386, 0.23983460664749146, 0.23414158821105957, 0.22946099936962128, 0.044252023100852966, 0.14246688783168793 ], "nudge_per_layer": [ 0.20587505400180817, -0.4487733840942383, -0.17917752265930176, -0.0796520859003067, -0.03180943429470062, -0.009406229481101036, -0.004543165676295757, -0.0020739310421049595 ] }, "state_bridge": { "test_acc": 0.1155, "mean_bp_cosine": 0.04451697081094608, "mean_rho": -0.013691710773855448, "mean_nudge_001": -294302.75, "mean_nudge_003": -880992.0, "mean_nudge_01": -2656252.75, "bp_cosine_per_layer": [ 0.30725735425949097, 0.12316203862428665, -0.0629364550113678, 0.007313757669180632, -0.009149492718279362, -0.008323092944920063, -0.006843051873147488, 0.005654708482325077 ], "rho_per_layer": [ -0.0015011467039585114, 0.0004240265116095543, 0.0019385055638849735, 0.0008086063899099827, -0.11120367795228958, 0.0, 0.0, 0.0 ], "nudge_per_layer": [ -20625168.0, -717604.0, -58672.0, 134824.0, 16604.0, -6.0, 0.0, 0.0 ], "state_pred_error_per_layer": [ 1.2424051312422632e+29, 1.2424051312422632e+29, 1.2424051312422632e+29, 1.2424051312422632e+29, 1.2424051312422632e+29, 1.2424051312422632e+29, 1.2424051312422632e+29, 1.2424051312422632e+29 ], "mean_state_pred_error": 1.2424051312422632e+29 }, "credit_bridge": { "test_acc": 0.1095, "mean_bp_cosine": 0.0375741989701055, "mean_rho": 0.011375281232176349, "mean_nudge_001": 64012.0, "mean_nudge_003": 195061.0, "mean_nudge_01": 1148873.0, "bp_cosine_per_layer": [ 0.04903252795338631, 0.20776475965976715, 0.12520574033260345, -0.2015925794839859, -0.03666269779205322, 0.005083487834781408, 0.13890880346298218, 0.012853549793362617 ], "rho_per_layer": [ 0.011465835385024548, 0.00849771499633789, 0.0009214465972036123, -0.005531121976673603, 0.07564837485551834, 0.0, 0.0, 0.0 ], "nudge_per_layer": [ 10792360.0, -1553496.0, -27560.0, -69368.0, 49144.0, -96.0, 0.0, 0.0 ], "final_value_loss": 1.0739092229740712e+19, "final_term_loss": 5.965625762330856e+18, "final_bridge_loss": 4.773466424748804e+18, "final_tgrad_loss": 0.753796951675415 } }, "a1.0_L2_s42": { "bp": { "test_acc": 0.79, "mean_bp_cosine": 0.9718506336212158, "mean_rho": 0.9999988377094269, "mean_nudge_001": -0.030717147514224052, "mean_nudge_003": -0.08905789628624916, "mean_nudge_01": -0.2628655806183815, "bp_cosine_per_layer": [ 0.977530300617218, 0.9661709666252136 ], "rho_per_layer": [ 0.9999992847442627, 0.9999983906745911 ], "nudge_per_layer": [ -0.3279687762260437, -0.1977623850107193 ] }, "dfa": { "test_acc": 0.7575, "mean_bp_cosine": 0.03516392147867009, "mean_rho": -0.00011159107089042664, "mean_nudge_001": -0.000253174692261382, "mean_nudge_003": -0.0007429331817547791, "mean_nudge_01": -0.0022771726071368903, "bp_cosine_per_layer": [ -0.0018995754653587937, 0.07222741842269897 ], "rho_per_layer": [ -0.03529410809278488, 0.03507092595100403 ], "nudge_per_layer": [ 0.00033701310167089105, -0.004891358315944672 ] }, "state_bridge": { "test_acc": 0.7315, "mean_bp_cosine": 0.6242872625589371, "mean_rho": 0.576740100979805, "mean_nudge_001": -0.013815624406561255, "mean_nudge_003": -0.04068431770429015, "mean_nudge_01": -0.12692053988575935, "bp_cosine_per_layer": [ 0.7724592089653015, 0.47611531615257263 ], "rho_per_layer": [ 0.7083259224891663, 0.4451542794704437 ], "nudge_per_layer": [ -0.2117496132850647, -0.04209146648645401 ], "state_pred_error_per_layer": [ 2210.423828125, 264.2466125488281 ], "mean_state_pred_error": 1237.335220336914 }, "credit_bridge": { "test_acc": 0.556, "mean_bp_cosine": 0.1866571605205536, "mean_rho": 0.2675167992711067, "mean_nudge_001": -0.0047563593834638596, "mean_nudge_003": -0.014220114797353745, "mean_nudge_01": -0.04682535119354725, "bp_cosine_per_layer": [ 0.14650292694568634, 0.22681139409542084 ], "rho_per_layer": [ 0.22659505903720856, 0.3084385395050049 ], "nudge_per_layer": [ -0.06274554133415222, -0.030905161052942276 ], "final_value_loss": 0.24172186150550842, "final_term_loss": 0.06502777924537659, "final_bridge_loss": 0.03994156485795975, "final_tgrad_loss": 0.13675251703262328 } }, "a1.0_L8_s42": { "bp": { "test_acc": 0.753, "mean_bp_cosine": 0.8404746800661087, "mean_rho": 0.9999860152602196, "mean_nudge_001": -0.03076103754574433, "mean_nudge_003": -0.08962251944467425, "mean_nudge_01": -0.26998535776510835, "bp_cosine_per_layer": [ 0.8599272966384888, 0.8556707501411438, 0.8523739576339722, 0.848276674747467, 0.8402513265609741, 0.8327686190605164, 0.8243966698646545, 0.8101321458816528 ], "rho_per_layer": [ 0.9999995231628418, 0.9999991059303284, 0.9999986290931702, 0.9999972581863403, 0.999995231628418, 0.999988853931427, 0.999974250793457, 0.9999352693557739 ], "nudge_per_layer": [ -0.5608033537864685, -0.46859920024871826, -0.3716525733470917, -0.2780599594116211, -0.19847190380096436, -0.13576632738113403, -0.08930139243602753, -0.057228151708841324 ] }, "dfa": { "test_acc": 0.7235, "mean_bp_cosine": 0.04556959925685078, "mean_rho": 0.048565957229584455, "mean_nudge_001": -0.00017181782459374517, "mean_nudge_003": -0.0005837090320710558, "mean_nudge_01": -0.0014622680901084095, "bp_cosine_per_layer": [ -0.009654381312429905, 0.06123851239681244, 0.05049855262041092, 0.06316959857940674, 0.06684726476669312, 0.04511053115129471, 0.06236346811056137, 0.024983247742056847 ], "rho_per_layer": [ -0.04901190102100372, 0.05014052242040634, 0.041004884988069534, 0.0795036256313324, 0.11356800049543381, 0.03488320857286453, 0.06926114857196808, 0.049178168177604675 ], "nudge_per_layer": [ 0.009471571072936058, -0.008436895906925201, -0.0030148853547871113, -0.002505694981664419, -0.0025559633504599333, -0.0017773781437426805, -0.002164034638553858, -0.0007148634176701307 ] }, "state_bridge": { "test_acc": 0.669, "mean_bp_cosine": 0.17768670711666346, "mean_rho": 0.1722423667088151, "mean_nudge_001": -0.003032438595255371, "mean_nudge_003": -0.009077076290850528, "mean_nudge_01": -0.02989676801371388, "bp_cosine_per_layer": [ 0.4364655911922455, 0.2538519501686096, 0.12610256671905518, 0.11331324279308319, 0.12084640562534332, 0.12215165793895721, 0.1230584904551506, 0.12570375204086304 ], "rho_per_layer": [ 0.3833135962486267, 0.2661164402961731, 0.1022414043545723, 0.0808584988117218, 0.13698244094848633, 0.1416344791650772, 0.1372133493423462, 0.12957872450351715 ], "nudge_per_layer": [ -0.19196540117263794, -0.024129249155521393, -0.005033540539443493, -0.0036849211901426315, -0.003727084957063198, -0.003625791519880295, -0.0035160125698894262, -0.003492143005132675 ], "state_pred_error_per_layer": [ 10125.46875, 4586.849609375, 2924.994384765625, 2696.109130859375, 2272.20361328125, 2131.38916015625, 1861.77978515625, 1746.7655029296875 ], "mean_state_pred_error": 3543.1949920654297 }, "credit_bridge": { "test_acc": 0.574, "mean_bp_cosine": 0.14715833496302366, "mean_rho": 0.21127336705103517, "mean_nudge_001": -0.0017491802136646584, "mean_nudge_003": -0.005117673281347379, "mean_nudge_01": -0.017382657038979232, "bp_cosine_per_layer": [ 0.034526653587818146, 0.05641080439090729, 0.10281616449356079, 0.15385565161705017, 0.18335095047950745, 0.20484405755996704, 0.21195781230926514, 0.22950458526611328 ], "rho_per_layer": [ 0.036690596491098404, 0.08035410940647125, 0.14528854191303253, 0.19739355146884918, 0.2672373652458191, 0.3182218074798584, 0.32729244232177734, 0.3177085220813751 ], "nudge_per_layer": [ -0.04240123927593231, -0.01825123094022274, -0.012862971983850002, -0.012372580356895924, -0.012917900457978249, -0.01307743415236473, -0.013364783488214016, -0.013813115656375885 ], "final_value_loss": 0.5224067583084107, "final_term_loss": 0.30657225689888, "final_bridge_loss": 0.06283499038815499, "final_tgrad_loss": 0.15299950742721558 } } }