{ "config": { "d_hidden": 64, "output_dim": 10, "num_layers": 12, "sigma": 0.03, "batch_size": 256, "num_steps": 8000, "lr_fb": 0.001, "lam": 0.1, "K": 8, "ema_momentum": 0.995, "sigma_bridge": 0.1, "eval_every": 1000, "seed": 42, "gpu": 0, "output_dir": "results/toy_lq", "vnet_hidden": 256, "vnet_layers": 3, "term_grad_weight": 1.0, "fm_weight": 0.1 }, "log": { "steps": [ 1, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000 ], "dfa_costate_cos": [ -0.003210080186060319, 0.006517285481095314, 0.002081584728633364, 0.002482607301014165, 0.0003743169557613631, -0.0018058015266433358, 0.005970992535973589, 0.0013112322388527293, -0.00010686229992037018 ], "state_costate_cos": [ -0.008148168989767631, 0.942050834496816, 0.9447367936372757, 0.9457606921593348, 0.9468860725561777, 0.9432125687599182, 0.9417577485243479, 0.945394163330396, 0.9473433345556259 ], "credit_costate_cos": [ -0.011870964197441936, 0.8804089923699697, 0.9169842700163523, 0.9352772484223048, 0.9427581528822581, 0.9391989062229792, 0.9398341725269953, 0.9456242968638738, 0.9460541109244028 ], "dfa_rho": [ -0.012772266054525971, 0.015942092907304566, 0.008943260026474794, 0.004492536109561722, -0.007791806012392044, -0.007830069400370121, 0.014392409706488252, 0.0063066319562494755, -0.001311147507900993 ], "state_rho": [ -0.0018634579222028453, 0.9263874938090643, 0.9358506997426351, 0.9348766555388769, 0.9348586251338323, 0.9323162386814753, 0.9287882298231125, 0.9243461688359579, 0.9346484492222468 ], "credit_rho": [ -0.023325924955618877, 0.8106876164674759, 0.8797721316417059, 0.9208102275927862, 0.9290666033824285, 0.9226995905240377, 0.9258128056923548, 0.9266915867726008, 0.9364602218071619 ], "dfa_nudge": [ 0.0015840742271393538, -0.0023392424918711185, -0.0002702907659113407, -0.0010186488119264443, -0.0001735797462364038, 0.000983052421361208, -0.0020705487113445997, -0.0009097627674539884, 0.0002721068449318409 ], "state_nudge": [ 0.0024757004963854947, -0.33942782630523044, -0.35110870252052945, -0.35064691056807834, -0.3403419057528178, -0.3532385254899661, -0.33996084084113437, -0.35185040285189945, -0.3480343023935954 ], "credit_nudge": [ 0.006069206205817561, -0.31947339574495953, -0.3404841795563698, -0.3458903282880783, -0.3376796667774518, -0.3504715636372566, -0.3379351521531741, -0.3500200683871905, -0.3462969238559405 ], "bridge_residual": [], "state_bridge_loss": [ 66.01814270019531, 2.341611623764038, 1.9537389278411865, 1.9830116033554077, 2.0491604804992676, 2.0490386486053467, 2.1182405948638916, 2.373213291168213, 1.9122812747955322 ], "credit_bridge_loss": [ 132.09298706054688, 9.123503684997559, 8.516526222229004, 8.634014129638672, 8.720410346984863, 8.43734359741211, 10.247673034667969, 8.351385116577148, 8.474419593811035 ], "term_loss": [ 111.63633728027344, 4.155745029449463, 3.8754897117614746, 4.040826320648193, 4.010752201080322, 4.2646074295043945, 5.391899108886719, 3.27858829498291, 3.74959135055542 ], "bridge_loss": [ 6.45359421014291e-07, 0.22446846961975098, 0.18948684632778168, 0.14564603567123413, 0.1301368772983551, 0.1614246517419815, 0.19166265428066254, 0.19554881751537323, 0.15442883968353271 ], "term_grad_loss": [ 20.456655502319336, 4.743132591247559, 4.45129919052124, 4.447327136993408, 4.579296112060547, 4.011109352111816, 4.663890838623047, 4.877087593078613, 4.57021427154541 ], "fm_loss": [ 1.4957863925246784e-07, 0.001574160298332572, 0.0025115222670137882, 0.002142899436876178, 0.0022483705542981625, 0.0020201210863888264, 0.00219835271127522, 0.0016042720526456833, 0.0018535356502979994 ] }, "final_per_layer": { "dfa_costate_cos": [ -0.04993891716003418, -0.03484980762004852, 0.008264334872364998, -0.018589604645967484, -0.019490346312522888, 0.0018934037070721388, 0.025425557047128677, 0.046417269855737686, 0.06304077804088593, 0.03301604464650154, -0.06211906671524048, 0.005648006685078144 ], "state_costate_cos": [ 0.9454483985900879, 0.946227490901947, 0.9468960165977478, 0.9470230340957642, 0.9475629925727844, 0.9478208422660828, 0.9478966593742371, 0.9480605125427246, 0.9479074478149414, 0.9478251934051514, 0.9478766918182373, 0.9475747346878052 ], "credit_costate_cos": [ 0.9432812333106995, 0.9438751935958862, 0.9443729519844055, 0.9447157382965088, 0.9455237984657288, 0.9461240768432617, 0.9463104009628296, 0.9467931985855103, 0.9471821784973145, 0.9476308226585388, 0.9481172561645508, 0.9487224817276001 ], "dfa_rho": [ -0.0704549178481102, 0.031227122992277145, 0.009932642802596092, -0.015737976878881454, -0.045219600200653076, 0.013525029644370079, 0.03365146368741989, 0.04508150368928909, 0.05846859887242317, 7.291417568922043e-05, -0.06866942346096039, -0.007611127570271492 ], "state_rho": [ 0.935008704662323, 0.9310771822929382, 0.9295646548271179, 0.938683032989502, 0.9329910278320312, 0.9313007593154907, 0.9357653260231018, 0.9376221895217896, 0.9326146841049194, 0.9372754693031311, 0.9379282593727112, 0.9359501004219055 ], "credit_rho": [ 0.9306489825248718, 0.9282867312431335, 0.9364583492279053, 0.9361423850059509, 0.9323122501373291, 0.9380875825881958, 0.93974369764328, 0.9351372122764587, 0.941116213798523, 0.9382349252700806, 0.9381735324859619, 0.9431807994842529 ], "dfa_nudge": [ 0.020365234464406967, 0.014575351029634476, -0.002057630568742752, 0.006316322833299637, 0.007856002077460289, 0.0004078727215528488, -0.010462434962391853, -0.017474167048931122, -0.02493620105087757, -0.011619189754128456, 0.022399690002202988, -0.00210556760430336 ], "state_nudge": [ -0.34937936067581177, -0.3488026261329651, -0.3481972813606262, -0.34827157855033875, -0.3484804630279541, -0.3478168845176697, -0.34806668758392334, -0.3481366038322449, -0.3481329381465912, -0.3476967513561249, -0.3463967740535736, -0.34703367948532104 ], "credit_nudge": [ -0.34637150168418884, -0.345956414937973, -0.3455013632774353, -0.34578341245651245, -0.3462638854980469, -0.3458409905433655, -0.3462728261947632, -0.3466273546218872, -0.3469495475292206, -0.34689074754714966, -0.3459630608558655, -0.34714198112487793 ] } }