diff options
| author | YurenHao0426 <Blackhao0426@gmail.com> | 2026-05-23 12:04:04 -0500 |
|---|---|---|
| committer | YurenHao0426 <Blackhao0426@gmail.com> | 2026-05-23 12:04:04 -0500 |
| commit | fe4d92760f9d9ce9d9f41eb0fe69dd9eadc1534c (patch) | |
| tree | 33c95a3d7bcc5cbfc2233f60e3ed319aa4e07c9c | |
| parent | 152821462023690df5d2bf90812e1cb5b1ca7274 (diff) | |
Add HRM-Orth v1 (codex round 2 Q6 pivot)
Patch the HRM Block with Lipschitz-bounded ops:
- attention → cosine-normalized softmax attention (approximately Lipschitz-bounded; the exact bound depends on tau and the value norms)
- SwiGLU → OrthLinear (Cayley + weak diagonal scale) → MaxMin → OrthLinear
- rms_norm + add → weighted residual h_new = (1-σ(w))·h + σ(w)·f(h)
- Weak orthogonality: diag(s) with learnable s_i ∈ [0.95, 1.0], so some directions can compress slightly
Keeps the HRM ACT framework, the H_level/L_level modules, and the H/L cycles unchanged.
Predicted gain of +5 to +7 pp over SRM v1 (codex Q5 decomposition; the components below sum to +4.5 to +7.0 pp):
- +1.5 to +2.5 pp: remove ReLU rank-kill via MaxMin
- +2.0 to +3.0 pp: remove AOL attenuation via Cayley
- +1.0 to +1.5 pp: orthogonal residual flow
Also adds: the scripts/train_hrm_orth.py trainer, SRM v1 run logs (runs/*.json), and .gitignore entries for ckpts/ and .codex.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
| -rw-r--r-- | models/srm/hrm_orth_v1.py | 371 | ||||
| -rw-r--r-- | runs/srm_aol_v1_sudoku1k_3k.json | 15517 | ||||
| -rw-r--r-- | runs/srm_smoke_500_h256.json | 2712 | ||||
| -rw-r--r-- | scripts/train_hrm_orth.py | 203 |
4 files changed, 18803 insertions, 0 deletions
diff --git a/models/srm/hrm_orth_v1.py b/models/srm/hrm_orth_v1.py new file mode 100644 index 0000000..70ec561 --- /dev/null +++ b/models/srm/hrm_orth_v1.py @@ -0,0 +1,371 @@ +"""HRM-Orth v1 — orthogonal patch of HRM per codex round 2 recommendation. + +CORE IDEA (codex Q6 pivot, after pure-orthogonal retract Q1): +Keep HRM's H_level/L_level/ACT structure, just patch the inner Block: + - Attention → cosine-normalized attention (≈ Lipschitz-bounded) + - SwiGLU MLP → CayleyOrth linear + MaxMin + CayleyOrth linear + - rms_norm + add → weighted residual: h_new = (1-σ(w)) · h + σ(w) · f(h) + - "Weak orthogonality": diag(s) scaling with most s≈1, some s∈[0.90, 0.97] for compression + +Per codex Q5 decomp: target +5~+7pp over SRM v1 (0.39 → 0.43-0.46). +Per codex Q3: Cayley used (we have it from srm_aol_v1); Householder would be faster but more impl. +""" +from typing import Tuple, List, Dict, Optional +from dataclasses import dataclass +import math + +import torch +import torch.nn.functional as F +from torch import nn +from pydantic import BaseModel + +from models.common import trunc_normal_init_ +from models.layers import rms_norm, SwiGLU, Attention, RotaryEmbedding, CosSin, CastedEmbedding, CastedLinear +from models.sparse_embedding import CastedSparseEmbedding +from models.srm.srm_aol_v1 import CayleyOrthogonal + + +def maxmin(x: torch.Tensor, group: int = 2) -> torch.Tensor: + """1-Lipschitz norm-preserving activation (Anil et al. 2019 GroupSort). + + Pairs adjacent dims; outputs (min, max) per pair. Permutation a.e. → ||∇|| = 1. + Strictly better than ReLU under norm constraints (no rank-kill). 
+ """ + *prefix, d = x.shape + if d % group != 0: + pad = group - (d % group) + x = F.pad(x, (0, pad)) + d = d + pad + xg = x.reshape(*prefix, d // group, group) + sorted_vals, _ = xg.sort(dim=-1) + return sorted_vals.reshape(*prefix, d) + + +def cosine_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, + tau: float = 8.0) -> torch.Tensor: + """Cosine-normalized softmax attention. Approximately Lipschitz-bounded + (exact bound depends on tau and value norms — see LipsFormer Qi 2023).""" + q = F.normalize(q, dim=-1) + k = F.normalize(k, dim=-1) + attn = (q @ k.transpose(-2, -1)) * tau + attn = attn.softmax(dim=-1) + return attn @ v + + +class OrthLinear(nn.Module): + """Orthogonal linear layer via Cayley. Allows optional row-scaling diag(s) + where s_i ∈ [s_min, 1] to introduce 'weak orthogonality' (codex Q1 fix). + + If s_min < 1, the operator is contractive in some directions: + Lip = max(s) ≤ 1, det = prod(s) ≤ 1 (weak contraction in compressing modes) + """ + def __init__(self, dim: int, s_min: float = 0.95, learn_scale: bool = True): + super().__init__() + self.Q = CayleyOrthogonal(dim) + self.s_min = s_min + # diag scale: sigmoid -> [s_min, 1] + if learn_scale and s_min < 1.0: + self.log_s_raw = nn.Parameter(torch.zeros(dim)) # init sigmoid(0)=0.5 → scale=(s_min+1)/2 + else: + self.register_buffer("log_s_raw", torch.zeros(dim)) + self.learn_scale = learn_scale + + def scale_diag(self) -> torch.Tensor: + if self.s_min >= 1.0 or not self.learn_scale: + return torch.ones_like(self.log_s_raw) + # Affine map sigmoid → [s_min, 1] + return self.s_min + (1.0 - self.s_min) * torch.sigmoid(self.log_s_raw) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + Q = self.Q() # (d, d) orthogonal + s = self.scale_diag().to(Q.dtype) # (d,) in [s_min, 1] + Qs = Q * s.unsqueeze(0) # rescale columns + return F.linear(x, Qs) + + +@dataclass +class HierarchicalReasoningModel_ACTV1InnerCarry: + z_H: torch.Tensor + z_L: torch.Tensor + + +@dataclass +class 
HierarchicalReasoningModel_ACTV1Carry: + inner_carry: HierarchicalReasoningModel_ACTV1InnerCarry + + steps: torch.Tensor + halted: torch.Tensor + + current_data: Dict[str, torch.Tensor] + + +class HierarchicalReasoningModel_ACTV1Config(BaseModel): + batch_size: int + seq_len: int + puzzle_emb_ndim: int = 0 + num_puzzle_identifiers: int + vocab_size: int + + H_cycles: int + L_cycles: int + + H_layers: int + L_layers: int + + # Transformer config + hidden_size: int + expansion: float + num_heads: int + pos_encodings: str + + rms_norm_eps: float = 1e-5 + rope_theta: float = 10000.0 + + # Halting Q-learning config + halt_max_steps: int + halt_exploration_prob: float + + forward_dtype: str = "bfloat16" + + +class HierarchicalReasoningModel_ACTV1Block(nn.Module): + """Orthogonal-patched HRM Block. + + Replaces (attn + SwiGLU + rms_norm) with (cosine attn + Orth-MLP + weighted residual). + The original class name preserved so the ReasoningModule wrapper is unchanged. + """ + def __init__(self, config: HierarchicalReasoningModel_ACTV1Config) -> None: + super().__init__() + d = config.hidden_size + s_min = getattr(config, "orth_s_min", 0.95) + cosine_tau = getattr(config, "cosine_attn_tau", 8.0) + + # Lipschitz-bounded cosine attention: orthogonal Q/K/V/O projections + self.q_proj = OrthLinear(d, s_min=1.0, learn_scale=False) # strict orth for q/k + self.k_proj = OrthLinear(d, s_min=1.0, learn_scale=False) + self.v_proj = OrthLinear(d, s_min=s_min, learn_scale=True) # weak orth on values + self.o_proj = OrthLinear(d, s_min=s_min, learn_scale=True) + self.cosine_tau = cosine_tau + + # Orth-MLP: OrthLinear -> MaxMin -> OrthLinear (no expansion; uses original d) + self.mlp_in = OrthLinear(d, s_min=s_min, learn_scale=True) + self.mlp_out = OrthLinear(d, s_min=s_min, learn_scale=True) + + # Weighted residual gates (init sigmoid(0)=0.5 → balanced residual) + self.w_attn_logit = nn.Parameter(torch.zeros(())) + self.w_mlp_logit = nn.Parameter(torch.zeros(())) + + def forward(self, 
cos_sin: CosSin, hidden_states: torch.Tensor) -> torch.Tensor: + # Cosine attention + q = self.q_proj(hidden_states) + k = self.k_proj(hidden_states) + v = self.v_proj(hidden_states) + attn_out = self.o_proj(cosine_attention(q, k, v, tau=self.cosine_tau)) + w_attn = torch.sigmoid(self.w_attn_logit) + hidden_states = (1.0 - w_attn) * hidden_states + w_attn * attn_out + + # Orth-MLP with MaxMin + mlp_out = self.mlp_out(maxmin(self.mlp_in(hidden_states), group=2)) + w_mlp = torch.sigmoid(self.w_mlp_logit) + hidden_states = (1.0 - w_mlp) * hidden_states + w_mlp * mlp_out + return hidden_states + + +class HierarchicalReasoningModel_ACTV1ReasoningModule(nn.Module): + def __init__(self, layers: List[HierarchicalReasoningModel_ACTV1Block]): + super().__init__() + + self.layers = torch.nn.ModuleList(layers) + + def forward(self, hidden_states: torch.Tensor, input_injection: torch.Tensor, **kwargs) -> torch.Tensor: + # Input injection (add) + hidden_states = hidden_states + input_injection + # Layers + for layer in self.layers: + hidden_states = layer(hidden_states=hidden_states, **kwargs) + + return hidden_states + + +class HierarchicalReasoningModel_ACTV1_Inner(nn.Module): + def __init__(self, config: HierarchicalReasoningModel_ACTV1Config) -> None: + super().__init__() + self.config = config + self.forward_dtype = getattr(torch, self.config.forward_dtype) + + # I/O + self.embed_scale = math.sqrt(self.config.hidden_size) + embed_init_std = 1.0 / self.embed_scale + + self.embed_tokens = CastedEmbedding(self.config.vocab_size, self.config.hidden_size, init_std=embed_init_std, cast_to=self.forward_dtype) + self.lm_head = CastedLinear(self.config.hidden_size, self.config.vocab_size, bias=False) + self.q_head = CastedLinear(self.config.hidden_size, 2, bias=True) + + self.puzzle_emb_len = -(self.config.puzzle_emb_ndim // -self.config.hidden_size) # ceil div + if self.config.puzzle_emb_ndim > 0: + # Zero init puzzle embeddings + self.puzzle_emb = 
CastedSparseEmbedding(self.config.num_puzzle_identifiers, self.config.puzzle_emb_ndim, + batch_size=self.config.batch_size, init_std=0, cast_to=self.forward_dtype) + + # LM Blocks + if self.config.pos_encodings == "rope": + self.rotary_emb = RotaryEmbedding(dim=self.config.hidden_size // self.config.num_heads, + max_position_embeddings=self.config.seq_len + self.puzzle_emb_len, + base=self.config.rope_theta) + elif self.config.pos_encodings == "learned": + self.embed_pos = CastedEmbedding(self.config.seq_len + self.puzzle_emb_len, self.config.hidden_size, init_std=embed_init_std, cast_to=self.forward_dtype) + else: + raise NotImplementedError() + + # Reasoning Layers + self.H_level = HierarchicalReasoningModel_ACTV1ReasoningModule(layers=[HierarchicalReasoningModel_ACTV1Block(self.config) for _i in range(self.config.H_layers)]) + self.L_level = HierarchicalReasoningModel_ACTV1ReasoningModule(layers=[HierarchicalReasoningModel_ACTV1Block(self.config) for _i in range(self.config.L_layers)]) + + # Initial states + self.H_init = nn.Buffer(trunc_normal_init_(torch.empty(self.config.hidden_size, dtype=self.forward_dtype), std=1), persistent=True) + self.L_init = nn.Buffer(trunc_normal_init_(torch.empty(self.config.hidden_size, dtype=self.forward_dtype), std=1), persistent=True) + + # Q head special init + # Init Q to (almost) zero for faster learning during bootstrapping + with torch.no_grad(): + self.q_head.weight.zero_() + self.q_head.bias.fill_(-5) # type: ignore + + def _input_embeddings(self, input: torch.Tensor, puzzle_identifiers: torch.Tensor): + # Token embedding + embedding = self.embed_tokens(input.to(torch.int32)) + + # Puzzle embeddings + if self.config.puzzle_emb_ndim > 0: + puzzle_embedding = self.puzzle_emb(puzzle_identifiers) + + pad_count = self.puzzle_emb_len * self.config.hidden_size - puzzle_embedding.shape[-1] + if pad_count > 0: + puzzle_embedding = F.pad(puzzle_embedding, (0, pad_count)) + + embedding = torch.cat((puzzle_embedding.view(-1, 
self.puzzle_emb_len, self.config.hidden_size), embedding), dim=-2) + + # Position embeddings + if self.config.pos_encodings == "learned": + # scale by 1/sqrt(2) to maintain forward variance + embedding = 0.707106781 * (embedding + self.embed_pos.embedding_weight.to(self.forward_dtype)) + + # Scale + return self.embed_scale * embedding + + def empty_carry(self, batch_size: int): + return HierarchicalReasoningModel_ACTV1InnerCarry( + z_H=torch.empty(batch_size, self.config.seq_len + self.puzzle_emb_len, self.config.hidden_size, dtype=self.forward_dtype), + z_L=torch.empty(batch_size, self.config.seq_len + self.puzzle_emb_len, self.config.hidden_size, dtype=self.forward_dtype), + ) + + def reset_carry(self, reset_flag: torch.Tensor, carry: HierarchicalReasoningModel_ACTV1InnerCarry): + return HierarchicalReasoningModel_ACTV1InnerCarry( + z_H=torch.where(reset_flag.view(-1, 1, 1), self.H_init, carry.z_H), + z_L=torch.where(reset_flag.view(-1, 1, 1), self.L_init, carry.z_L), + ) + + def forward(self, carry: HierarchicalReasoningModel_ACTV1InnerCarry, batch: Dict[str, torch.Tensor]) -> Tuple[HierarchicalReasoningModel_ACTV1InnerCarry, torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + seq_info = dict( + cos_sin=self.rotary_emb() if hasattr(self, "rotary_emb") else None, + ) + + # Input encoding + input_embeddings = self._input_embeddings(batch["inputs"], batch["puzzle_identifiers"]) + + # Forward iterations + with torch.no_grad(): + z_H, z_L = carry.z_H, carry.z_L + + for _H_step in range(self.config.H_cycles): + for _L_step in range(self.config.L_cycles): + if not ((_H_step == self.config.H_cycles - 1) and (_L_step == self.config.L_cycles - 1)): + z_L = self.L_level(z_L, z_H + input_embeddings, **seq_info) + + if not (_H_step == self.config.H_cycles - 1): + z_H = self.H_level(z_H, z_L, **seq_info) + + assert not z_H.requires_grad and not z_L.requires_grad + + # 1-step grad + z_L = self.L_level(z_L, z_H + input_embeddings, **seq_info) + z_H = self.H_level(z_H, z_L, 
**seq_info) + + # LM Outputs + new_carry = HierarchicalReasoningModel_ACTV1InnerCarry(z_H=z_H.detach(), z_L=z_L.detach()) # New carry no grad + output = self.lm_head(z_H)[:, self.puzzle_emb_len:] + + # Q head + q_logits = self.q_head(z_H[:, 0]).to(torch.float32) + + return new_carry, output, (q_logits[..., 0], q_logits[..., 1]) + + +class HierarchicalReasoningModel_ACTV1(nn.Module): + """ACT wrapper.""" + + def __init__(self, config_dict: dict): + super().__init__() + self.config = HierarchicalReasoningModel_ACTV1Config(**config_dict) + self.inner = HierarchicalReasoningModel_ACTV1_Inner(self.config) + + @property + def puzzle_emb(self): + return self.inner.puzzle_emb + + def initial_carry(self, batch: Dict[str, torch.Tensor]): + batch_size = batch["inputs"].shape[0] + + return HierarchicalReasoningModel_ACTV1Carry( + inner_carry=self.inner.empty_carry(batch_size), # Empty is expected, it will be reseted in first pass as all sequences are halted. + + steps=torch.zeros((batch_size, ), dtype=torch.int32), + halted=torch.ones((batch_size, ), dtype=torch.bool), # Default to halted + + current_data={k: torch.empty_like(v) for k, v in batch.items()} + ) + + def forward(self, carry: HierarchicalReasoningModel_ACTV1Carry, batch: Dict[str, torch.Tensor]) -> Tuple[HierarchicalReasoningModel_ACTV1Carry, Dict[str, torch.Tensor]]: + # Update data, carry (removing halted sequences) + new_inner_carry = self.inner.reset_carry(carry.halted, carry.inner_carry) + + new_steps = torch.where(carry.halted, 0, carry.steps) + + new_current_data = {k: torch.where(carry.halted.view((-1, ) + (1, ) * (batch[k].ndim - 1)), batch[k], v) for k, v in carry.current_data.items()} + + # Forward inner model + new_inner_carry, logits, (q_halt_logits, q_continue_logits) = self.inner(new_inner_carry, new_current_data) + + outputs = { + "logits": logits, + "q_halt_logits": q_halt_logits, + "q_continue_logits": q_continue_logits + } + + with torch.no_grad(): + # Step + new_steps = new_steps + 1 + 
is_last_step = new_steps >= self.config.halt_max_steps + + halted = is_last_step + + # if training, and ACT is enabled + if self.training and (self.config.halt_max_steps > 1): + # Halt signal + # NOTE: During evaluation, always use max steps, this is to guarantee the same halting steps inside a batch for batching purposes + halted = halted | (q_halt_logits > q_continue_logits) + + # Exploration + min_halt_steps = (torch.rand_like(q_halt_logits) < self.config.halt_exploration_prob) * torch.randint_like(new_steps, low=2, high=self.config.halt_max_steps + 1) + + halted = halted & (new_steps >= min_halt_steps) + + # Compute target Q + # NOTE: No replay buffer and target networks for computing target Q-value. + # As batch_size is large, there're many parallel envs. + # Similar concept as PQN https://arxiv.org/abs/2407.04811 + next_q_halt_logits, next_q_continue_logits = self.inner(new_inner_carry, new_current_data)[-1] + + outputs["target_q_continue"] = torch.sigmoid(torch.where(is_last_step, next_q_halt_logits, torch.maximum(next_q_halt_logits, next_q_continue_logits))) + + return HierarchicalReasoningModel_ACTV1Carry(new_inner_carry, new_steps, halted, new_current_data), outputs diff --git a/runs/srm_aol_v1_sudoku1k_3k.json b/runs/srm_aol_v1_sudoku1k_3k.json new file mode 100644 index 0000000..4944806 --- /dev/null +++ b/runs/srm_aol_v1_sudoku1k_3k.json @@ -0,0 +1,15517 @@ +{ + "args": { + "data_path": "/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000", + "n_steps": 3000, + "batch_size": 8, + "lr": 0.0001, + "puzzle_emb_lr": 0.0001, + "warmup_steps": 200, + "weight_decay": 1.0, + "hidden_size": 512, + "n_iters": 12, + "n_aol_layers": 2, + "kappa": 0.9, + "eta": 1.0, + "alpha": 1.0, + "k_lyap": 2, + "lyap_iters": 8, + "lyap_every": 50, + "seed": 42, + "eval_every": 300, + "eval_n": 512, + "eval_batch_size": 32, + "out": "runs/srm_aol_v1_sudoku1k_3k.json", + "save_ckpt": "ckpts/srm_aol_v1_3k.pt" + }, + "n_params": 3700018, + "initial_acc": 0.0, + "initial_tok_acc": 
0.11108699845679013, + "initial_lip": { + "lip_emp_mean": 0.09653933346271515, + "lip_emp_max": 0.1220538467168808, + "lip_emp_99p": 0.12071612477302551, + "lip_theoretical_bound": 0.9, + "passes_bound": true + }, + "steps": [ + { + "step": 0, + "lr": 0.0, + "sup_loss": 2.6486240918413437, + "lyap1_mean": -6.549736022949219, + "lyap1_max": -6.539831161499023, + "lyap_spec_mean": [ + -6.549736022949219, + -6.549976348876953 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1, + "lr": 5e-07, + "sup_loss": 2.66581192100626 + }, + { + "step": 2, + "lr": 1e-06, + "sup_loss": 2.6551254999064735 + }, + { + "step": 3, + "lr": 1.5e-06, + "sup_loss": 2.6923477307570765 + }, + { + "step": 4, + "lr": 2e-06, + "sup_loss": 2.645281303080112 + }, + { + "step": 5, + "lr": 2.5e-06, + "sup_loss": 2.683779022703776 + }, + { + "step": 6, + "lr": 3e-06, + "sup_loss": 2.668961340068308 + }, + { + "step": 7, + "lr": 3.5e-06, + "sup_loss": 2.691453761754646 + }, + { + "step": 8, + "lr": 4e-06, + "sup_loss": 2.67131585905981 + }, + { + "step": 9, + "lr": 4.5e-06, + "sup_loss": 2.6396303064503073 + }, + { + "step": 10, + "lr": 5e-06, + "sup_loss": 2.6777360122204783 + }, + { + "step": 11, + "lr": 5.500000000000001e-06, + "sup_loss": 2.644418895220248 + }, + { + "step": 12, + "lr": 6e-06, + "sup_loss": 2.5983125688845425 + }, + { + "step": 13, + "lr": 6.5000000000000004e-06, + "sup_loss": 2.655729060731042 + }, + { + "step": 14, + "lr": 7e-06, + "sup_loss": 2.625770367908041 + }, + { + "step": 15, + "lr": 7.5e-06, + "sup_loss": 2.6155870751135746 + }, + { + "step": 16, + "lr": 8e-06, + "sup_loss": 2.5964094162748763 + }, + { + "step": 17, + "lr": 8.5e-06, + "sup_loss": 2.617146447229883 + }, + { + "step": 18, + "lr": 9e-06, + "sup_loss": 2.60610974842071 + }, + { + "step": 19, + "lr": 9.5e-06, + "sup_loss": 2.6014442318943773 + }, + { + "step": 20, + "lr": 1e-05, + "sup_loss": 2.5962810756013224 + }, + { + "step": 21, + "lr": 1.0500000000000001e-05, + "sup_loss": 
2.6082165910390334 + }, + { + "step": 22, + "lr": 1.1000000000000001e-05, + "sup_loss": 2.553679666296391 + }, + { + "step": 23, + "lr": 1.15e-05, + "sup_loss": 2.571934004236155 + }, + { + "step": 24, + "lr": 1.2e-05, + "sup_loss": 2.5629419901326527 + }, + { + "step": 25, + "lr": 1.25e-05, + "sup_loss": 2.53322149097866 + }, + { + "step": 26, + "lr": 1.3000000000000001e-05, + "sup_loss": 2.5600389565491857 + }, + { + "step": 27, + "lr": 1.3500000000000001e-05, + "sup_loss": 2.5287331587153155 + }, + { + "step": 28, + "lr": 1.4e-05, + "sup_loss": 2.4996639796202493 + }, + { + "step": 29, + "lr": 1.4500000000000002e-05, + "sup_loss": 2.4658839702933375 + }, + { + "step": 30, + "lr": 1.5e-05, + "sup_loss": 2.502888471186299 + }, + { + "step": 31, + "lr": 1.55e-05, + "sup_loss": 2.486028037755685 + }, + { + "step": 32, + "lr": 1.6e-05, + "sup_loss": 2.455803139363649 + }, + { + "step": 33, + "lr": 1.65e-05, + "sup_loss": 2.450766423850345 + }, + { + "step": 34, + "lr": 1.7e-05, + "sup_loss": 2.4599090138447104 + }, + { + "step": 35, + "lr": 1.7500000000000002e-05, + "sup_loss": 2.411981690314369 + }, + { + "step": 36, + "lr": 1.8e-05, + "sup_loss": 2.3960091921334774 + }, + { + "step": 37, + "lr": 1.8500000000000002e-05, + "sup_loss": 2.412117418660942 + }, + { + "step": 38, + "lr": 1.9e-05, + "sup_loss": 2.3877505016014466 + }, + { + "step": 39, + "lr": 1.95e-05, + "sup_loss": 2.3738033299289767 + }, + { + "step": 40, + "lr": 2e-05, + "sup_loss": 2.3774336792493087 + }, + { + "step": 41, + "lr": 2.05e-05, + "sup_loss": 2.3386668042199688 + }, + { + "step": 42, + "lr": 2.1000000000000002e-05, + "sup_loss": 2.323580736430218 + }, + { + "step": 43, + "lr": 2.15e-05, + "sup_loss": 2.3230888710275712 + }, + { + "step": 44, + "lr": 2.2000000000000003e-05, + "sup_loss": 2.2962211806303623 + }, + { + "step": 45, + "lr": 2.25e-05, + "sup_loss": 2.28249863397299 + }, + { + "step": 46, + "lr": 2.3e-05, + "sup_loss": 2.277612049889994 + }, + { + "step": 47, + "lr": 
2.3500000000000002e-05, + "sup_loss": 2.274061397690501 + }, + { + "step": 48, + "lr": 2.4e-05, + "sup_loss": 2.2542018275664475 + }, + { + "step": 49, + "lr": 2.45e-05, + "sup_loss": 2.239544759545745 + }, + { + "step": 50, + "lr": 2.5e-05, + "sup_loss": 2.230678213861076, + "lyap1_mean": -6.591014862060547, + "lyap1_max": -6.573784828186035, + "lyap_spec_mean": [ + -6.591015815734863, + -6.592367649078369 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 51, + "lr": 2.5500000000000003e-05, + "sup_loss": 2.2250846620499685 + }, + { + "step": 52, + "lr": 2.6000000000000002e-05, + "sup_loss": 2.1904298900992263 + }, + { + "step": 53, + "lr": 2.65e-05, + "sup_loss": 2.1887838225160507 + }, + { + "step": 54, + "lr": 2.7000000000000002e-05, + "sup_loss": 2.175130077373692 + }, + { + "step": 55, + "lr": 2.7500000000000004e-05, + "sup_loss": 2.1657483386708676 + }, + { + "step": 56, + "lr": 2.8e-05, + "sup_loss": 2.1557390028955203 + }, + { + "step": 57, + "lr": 2.85e-05, + "sup_loss": 2.144618694787544 + }, + { + "step": 58, + "lr": 2.9000000000000004e-05, + "sup_loss": 2.1388971830385146 + }, + { + "step": 59, + "lr": 2.95e-05, + "sup_loss": 2.0973153612516695 + }, + { + "step": 60, + "lr": 3e-05, + "sup_loss": 2.113670049835654 + }, + { + "step": 61, + "lr": 3.0500000000000003e-05, + "sup_loss": 2.1207479296026577 + }, + { + "step": 62, + "lr": 3.1e-05, + "sup_loss": 2.082421628039469 + }, + { + "step": 63, + "lr": 3.15e-05, + "sup_loss": 2.070673099710961 + }, + { + "step": 64, + "lr": 3.2e-05, + "sup_loss": 2.0589569566583137 + }, + { + "step": 65, + "lr": 3.2500000000000004e-05, + "sup_loss": 2.070935305666071 + }, + { + "step": 66, + "lr": 3.3e-05, + "sup_loss": 2.0365844955544405 + }, + { + "step": 67, + "lr": 3.35e-05, + "sup_loss": 2.026026646992839 + }, + { + "step": 68, + "lr": 3.4e-05, + "sup_loss": 2.0246294504732814 + }, + { + "step": 69, + "lr": 3.4500000000000005e-05, + "sup_loss": 2.0130169588066664 + }, + { + "step": 70, + "lr": 
3.5000000000000004e-05, + "sup_loss": 2.004195702224374 + }, + { + "step": 71, + "lr": 3.55e-05, + "sup_loss": 1.9922717092146127 + }, + { + "step": 72, + "lr": 3.6e-05, + "sup_loss": 1.97546507270582 + }, + { + "step": 73, + "lr": 3.65e-05, + "sup_loss": 1.960182337896599 + }, + { + "step": 74, + "lr": 3.7000000000000005e-05, + "sup_loss": 1.9617315183181 + }, + { + "step": 75, + "lr": 3.7500000000000003e-05, + "sup_loss": 1.961098848528872 + }, + { + "step": 76, + "lr": 3.8e-05, + "sup_loss": 1.943396801656729 + }, + { + "step": 77, + "lr": 3.85e-05, + "sup_loss": 1.9379228284285501 + }, + { + "step": 78, + "lr": 3.9e-05, + "sup_loss": 1.9422939315838272 + }, + { + "step": 79, + "lr": 3.9500000000000005e-05, + "sup_loss": 1.933639374366441 + }, + { + "step": 80, + "lr": 4e-05, + "sup_loss": 1.9194373470910142 + }, + { + "step": 81, + "lr": 4.0499999999999995e-05, + "sup_loss": 1.9106562939134015 + }, + { + "step": 82, + "lr": 4.1e-05, + "sup_loss": 1.8834361065300573 + }, + { + "step": 83, + "lr": 4.15e-05, + "sup_loss": 1.8928524633986752 + }, + { + "step": 84, + "lr": 4.2000000000000004e-05, + "sup_loss": 1.8907009072972123 + }, + { + "step": 85, + "lr": 4.25e-05, + "sup_loss": 1.8737958365290759 + }, + { + "step": 86, + "lr": 4.3e-05, + "sup_loss": 1.8552463666904502 + }, + { + "step": 87, + "lr": 4.350000000000001e-05, + "sup_loss": 1.8610634932528922 + }, + { + "step": 88, + "lr": 4.4000000000000006e-05, + "sup_loss": 1.8726442477793834 + }, + { + "step": 89, + "lr": 4.45e-05, + "sup_loss": 1.850989213349782 + }, + { + "step": 90, + "lr": 4.5e-05, + "sup_loss": 1.8364123661818825 + }, + { + "step": 91, + "lr": 4.55e-05, + "sup_loss": 1.8378995686718504 + }, + { + "step": 92, + "lr": 4.6e-05, + "sup_loss": 1.8212552337449657 + }, + { + "step": 93, + "lr": 4.6500000000000005e-05, + "sup_loss": 1.8123856149451596 + }, + { + "step": 94, + "lr": 4.7000000000000004e-05, + "sup_loss": 1.8244577364592562 + }, + { + "step": 95, + "lr": 4.7499999999999996e-05, + 
"sup_loss": 1.8097464716660854 + }, + { + "step": 96, + "lr": 4.8e-05, + "sup_loss": 1.8091811736013546 + }, + { + "step": 97, + "lr": 4.85e-05, + "sup_loss": 1.7978660925661378 + }, + { + "step": 98, + "lr": 4.9e-05, + "sup_loss": 1.8035262566864094 + }, + { + "step": 99, + "lr": 4.9500000000000004e-05, + "sup_loss": 1.7838572069451848 + }, + { + "step": 100, + "lr": 5e-05, + "sup_loss": 1.765573612709, + "lyap1_mean": -6.645934104919434, + "lyap1_max": -6.629775524139404, + "lyap_spec_mean": [ + -6.645934104919434, + -6.647939682006836 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 101, + "lr": 5.050000000000001e-05, + "sup_loss": 1.7718292123582209 + }, + { + "step": 102, + "lr": 5.1000000000000006e-05, + "sup_loss": 1.769429620156411 + }, + { + "step": 103, + "lr": 5.15e-05, + "sup_loss": 1.7761541532021423 + }, + { + "step": 104, + "lr": 5.2000000000000004e-05, + "sup_loss": 1.7557021065122689 + }, + { + "step": 105, + "lr": 5.25e-05, + "sup_loss": 1.753522430314226 + }, + { + "step": 106, + "lr": 5.3e-05, + "sup_loss": 1.7449709258162907 + }, + { + "step": 107, + "lr": 5.3500000000000006e-05, + "sup_loss": 1.7406835412211987 + }, + { + "step": 108, + "lr": 5.4000000000000005e-05, + "sup_loss": 1.7392730822746327 + }, + { + "step": 109, + "lr": 5.4499999999999997e-05, + "sup_loss": 1.714261023587064 + }, + { + "step": 110, + "lr": 5.500000000000001e-05, + "sup_loss": 1.7228040943129044 + }, + { + "step": 111, + "lr": 5.55e-05, + "sup_loss": 1.713869433114336 + }, + { + "step": 112, + "lr": 5.6e-05, + "sup_loss": 1.7400222199535633 + }, + { + "step": 113, + "lr": 5.6500000000000005e-05, + "sup_loss": 1.7196513559501116 + }, + { + "step": 114, + "lr": 5.7e-05, + "sup_loss": 1.7139161832678387 + }, + { + "step": 115, + "lr": 5.75e-05, + "sup_loss": 1.6685873901751527 + }, + { + "step": 116, + "lr": 5.800000000000001e-05, + "sup_loss": 1.6906523084999094 + }, + { + "step": 117, + "lr": 5.85e-05, + "sup_loss": 1.7144940216860909 + }, + { + "step": 
118, + "lr": 5.9e-05, + "sup_loss": 1.691214392504199 + }, + { + "step": 119, + "lr": 5.95e-05, + "sup_loss": 1.6684485982436852 + }, + { + "step": 120, + "lr": 6e-05, + "sup_loss": 1.6762212844060034 + }, + { + "step": 121, + "lr": 6.050000000000001e-05, + "sup_loss": 1.690185344754696 + }, + { + "step": 122, + "lr": 6.1000000000000005e-05, + "sup_loss": 1.6975748985069121 + }, + { + "step": 123, + "lr": 6.15e-05, + "sup_loss": 1.6919530875603126 + }, + { + "step": 124, + "lr": 6.2e-05, + "sup_loss": 1.6901255229489793 + }, + { + "step": 125, + "lr": 6.25e-05, + "sup_loss": 1.6840904073211553 + }, + { + "step": 126, + "lr": 6.3e-05, + "sup_loss": 1.6630555519917176 + }, + { + "step": 127, + "lr": 6.350000000000001e-05, + "sup_loss": 1.6489561237615573 + }, + { + "step": 128, + "lr": 6.4e-05, + "sup_loss": 1.6580297586433415 + }, + { + "step": 129, + "lr": 6.45e-05, + "sup_loss": 1.6427564358402926 + }, + { + "step": 130, + "lr": 6.500000000000001e-05, + "sup_loss": 1.6278183600917866 + }, + { + "step": 131, + "lr": 6.55e-05, + "sup_loss": 1.6517253500466267 + }, + { + "step": 132, + "lr": 6.6e-05, + "sup_loss": 1.642541918937894 + }, + { + "step": 133, + "lr": 6.65e-05, + "sup_loss": 1.6741262579367695 + }, + { + "step": 134, + "lr": 6.7e-05, + "sup_loss": 1.6500508583749833 + }, + { + "step": 135, + "lr": 6.75e-05, + "sup_loss": 1.664009166849713 + }, + { + "step": 136, + "lr": 6.8e-05, + "sup_loss": 1.679325086824157 + }, + { + "step": 137, + "lr": 6.85e-05, + "sup_loss": 1.6595201772880044 + }, + { + "step": 138, + "lr": 6.900000000000001e-05, + "sup_loss": 1.6545919591600673 + }, + { + "step": 139, + "lr": 6.950000000000001e-05, + "sup_loss": 1.63273531828923 + }, + { + "step": 140, + "lr": 7.000000000000001e-05, + "sup_loss": 1.6542915070866298 + }, + { + "step": 141, + "lr": 7.05e-05, + "sup_loss": 1.6823128764175808 + }, + { + "step": 142, + "lr": 7.1e-05, + "sup_loss": 1.6355818925673775 + }, + { + "step": 143, + "lr": 7.15e-05, + "sup_loss": 
1.619038028521917 + }, + { + "step": 144, + "lr": 7.2e-05, + "sup_loss": 1.6702294066959542 + }, + { + "step": 145, + "lr": 7.25e-05, + "sup_loss": 1.6191061524743915 + }, + { + "step": 146, + "lr": 7.3e-05, + "sup_loss": 1.6462805192394308 + }, + { + "step": 147, + "lr": 7.350000000000001e-05, + "sup_loss": 1.6383527155615467 + }, + { + "step": 148, + "lr": 7.400000000000001e-05, + "sup_loss": 1.5680034185298657 + }, + { + "step": 149, + "lr": 7.45e-05, + "sup_loss": 1.599579545671956 + }, + { + "step": 150, + "lr": 7.500000000000001e-05, + "sup_loss": 1.6107381043908648, + "lyap1_mean": -6.680443286895752, + "lyap1_max": -6.667646408081055, + "lyap_spec_mean": [ + -6.680443286895752, + -6.679490089416504 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 151, + "lr": 7.55e-05, + "sup_loss": 1.6331943361444723 + }, + { + "step": 152, + "lr": 7.6e-05, + "sup_loss": 1.6186159446667354 + }, + { + "step": 153, + "lr": 7.65e-05, + "sup_loss": 1.5932738298918827 + }, + { + "step": 154, + "lr": 7.7e-05, + "sup_loss": 1.634134278191433 + }, + { + "step": 155, + "lr": 7.75e-05, + "sup_loss": 1.622729318115855 + }, + { + "step": 156, + "lr": 7.8e-05, + "sup_loss": 1.6343562972433312 + }, + { + "step": 157, + "lr": 7.850000000000001e-05, + "sup_loss": 1.6019808938889186 + }, + { + "step": 158, + "lr": 7.900000000000001e-05, + "sup_loss": 1.603522979171088 + }, + { + "step": 159, + "lr": 7.950000000000001e-05, + "sup_loss": 1.6363473474570325 + }, + { + "step": 160, + "lr": 8e-05, + "sup_loss": 1.626051532683992 + }, + { + "step": 161, + "lr": 8.05e-05, + "sup_loss": 1.5572688308525355 + }, + { + "step": 162, + "lr": 8.099999999999999e-05, + "sup_loss": 1.5986040847940421 + }, + { + "step": 163, + "lr": 8.150000000000002e-05, + "sup_loss": 1.603001444161075 + }, + { + "step": 164, + "lr": 8.2e-05, + "sup_loss": 1.5680708701938013 + }, + { + "step": 165, + "lr": 8.25e-05, + "sup_loss": 1.591887953679735 + }, + { + "step": 166, + "lr": 8.3e-05, + "sup_loss": 
1.597447414563637 + }, + { + "step": 167, + "lr": 8.35e-05, + "sup_loss": 1.570439822396015 + }, + { + "step": 168, + "lr": 8.400000000000001e-05, + "sup_loss": 1.619639569777274 + }, + { + "step": 169, + "lr": 8.450000000000001e-05, + "sup_loss": 1.5742025152753893 + }, + { + "step": 170, + "lr": 8.5e-05, + "sup_loss": 1.5920516144406869 + }, + { + "step": 171, + "lr": 8.55e-05, + "sup_loss": 1.590451636793067 + }, + { + "step": 172, + "lr": 8.6e-05, + "sup_loss": 1.5892578802898323 + }, + { + "step": 173, + "lr": 8.65e-05, + "sup_loss": 1.5723286605739668 + }, + { + "step": 174, + "lr": 8.700000000000001e-05, + "sup_loss": 1.5650768664153798 + }, + { + "step": 175, + "lr": 8.750000000000001e-05, + "sup_loss": 1.5641350770345277 + }, + { + "step": 176, + "lr": 8.800000000000001e-05, + "sup_loss": 1.5718087293880798 + }, + { + "step": 177, + "lr": 8.85e-05, + "sup_loss": 1.5721301108609709 + }, + { + "step": 178, + "lr": 8.9e-05, + "sup_loss": 1.5767578138300593 + }, + { + "step": 179, + "lr": 8.95e-05, + "sup_loss": 1.5962674555438876 + }, + { + "step": 180, + "lr": 9e-05, + "sup_loss": 1.5748646924170986 + }, + { + "step": 181, + "lr": 9.05e-05, + "sup_loss": 1.542304370712881 + }, + { + "step": 182, + "lr": 9.1e-05, + "sup_loss": 1.5795521123482055 + }, + { + "step": 183, + "lr": 9.15e-05, + "sup_loss": 1.5756055688798924 + }, + { + "step": 184, + "lr": 9.2e-05, + "sup_loss": 1.5636117185517475 + }, + { + "step": 185, + "lr": 9.250000000000001e-05, + "sup_loss": 1.5521299906273722 + }, + { + "step": 186, + "lr": 9.300000000000001e-05, + "sup_loss": 1.5249520877796907 + }, + { + "step": 187, + "lr": 9.350000000000001e-05, + "sup_loss": 1.567615583985836 + }, + { + "step": 188, + "lr": 9.400000000000001e-05, + "sup_loss": 1.5446897807231654 + }, + { + "step": 189, + "lr": 9.45e-05, + "sup_loss": 1.58593493753458 + }, + { + "step": 190, + "lr": 9.499999999999999e-05, + "sup_loss": 1.5590768697036184 + }, + { + "step": 191, + "lr": 9.550000000000002e-05, + 
"sup_loss": 1.5738989490910151 + }, + { + "step": 192, + "lr": 9.6e-05, + "sup_loss": 1.6060686251789913 + }, + { + "step": 193, + "lr": 9.65e-05, + "sup_loss": 1.5595447834098037 + }, + { + "step": 194, + "lr": 9.7e-05, + "sup_loss": 1.5807995257589502 + }, + { + "step": 195, + "lr": 9.75e-05, + "sup_loss": 1.5895042670753294 + }, + { + "step": 196, + "lr": 9.8e-05, + "sup_loss": 1.5759125863427577 + }, + { + "step": 197, + "lr": 9.850000000000001e-05, + "sup_loss": 1.5592385905945854 + }, + { + "step": 198, + "lr": 9.900000000000001e-05, + "sup_loss": 1.5639919986110766 + }, + { + "step": 199, + "lr": 9.95e-05, + "sup_loss": 1.580161702594515 + }, + { + "step": 200, + "lr": 0.0001, + "sup_loss": 1.5477458694954944, + "lyap1_mean": -6.6868791580200195, + "lyap1_max": -6.654043197631836, + "lyap_spec_mean": [ + -6.6868791580200195, + -6.687141418457031 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 201, + "lr": 0.0001, + "sup_loss": 1.5723668194450264 + }, + { + "step": 202, + "lr": 0.0001, + "sup_loss": 1.564699514409567 + }, + { + "step": 203, + "lr": 0.0001, + "sup_loss": 1.5604428029308708 + }, + { + "step": 204, + "lr": 0.0001, + "sup_loss": 1.5861776721359018 + }, + { + "step": 205, + "lr": 0.0001, + "sup_loss": 1.5782700080930012 + }, + { + "step": 206, + "lr": 0.0001, + "sup_loss": 1.5520980953107555 + }, + { + "step": 207, + "lr": 0.0001, + "sup_loss": 1.558527482520001 + }, + { + "step": 208, + "lr": 0.0001, + "sup_loss": 1.5830556008576437 + }, + { + "step": 209, + "lr": 0.0001, + "sup_loss": 1.56305622089171 + }, + { + "step": 210, + "lr": 0.0001, + "sup_loss": 1.5714398581531885 + }, + { + "step": 211, + "lr": 0.0001, + "sup_loss": 1.529996003908584 + }, + { + "step": 212, + "lr": 0.0001, + "sup_loss": 1.5587963770169526 + }, + { + "step": 213, + "lr": 0.0001, + "sup_loss": 1.5629355573124428 + }, + { + "step": 214, + "lr": 0.0001, + "sup_loss": 1.5662224576257553 + }, + { + "step": 215, + "lr": 0.0001, + "sup_loss": 1.5407444113099638 + 
}, + { + "step": 216, + "lr": 0.0001, + "sup_loss": 1.584385505016842 + }, + { + "step": 217, + "lr": 0.0001, + "sup_loss": 1.5656855026823495 + }, + { + "step": 218, + "lr": 0.0001, + "sup_loss": 1.5463318649792894 + }, + { + "step": 219, + "lr": 0.0001, + "sup_loss": 1.558034354842874 + }, + { + "step": 220, + "lr": 0.0001, + "sup_loss": 1.5842701040177278 + }, + { + "step": 221, + "lr": 0.0001, + "sup_loss": 1.51059326690561 + }, + { + "step": 222, + "lr": 0.0001, + "sup_loss": 1.5525216998009261 + }, + { + "step": 223, + "lr": 0.0001, + "sup_loss": 1.5482722635197943 + }, + { + "step": 224, + "lr": 0.0001, + "sup_loss": 1.4838554858912665 + }, + { + "step": 225, + "lr": 0.0001, + "sup_loss": 1.566661886621028 + }, + { + "step": 226, + "lr": 0.0001, + "sup_loss": 1.5400955015597735 + }, + { + "step": 227, + "lr": 0.0001, + "sup_loss": 1.5687526858500491 + }, + { + "step": 228, + "lr": 0.0001, + "sup_loss": 1.5691507976198855 + }, + { + "step": 229, + "lr": 0.0001, + "sup_loss": 1.53198097838484 + }, + { + "step": 230, + "lr": 0.0001, + "sup_loss": 1.5148413488005252 + }, + { + "step": 231, + "lr": 0.0001, + "sup_loss": 1.5493080655522284 + }, + { + "step": 232, + "lr": 0.0001, + "sup_loss": 1.5442163396736155 + }, + { + "step": 233, + "lr": 0.0001, + "sup_loss": 1.54046977806173 + }, + { + "step": 234, + "lr": 0.0001, + "sup_loss": 1.5425880946086366 + }, + { + "step": 235, + "lr": 0.0001, + "sup_loss": 1.575076912686921 + }, + { + "step": 236, + "lr": 0.0001, + "sup_loss": 1.5428485905649254 + }, + { + "step": 237, + "lr": 0.0001, + "sup_loss": 1.5395443580469992 + }, + { + "step": 238, + "lr": 0.0001, + "sup_loss": 1.5883071635836998 + }, + { + "step": 239, + "lr": 0.0001, + "sup_loss": 1.534849519729787 + }, + { + "step": 240, + "lr": 0.0001, + "sup_loss": 1.5710274969168985 + }, + { + "step": 241, + "lr": 0.0001, + "sup_loss": 1.5784896160961035 + }, + { + "step": 242, + "lr": 0.0001, + "sup_loss": 1.537032529248547 + }, + { + "step": 243, + "lr": 0.0001, + 
"sup_loss": 1.5410510463366764 + }, + { + "step": 244, + "lr": 0.0001, + "sup_loss": 1.567677100071934 + }, + { + "step": 245, + "lr": 0.0001, + "sup_loss": 1.5256795166825063 + }, + { + "step": 246, + "lr": 0.0001, + "sup_loss": 1.5455988643443055 + }, + { + "step": 247, + "lr": 0.0001, + "sup_loss": 1.5324020729077692 + }, + { + "step": 248, + "lr": 0.0001, + "sup_loss": 1.5249119581282295 + }, + { + "step": 249, + "lr": 0.0001, + "sup_loss": 1.5124648889243315 + }, + { + "step": 250, + "lr": 0.0001, + "sup_loss": 1.5978004357536462, + "lyap1_mean": -6.694179534912109, + "lyap1_max": -6.644731521606445, + "lyap_spec_mean": [ + -6.694179534912109, + -6.69418478012085 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 251, + "lr": 0.0001, + "sup_loss": 1.5384092154284206 + }, + { + "step": 252, + "lr": 0.0001, + "sup_loss": 1.5343629147099467 + }, + { + "step": 253, + "lr": 0.0001, + "sup_loss": 1.5566998256192501 + }, + { + "step": 254, + "lr": 0.0001, + "sup_loss": 1.527463187647169 + }, + { + "step": 255, + "lr": 0.0001, + "sup_loss": 1.5466020877298672 + }, + { + "step": 256, + "lr": 0.0001, + "sup_loss": 1.5252548207327843 + }, + { + "step": 257, + "lr": 0.0001, + "sup_loss": 1.5328926705773882 + }, + { + "step": 258, + "lr": 0.0001, + "sup_loss": 1.5257264021083035 + }, + { + "step": 259, + "lr": 0.0001, + "sup_loss": 1.5482693336878994 + }, + { + "step": 260, + "lr": 0.0001, + "sup_loss": 1.5246880613977345 + }, + { + "step": 261, + "lr": 0.0001, + "sup_loss": 1.5368057639664512 + }, + { + "step": 262, + "lr": 0.0001, + "sup_loss": 1.520905230766904 + }, + { + "step": 263, + "lr": 0.0001, + "sup_loss": 1.5415538284759558 + }, + { + "step": 264, + "lr": 0.0001, + "sup_loss": 1.5264283512779455 + }, + { + "step": 265, + "lr": 0.0001, + "sup_loss": 1.5484230592639259 + }, + { + "step": 266, + "lr": 0.0001, + "sup_loss": 1.5003885457020067 + }, + { + "step": 267, + "lr": 0.0001, + "sup_loss": 1.556837774591551 + }, + { + "step": 268, + "lr": 0.0001, + 
"sup_loss": 1.5553436077656102 + }, + { + "step": 269, + "lr": 0.0001, + "sup_loss": 1.5453542722634106 + }, + { + "step": 270, + "lr": 0.0001, + "sup_loss": 1.5652166192745618 + }, + { + "step": 271, + "lr": 0.0001, + "sup_loss": 1.5215748778019837 + }, + { + "step": 272, + "lr": 0.0001, + "sup_loss": 1.5438651864196689 + }, + { + "step": 273, + "lr": 0.0001, + "sup_loss": 1.5408136797939596 + }, + { + "step": 274, + "lr": 0.0001, + "sup_loss": 1.5026193032525657 + }, + { + "step": 275, + "lr": 0.0001, + "sup_loss": 1.5477253397291126 + }, + { + "step": 276, + "lr": 0.0001, + "sup_loss": 1.5336627827427467 + }, + { + "step": 277, + "lr": 0.0001, + "sup_loss": 1.5270861895780656 + }, + { + "step": 278, + "lr": 0.0001, + "sup_loss": 1.518488015782166 + }, + { + "step": 279, + "lr": 0.0001, + "sup_loss": 1.5134708507259873 + }, + { + "step": 280, + "lr": 0.0001, + "sup_loss": 1.5431403123968697 + }, + { + "step": 281, + "lr": 0.0001, + "sup_loss": 1.541801403650025 + }, + { + "step": 282, + "lr": 0.0001, + "sup_loss": 1.519480337561989 + }, + { + "step": 283, + "lr": 0.0001, + "sup_loss": 1.5299565131627453 + }, + { + "step": 284, + "lr": 0.0001, + "sup_loss": 1.547917587276493 + }, + { + "step": 285, + "lr": 0.0001, + "sup_loss": 1.4938832063120242 + }, + { + "step": 286, + "lr": 0.0001, + "sup_loss": 1.525396268292315 + }, + { + "step": 287, + "lr": 0.0001, + "sup_loss": 1.541475554162341 + }, + { + "step": 288, + "lr": 0.0001, + "sup_loss": 1.5072059477977395 + }, + { + "step": 289, + "lr": 0.0001, + "sup_loss": 1.5456634379236454 + }, + { + "step": 290, + "lr": 0.0001, + "sup_loss": 1.5283500594160828 + }, + { + "step": 291, + "lr": 0.0001, + "sup_loss": 1.5115168973991229 + }, + { + "step": 292, + "lr": 0.0001, + "sup_loss": 1.5210975319461015 + }, + { + "step": 293, + "lr": 0.0001, + "sup_loss": 1.5147547171891933 + }, + { + "step": 294, + "lr": 0.0001, + "sup_loss": 1.540661142483917 + }, + { + "step": 295, + "lr": 0.0001, + "sup_loss": 1.5180601465535941 + }, 
+ { + "step": 296, + "lr": 0.0001, + "sup_loss": 1.5469400636944477 + }, + { + "step": 297, + "lr": 0.0001, + "sup_loss": 1.5209362658980428 + }, + { + "step": 298, + "lr": 0.0001, + "sup_loss": 1.5442122135802556 + }, + { + "step": 299, + "lr": 0.0001, + "sup_loss": 1.5299168861048704 + }, + { + "step": 300, + "lr": 0.0001, + "sup_loss": 1.5367603794149578, + "lyap1_mean": -6.731600761413574, + "lyap1_max": -6.705668926239014, + "lyap_spec_mean": [ + -6.731600761413574, + -6.729957103729248 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 301, + "lr": 0.0001, + "sup_loss": 1.5267677902729708 + }, + { + "step": 302, + "lr": 0.0001, + "sup_loss": 1.5490004131566173 + }, + { + "step": 303, + "lr": 0.0001, + "sup_loss": 1.5324941437266195 + }, + { + "step": 304, + "lr": 0.0001, + "sup_loss": 1.5190387495269355 + }, + { + "step": 305, + "lr": 0.0001, + "sup_loss": 1.509078282908147 + }, + { + "step": 306, + "lr": 0.0001, + "sup_loss": 1.5385538534217427 + }, + { + "step": 307, + "lr": 0.0001, + "sup_loss": 1.5221630289120884 + }, + { + "step": 308, + "lr": 0.0001, + "sup_loss": 1.531880840558637 + }, + { + "step": 309, + "lr": 0.0001, + "sup_loss": 1.5322693013837414 + }, + { + "step": 310, + "lr": 0.0001, + "sup_loss": 1.5456882921503572 + }, + { + "step": 311, + "lr": 0.0001, + "sup_loss": 1.5482640300985837 + }, + { + "step": 312, + "lr": 0.0001, + "sup_loss": 1.4953605908689824 + }, + { + "step": 313, + "lr": 0.0001, + "sup_loss": 1.504275592869123 + }, + { + "step": 314, + "lr": 0.0001, + "sup_loss": 1.5314498158594896 + }, + { + "step": 315, + "lr": 0.0001, + "sup_loss": 1.5487909165979727 + }, + { + "step": 316, + "lr": 0.0001, + "sup_loss": 1.5354934367073998 + }, + { + "step": 317, + "lr": 0.0001, + "sup_loss": 1.5613511901676795 + }, + { + "step": 318, + "lr": 0.0001, + "sup_loss": 1.5212551833796315 + }, + { + "step": 319, + "lr": 0.0001, + "sup_loss": 1.5354066475975543 + }, + { + "step": 320, + "lr": 0.0001, + "sup_loss": 1.513892551706198 + }, 
+ { + "step": 321, + "lr": 0.0001, + "sup_loss": 1.5247447921083763 + }, + { + "step": 322, + "lr": 0.0001, + "sup_loss": 1.5663392109423493 + }, + { + "step": 323, + "lr": 0.0001, + "sup_loss": 1.5469443724563157 + }, + { + "step": 324, + "lr": 0.0001, + "sup_loss": 1.5241835576596356 + }, + { + "step": 325, + "lr": 0.0001, + "sup_loss": 1.56112866420628 + }, + { + "step": 326, + "lr": 0.0001, + "sup_loss": 1.514327623223098 + }, + { + "step": 327, + "lr": 0.0001, + "sup_loss": 1.5357172586130452 + }, + { + "step": 328, + "lr": 0.0001, + "sup_loss": 1.52557571995858 + }, + { + "step": 329, + "lr": 0.0001, + "sup_loss": 1.5063130482506772 + }, + { + "step": 330, + "lr": 0.0001, + "sup_loss": 1.510772981261169 + }, + { + "step": 331, + "lr": 0.0001, + "sup_loss": 1.525386254408428 + }, + { + "step": 332, + "lr": 0.0001, + "sup_loss": 1.5372994463442498 + }, + { + "step": 333, + "lr": 0.0001, + "sup_loss": 1.4956818160326795 + }, + { + "step": 334, + "lr": 0.0001, + "sup_loss": 1.5172208481119904 + }, + { + "step": 335, + "lr": 0.0001, + "sup_loss": 1.5278558278741006 + }, + { + "step": 336, + "lr": 0.0001, + "sup_loss": 1.5067972329936372 + }, + { + "step": 337, + "lr": 0.0001, + "sup_loss": 1.5190452238016992 + }, + { + "step": 338, + "lr": 0.0001, + "sup_loss": 1.5084175022670525 + }, + { + "step": 339, + "lr": 0.0001, + "sup_loss": 1.525797596254893 + }, + { + "step": 340, + "lr": 0.0001, + "sup_loss": 1.5319172302493027 + }, + { + "step": 341, + "lr": 0.0001, + "sup_loss": 1.538821554040033 + }, + { + "step": 342, + "lr": 0.0001, + "sup_loss": 1.5317685528260372 + }, + { + "step": 343, + "lr": 0.0001, + "sup_loss": 1.4926112633097048 + }, + { + "step": 344, + "lr": 0.0001, + "sup_loss": 1.5147849643214368 + }, + { + "step": 345, + "lr": 0.0001, + "sup_loss": 1.547711727370116 + }, + { + "step": 346, + "lr": 0.0001, + "sup_loss": 1.515529849808384 + }, + { + "step": 347, + "lr": 0.0001, + "sup_loss": 1.5049246647550845 + }, + { + "step": 348, + "lr": 0.0001, + 
"sup_loss": 1.501525262570652 + }, + { + "step": 349, + "lr": 0.0001, + "sup_loss": 1.49081882244944 + }, + { + "step": 350, + "lr": 0.0001, + "sup_loss": 1.5288388316196229, + "lyap1_mean": -6.768826484680176, + "lyap1_max": -6.735835075378418, + "lyap_spec_mean": [ + -6.768826484680176, + -6.769077777862549 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 351, + "lr": 0.0001, + "sup_loss": 1.5415546296312443 + }, + { + "step": 352, + "lr": 0.0001, + "sup_loss": 1.4937428007082285 + }, + { + "step": 353, + "lr": 0.0001, + "sup_loss": 1.5377792072256968 + }, + { + "step": 354, + "lr": 0.0001, + "sup_loss": 1.5407405719391567 + }, + { + "step": 355, + "lr": 0.0001, + "sup_loss": 1.5367920979990177 + }, + { + "step": 356, + "lr": 0.0001, + "sup_loss": 1.5307866517061364 + }, + { + "step": 357, + "lr": 0.0001, + "sup_loss": 1.5383308453236424 + }, + { + "step": 358, + "lr": 0.0001, + "sup_loss": 1.4966814733345377 + }, + { + "step": 359, + "lr": 0.0001, + "sup_loss": 1.5222646538877784 + }, + { + "step": 360, + "lr": 0.0001, + "sup_loss": 1.5257339137152008 + }, + { + "step": 361, + "lr": 0.0001, + "sup_loss": 1.5164813046994263 + }, + { + "step": 362, + "lr": 0.0001, + "sup_loss": 1.5275411407536763 + }, + { + "step": 363, + "lr": 0.0001, + "sup_loss": 1.530546309389911 + }, + { + "step": 364, + "lr": 0.0001, + "sup_loss": 1.5231208048578198 + }, + { + "step": 365, + "lr": 0.0001, + "sup_loss": 1.516284883637423 + }, + { + "step": 366, + "lr": 0.0001, + "sup_loss": 1.4956760481170648 + }, + { + "step": 367, + "lr": 0.0001, + "sup_loss": 1.5348023348014486 + }, + { + "step": 368, + "lr": 0.0001, + "sup_loss": 1.5469043091619528 + }, + { + "step": 369, + "lr": 0.0001, + "sup_loss": 1.508701474439553 + }, + { + "step": 370, + "lr": 0.0001, + "sup_loss": 1.5035497253083892 + }, + { + "step": 371, + "lr": 0.0001, + "sup_loss": 1.50805146455965 + }, + { + "step": 372, + "lr": 0.0001, + "sup_loss": 1.5235418304604582 + }, + { + "step": 373, + "lr": 0.0001, + 
"sup_loss": 1.5051946201321436 + }, + { + "step": 374, + "lr": 0.0001, + "sup_loss": 1.5760435357157099 + }, + { + "step": 375, + "lr": 0.0001, + "sup_loss": 1.5358853569822477 + }, + { + "step": 376, + "lr": 0.0001, + "sup_loss": 1.4922115545345278 + }, + { + "step": 377, + "lr": 0.0001, + "sup_loss": 1.5195063290024502 + }, + { + "step": 378, + "lr": 0.0001, + "sup_loss": 1.5025468941472218 + }, + { + "step": 379, + "lr": 0.0001, + "sup_loss": 1.5147771276300177 + }, + { + "step": 380, + "lr": 0.0001, + "sup_loss": 1.5896931221452932 + }, + { + "step": 381, + "lr": 0.0001, + "sup_loss": 1.4996664797783066 + }, + { + "step": 382, + "lr": 0.0001, + "sup_loss": 1.5329751270993626 + }, + { + "step": 383, + "lr": 0.0001, + "sup_loss": 1.4888887510594488 + }, + { + "step": 384, + "lr": 0.0001, + "sup_loss": 1.5017783762237125 + }, + { + "step": 385, + "lr": 0.0001, + "sup_loss": 1.4954119391046687 + }, + { + "step": 386, + "lr": 0.0001, + "sup_loss": 1.5197900278040006 + }, + { + "step": 387, + "lr": 0.0001, + "sup_loss": 1.5426613533741342 + }, + { + "step": 388, + "lr": 0.0001, + "sup_loss": 1.5429999638477723 + }, + { + "step": 389, + "lr": 0.0001, + "sup_loss": 1.532546033613357 + }, + { + "step": 390, + "lr": 0.0001, + "sup_loss": 1.4919258952284065 + }, + { + "step": 391, + "lr": 0.0001, + "sup_loss": 1.5219337390273724 + }, + { + "step": 392, + "lr": 0.0001, + "sup_loss": 1.5478330957124837 + }, + { + "step": 393, + "lr": 0.0001, + "sup_loss": 1.5359589244340948 + }, + { + "step": 394, + "lr": 0.0001, + "sup_loss": 1.5412368161721592 + }, + { + "step": 395, + "lr": 0.0001, + "sup_loss": 1.5205820683421143 + }, + { + "step": 396, + "lr": 0.0001, + "sup_loss": 1.5159798976856904 + }, + { + "step": 397, + "lr": 0.0001, + "sup_loss": 1.4777741617731257 + }, + { + "step": 398, + "lr": 0.0001, + "sup_loss": 1.5215005801879995 + }, + { + "step": 399, + "lr": 0.0001, + "sup_loss": 1.5079616952732788 + }, + { + "step": 400, + "lr": 0.0001, + "sup_loss": 
1.495910037773762, + "lyap1_mean": -6.776495933532715, + "lyap1_max": -6.758118629455566, + "lyap_spec_mean": [ + -6.776495933532715, + -6.777163982391357 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 401, + "lr": 0.0001, + "sup_loss": 1.5217005574706701 + }, + { + "step": 402, + "lr": 0.0001, + "sup_loss": 1.535998508545498 + }, + { + "step": 403, + "lr": 0.0001, + "sup_loss": 1.4938829571330041 + }, + { + "step": 404, + "lr": 0.0001, + "sup_loss": 1.5042338652329252 + }, + { + "step": 405, + "lr": 0.0001, + "sup_loss": 1.538283850584171 + }, + { + "step": 406, + "lr": 0.0001, + "sup_loss": 1.5170335668757478 + }, + { + "step": 407, + "lr": 0.0001, + "sup_loss": 1.5043685155341067 + }, + { + "step": 408, + "lr": 0.0001, + "sup_loss": 1.5252170557965918 + }, + { + "step": 409, + "lr": 0.0001, + "sup_loss": 1.5543400645748704 + }, + { + "step": 410, + "lr": 0.0001, + "sup_loss": 1.518284223363948 + }, + { + "step": 411, + "lr": 0.0001, + "sup_loss": 1.530843710395433 + }, + { + "step": 412, + "lr": 0.0001, + "sup_loss": 1.523526609751549 + }, + { + "step": 413, + "lr": 0.0001, + "sup_loss": 1.5347577761597417 + }, + { + "step": 414, + "lr": 0.0001, + "sup_loss": 1.5482723268724043 + }, + { + "step": 415, + "lr": 0.0001, + "sup_loss": 1.5106411249861507 + }, + { + "step": 416, + "lr": 0.0001, + "sup_loss": 1.5377137499597227 + }, + { + "step": 417, + "lr": 0.0001, + "sup_loss": 1.5470692533854244 + }, + { + "step": 418, + "lr": 0.0001, + "sup_loss": 1.530579133413735 + }, + { + "step": 419, + "lr": 0.0001, + "sup_loss": 1.513712776987345 + }, + { + "step": 420, + "lr": 0.0001, + "sup_loss": 1.5275970838549073 + }, + { + "step": 421, + "lr": 0.0001, + "sup_loss": 1.5065633750579157 + }, + { + "step": 422, + "lr": 0.0001, + "sup_loss": 1.5208309190224893 + }, + { + "step": 423, + "lr": 0.0001, + "sup_loss": 1.4901535565874426 + }, + { + "step": 424, + "lr": 0.0001, + "sup_loss": 1.541493181989591 + }, + { + "step": 425, + "lr": 0.0001, + "sup_loss": 
1.5069517474786296 + }, + { + "step": 426, + "lr": 0.0001, + "sup_loss": 1.524255888570083 + }, + { + "step": 427, + "lr": 0.0001, + "sup_loss": 1.509566015485319 + }, + { + "step": 428, + "lr": 0.0001, + "sup_loss": 1.5065644131228995 + }, + { + "step": 429, + "lr": 0.0001, + "sup_loss": 1.5168142824769222 + }, + { + "step": 430, + "lr": 0.0001, + "sup_loss": 1.5094533303891793 + }, + { + "step": 431, + "lr": 0.0001, + "sup_loss": 1.5310067010010535 + }, + { + "step": 432, + "lr": 0.0001, + "sup_loss": 1.4936585680050196 + }, + { + "step": 433, + "lr": 0.0001, + "sup_loss": 1.5170796998766567 + }, + { + "step": 434, + "lr": 0.0001, + "sup_loss": 1.4663801573932203 + }, + { + "step": 435, + "lr": 0.0001, + "sup_loss": 1.5053680594486931 + }, + { + "step": 436, + "lr": 0.0001, + "sup_loss": 1.5028730726827486 + }, + { + "step": 437, + "lr": 0.0001, + "sup_loss": 1.490572772297063 + }, + { + "step": 438, + "lr": 0.0001, + "sup_loss": 1.5032827873330354 + }, + { + "step": 439, + "lr": 0.0001, + "sup_loss": 1.5226119048921678 + }, + { + "step": 440, + "lr": 0.0001, + "sup_loss": 1.5597227382635503 + }, + { + "step": 441, + "lr": 0.0001, + "sup_loss": 1.5237574660485953 + }, + { + "step": 442, + "lr": 0.0001, + "sup_loss": 1.5347296789628524 + }, + { + "step": 443, + "lr": 0.0001, + "sup_loss": 1.5230422566997506 + }, + { + "step": 444, + "lr": 0.0001, + "sup_loss": 1.5491641821715534 + }, + { + "step": 445, + "lr": 0.0001, + "sup_loss": 1.509394757469705 + }, + { + "step": 446, + "lr": 0.0001, + "sup_loss": 1.4962407714481252 + }, + { + "step": 447, + "lr": 0.0001, + "sup_loss": 1.522649675786416 + }, + { + "step": 448, + "lr": 0.0001, + "sup_loss": 1.5226048456225543 + }, + { + "step": 449, + "lr": 0.0001, + "sup_loss": 1.5331250593048695 + }, + { + "step": 450, + "lr": 0.0001, + "sup_loss": 1.515925917849977, + "lyap1_mean": -6.795431137084961, + "lyap1_max": -6.768151760101318, + "lyap_spec_mean": [ + -6.795431137084961, + -6.794274806976318 + ], + "lyap_bound": 
-0.10536051565782628 + }, + { + "step": 451, + "lr": 0.0001, + "sup_loss": 1.5282498362302581 + }, + { + "step": 452, + "lr": 0.0001, + "sup_loss": 1.5132511622880294 + }, + { + "step": 453, + "lr": 0.0001, + "sup_loss": 1.5061698451036576 + }, + { + "step": 454, + "lr": 0.0001, + "sup_loss": 1.5327067666562653 + }, + { + "step": 455, + "lr": 0.0001, + "sup_loss": 1.5432148891759199 + }, + { + "step": 456, + "lr": 0.0001, + "sup_loss": 1.487606374372353 + }, + { + "step": 457, + "lr": 0.0001, + "sup_loss": 1.5161990353568249 + }, + { + "step": 458, + "lr": 0.0001, + "sup_loss": 1.4965745475211065 + }, + { + "step": 459, + "lr": 0.0001, + "sup_loss": 1.5198828941042917 + }, + { + "step": 460, + "lr": 0.0001, + "sup_loss": 1.5305336263472862 + }, + { + "step": 461, + "lr": 0.0001, + "sup_loss": 1.5149052893451012 + }, + { + "step": 462, + "lr": 0.0001, + "sup_loss": 1.5025796388903865 + }, + { + "step": 463, + "lr": 0.0001, + "sup_loss": 1.5869712464150953 + }, + { + "step": 464, + "lr": 0.0001, + "sup_loss": 1.5057992201875046 + }, + { + "step": 465, + "lr": 0.0001, + "sup_loss": 1.5120564804847023 + }, + { + "step": 466, + "lr": 0.0001, + "sup_loss": 1.5120215616736532 + }, + { + "step": 467, + "lr": 0.0001, + "sup_loss": 1.5091493754340413 + }, + { + "step": 468, + "lr": 0.0001, + "sup_loss": 1.5323652889230723 + }, + { + "step": 469, + "lr": 0.0001, + "sup_loss": 1.5354809126070437 + }, + { + "step": 470, + "lr": 0.0001, + "sup_loss": 1.5160184614151824 + }, + { + "step": 471, + "lr": 0.0001, + "sup_loss": 1.5393198861487136 + }, + { + "step": 472, + "lr": 0.0001, + "sup_loss": 1.5095728851643273 + }, + { + "step": 473, + "lr": 0.0001, + "sup_loss": 1.4950199933108161 + }, + { + "step": 474, + "lr": 0.0001, + "sup_loss": 1.4880223885745154 + }, + { + "step": 475, + "lr": 0.0001, + "sup_loss": 1.5220763223709426 + }, + { + "step": 476, + "lr": 0.0001, + "sup_loss": 1.5291296437464985 + }, + { + "step": 477, + "lr": 0.0001, + "sup_loss": 1.4880739155134965 + }, + { 
+ "step": 478, + "lr": 0.0001, + "sup_loss": 1.5351122331439055 + }, + { + "step": 479, + "lr": 0.0001, + "sup_loss": 1.522225478193996 + }, + { + "step": 480, + "lr": 0.0001, + "sup_loss": 1.53495790560135 + }, + { + "step": 481, + "lr": 0.0001, + "sup_loss": 1.5253319198843884 + }, + { + "step": 482, + "lr": 0.0001, + "sup_loss": 1.54836948093061 + }, + { + "step": 483, + "lr": 0.0001, + "sup_loss": 1.5153069188564097 + }, + { + "step": 484, + "lr": 0.0001, + "sup_loss": 1.5081205638874275 + }, + { + "step": 485, + "lr": 0.0001, + "sup_loss": 1.515517684638477 + }, + { + "step": 486, + "lr": 0.0001, + "sup_loss": 1.548798319957227 + }, + { + "step": 487, + "lr": 0.0001, + "sup_loss": 1.508224619597713 + }, + { + "step": 488, + "lr": 0.0001, + "sup_loss": 1.5081144270667044 + }, + { + "step": 489, + "lr": 0.0001, + "sup_loss": 1.5316173594952238 + }, + { + "step": 490, + "lr": 0.0001, + "sup_loss": 1.5109067315946623 + }, + { + "step": 491, + "lr": 0.0001, + "sup_loss": 1.5217231020214306 + }, + { + "step": 492, + "lr": 0.0001, + "sup_loss": 1.5250587129141693 + }, + { + "step": 493, + "lr": 0.0001, + "sup_loss": 1.4877844106809268 + }, + { + "step": 494, + "lr": 0.0001, + "sup_loss": 1.5344331622723024 + }, + { + "step": 495, + "lr": 0.0001, + "sup_loss": 1.5260100240880243 + }, + { + "step": 496, + "lr": 0.0001, + "sup_loss": 1.500453052131941 + }, + { + "step": 497, + "lr": 0.0001, + "sup_loss": 1.5305699544559332 + }, + { + "step": 498, + "lr": 0.0001, + "sup_loss": 1.5256666107826882 + }, + { + "step": 499, + "lr": 0.0001, + "sup_loss": 1.5013564830363537 + }, + { + "step": 500, + "lr": 0.0001, + "sup_loss": 1.5188585494176774, + "lyap1_mean": -6.791590213775635, + "lyap1_max": -6.772494792938232, + "lyap_spec_mean": [ + -6.791589736938477, + -6.79250431060791 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 501, + "lr": 0.0001, + "sup_loss": 1.5419606241978687 + }, + { + "step": 502, + "lr": 0.0001, + "sup_loss": 1.5421828984991521 + }, + { + 
"step": 503, + "lr": 0.0001, + "sup_loss": 1.5357003899215982 + }, + { + "step": 504, + "lr": 0.0001, + "sup_loss": 1.530721237113792 + }, + { + "step": 505, + "lr": 0.0001, + "sup_loss": 1.511567722152634 + }, + { + "step": 506, + "lr": 0.0001, + "sup_loss": 1.5323639499719792 + }, + { + "step": 507, + "lr": 0.0001, + "sup_loss": 1.5010858492960906 + }, + { + "step": 508, + "lr": 0.0001, + "sup_loss": 1.515861816104251 + }, + { + "step": 509, + "lr": 0.0001, + "sup_loss": 1.507546999875379 + }, + { + "step": 510, + "lr": 0.0001, + "sup_loss": 1.4473171150456647 + }, + { + "step": 511, + "lr": 0.0001, + "sup_loss": 1.5250415366020011 + }, + { + "step": 512, + "lr": 0.0001, + "sup_loss": 1.5356975150173802 + }, + { + "step": 513, + "lr": 0.0001, + "sup_loss": 1.5144988164026012 + }, + { + "step": 514, + "lr": 0.0001, + "sup_loss": 1.5490970785299154 + }, + { + "step": 515, + "lr": 0.0001, + "sup_loss": 1.496113593839379 + }, + { + "step": 516, + "lr": 0.0001, + "sup_loss": 1.498487162807379 + }, + { + "step": 517, + "lr": 0.0001, + "sup_loss": 1.4981849056946255 + }, + { + "step": 518, + "lr": 0.0001, + "sup_loss": 1.5373433675463626 + }, + { + "step": 519, + "lr": 0.0001, + "sup_loss": 1.4812035208840872 + }, + { + "step": 520, + "lr": 0.0001, + "sup_loss": 1.47771356385523 + }, + { + "step": 521, + "lr": 0.0001, + "sup_loss": 1.5449288085839221 + }, + { + "step": 522, + "lr": 0.0001, + "sup_loss": 1.5070971701758975 + }, + { + "step": 523, + "lr": 0.0001, + "sup_loss": 1.5270652188211973 + }, + { + "step": 524, + "lr": 0.0001, + "sup_loss": 1.5359717697211641 + }, + { + "step": 525, + "lr": 0.0001, + "sup_loss": 1.5163803211806575 + }, + { + "step": 526, + "lr": 0.0001, + "sup_loss": 1.4873522597396416 + }, + { + "step": 527, + "lr": 0.0001, + "sup_loss": 1.4766349808193844 + }, + { + "step": 528, + "lr": 0.0001, + "sup_loss": 1.4398690054531398 + }, + { + "step": 529, + "lr": 0.0001, + "sup_loss": 1.4616981072030113 + }, + { + "step": 530, + "lr": 0.0001, + 
"sup_loss": 1.5029602902675654 + }, + { + "step": 531, + "lr": 0.0001, + "sup_loss": 1.5234289793130582 + }, + { + "step": 532, + "lr": 0.0001, + "sup_loss": 1.5249104934379372 + }, + { + "step": 533, + "lr": 0.0001, + "sup_loss": 1.4803204314707359 + }, + { + "step": 534, + "lr": 0.0001, + "sup_loss": 1.5328382853747362 + }, + { + "step": 535, + "lr": 0.0001, + "sup_loss": 1.5179482902634496 + }, + { + "step": 536, + "lr": 0.0001, + "sup_loss": 1.5211828598536354 + }, + { + "step": 537, + "lr": 0.0001, + "sup_loss": 1.5274874021689337 + }, + { + "step": 538, + "lr": 0.0001, + "sup_loss": 1.500564786247306 + }, + { + "step": 539, + "lr": 0.0001, + "sup_loss": 1.521037452581265 + }, + { + "step": 540, + "lr": 0.0001, + "sup_loss": 1.538061478112434 + }, + { + "step": 541, + "lr": 0.0001, + "sup_loss": 1.5381210991518974 + }, + { + "step": 542, + "lr": 0.0001, + "sup_loss": 1.5013994631824021 + }, + { + "step": 543, + "lr": 0.0001, + "sup_loss": 1.5654164527576346 + }, + { + "step": 544, + "lr": 0.0001, + "sup_loss": 1.5144121736857774 + }, + { + "step": 545, + "lr": 0.0001, + "sup_loss": 1.5275361742225335 + }, + { + "step": 546, + "lr": 0.0001, + "sup_loss": 1.5246118056555094 + }, + { + "step": 547, + "lr": 0.0001, + "sup_loss": 1.5038810676440577 + }, + { + "step": 548, + "lr": 0.0001, + "sup_loss": 1.4733352301839726 + }, + { + "step": 549, + "lr": 0.0001, + "sup_loss": 1.5239435882358878 + }, + { + "step": 550, + "lr": 0.0001, + "sup_loss": 1.5175359020051566, + "lyap1_mean": -6.765993118286133, + "lyap1_max": -6.736642837524414, + "lyap_spec_mean": [ + -6.765993118286133, + -6.764594554901123 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 551, + "lr": 0.0001, + "sup_loss": 1.4968662422053387 + }, + { + "step": 552, + "lr": 0.0001, + "sup_loss": 1.4896592338924552 + }, + { + "step": 553, + "lr": 0.0001, + "sup_loss": 1.533292957859652 + }, + { + "step": 554, + "lr": 0.0001, + "sup_loss": 1.514270599044644 + }, + { + "step": 555, + "lr": 0.0001, + 
"sup_loss": 1.5250075259201878 + }, + { + "step": 556, + "lr": 0.0001, + "sup_loss": 1.505292036558018 + }, + { + "step": 557, + "lr": 0.0001, + "sup_loss": 1.5370056435293413 + }, + { + "step": 558, + "lr": 0.0001, + "sup_loss": 1.4656788697742547 + }, + { + "step": 559, + "lr": 0.0001, + "sup_loss": 1.5362274302772925 + }, + { + "step": 560, + "lr": 0.0001, + "sup_loss": 1.5245343080299176 + }, + { + "step": 561, + "lr": 0.0001, + "sup_loss": 1.5081152106879827 + }, + { + "step": 562, + "lr": 0.0001, + "sup_loss": 1.4994066555448664 + }, + { + "step": 563, + "lr": 0.0001, + "sup_loss": 1.5197447553727503 + }, + { + "step": 564, + "lr": 0.0001, + "sup_loss": 1.4796461514181756 + }, + { + "step": 565, + "lr": 0.0001, + "sup_loss": 1.527453622104409 + }, + { + "step": 566, + "lr": 0.0001, + "sup_loss": 1.5211536061599533 + }, + { + "step": 567, + "lr": 0.0001, + "sup_loss": 1.5222198327095844 + }, + { + "step": 568, + "lr": 0.0001, + "sup_loss": 1.5104475006960905 + }, + { + "step": 569, + "lr": 0.0001, + "sup_loss": 1.556950775037779 + }, + { + "step": 570, + "lr": 0.0001, + "sup_loss": 1.489439387709364 + }, + { + "step": 571, + "lr": 0.0001, + "sup_loss": 1.5247285889255449 + }, + { + "step": 572, + "lr": 0.0001, + "sup_loss": 1.520800154045211 + }, + { + "step": 573, + "lr": 0.0001, + "sup_loss": 1.554697797404213 + }, + { + "step": 574, + "lr": 0.0001, + "sup_loss": 1.4829480801351373 + }, + { + "step": 575, + "lr": 0.0001, + "sup_loss": 1.4764715080232702 + }, + { + "step": 576, + "lr": 0.0001, + "sup_loss": 1.5269003596506046 + }, + { + "step": 577, + "lr": 0.0001, + "sup_loss": 1.5191089025254334 + }, + { + "step": 578, + "lr": 0.0001, + "sup_loss": 1.4969294335143073 + }, + { + "step": 579, + "lr": 0.0001, + "sup_loss": 1.5173509812616253 + }, + { + "step": 580, + "lr": 0.0001, + "sup_loss": 1.5670732878743052 + }, + { + "step": 581, + "lr": 0.0001, + "sup_loss": 1.5069296991136054 + }, + { + "step": 582, + "lr": 0.0001, + "sup_loss": 1.5263190153038793 + 
}, + { + "step": 583, + "lr": 0.0001, + "sup_loss": 1.513668570188107 + }, + { + "step": 584, + "lr": 0.0001, + "sup_loss": 1.5396932472814553 + }, + { + "step": 585, + "lr": 0.0001, + "sup_loss": 1.520343959935798 + }, + { + "step": 586, + "lr": 0.0001, + "sup_loss": 1.5568224663848986 + }, + { + "step": 587, + "lr": 0.0001, + "sup_loss": 1.4930463816877826 + }, + { + "step": 588, + "lr": 0.0001, + "sup_loss": 1.5195441393658515 + }, + { + "step": 589, + "lr": 0.0001, + "sup_loss": 1.5099460511089329 + }, + { + "step": 590, + "lr": 0.0001, + "sup_loss": 1.56036194709874 + }, + { + "step": 591, + "lr": 0.0001, + "sup_loss": 1.5326861985624607 + }, + { + "step": 592, + "lr": 0.0001, + "sup_loss": 1.4728517589052517 + }, + { + "step": 593, + "lr": 0.0001, + "sup_loss": 1.5061764131961108 + }, + { + "step": 594, + "lr": 0.0001, + "sup_loss": 1.5160519949374989 + }, + { + "step": 595, + "lr": 0.0001, + "sup_loss": 1.5009149320838588 + }, + { + "step": 596, + "lr": 0.0001, + "sup_loss": 1.512843956656185 + }, + { + "step": 597, + "lr": 0.0001, + "sup_loss": 1.495373448987571 + }, + { + "step": 598, + "lr": 0.0001, + "sup_loss": 1.5300604215201528 + }, + { + "step": 599, + "lr": 0.0001, + "sup_loss": 1.5271361167507316 + }, + { + "step": 600, + "lr": 0.0001, + "sup_loss": 1.4746719875157162, + "lyap1_mean": -6.706287384033203, + "lyap1_max": -6.6853485107421875, + "lyap_spec_mean": [ + -6.706287384033203, + -6.7072367668151855 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 601, + "lr": 0.0001, + "sup_loss": 1.5336775026150677 + }, + { + "step": 602, + "lr": 0.0001, + "sup_loss": 1.5291573467841881 + }, + { + "step": 603, + "lr": 0.0001, + "sup_loss": 1.5263495770623876 + }, + { + "step": 604, + "lr": 0.0001, + "sup_loss": 1.503451382896976 + }, + { + "step": 605, + "lr": 0.0001, + "sup_loss": 1.5398634285105617 + }, + { + "step": 606, + "lr": 0.0001, + "sup_loss": 1.519265140334756 + }, + { + "step": 607, + "lr": 0.0001, + "sup_loss": 1.5138468165331156 + 
}, + { + "step": 608, + "lr": 0.0001, + "sup_loss": 1.5262239900340073 + }, + { + "step": 609, + "lr": 0.0001, + "sup_loss": 1.526919684295831 + }, + { + "step": 610, + "lr": 0.0001, + "sup_loss": 1.519283512267841 + }, + { + "step": 611, + "lr": 0.0001, + "sup_loss": 1.5264654126295605 + }, + { + "step": 612, + "lr": 0.0001, + "sup_loss": 1.518608014385772 + }, + { + "step": 613, + "lr": 0.0001, + "sup_loss": 1.483369232582482 + }, + { + "step": 614, + "lr": 0.0001, + "sup_loss": 1.5194200147255776 + }, + { + "step": 615, + "lr": 0.0001, + "sup_loss": 1.509659573928428 + }, + { + "step": 616, + "lr": 0.0001, + "sup_loss": 1.4614213786949402 + }, + { + "step": 617, + "lr": 0.0001, + "sup_loss": 1.519985781853087 + }, + { + "step": 618, + "lr": 0.0001, + "sup_loss": 1.5153756064518529 + }, + { + "step": 619, + "lr": 0.0001, + "sup_loss": 1.4953872974472855 + }, + { + "step": 620, + "lr": 0.0001, + "sup_loss": 1.50964356755188 + }, + { + "step": 621, + "lr": 0.0001, + "sup_loss": 1.5166842103395488 + }, + { + "step": 622, + "lr": 0.0001, + "sup_loss": 1.4827529031202311 + }, + { + "step": 623, + "lr": 0.0001, + "sup_loss": 1.5089609676135278 + }, + { + "step": 624, + "lr": 0.0001, + "sup_loss": 1.4179098266299353 + }, + { + "step": 625, + "lr": 0.0001, + "sup_loss": 1.5462449333163302 + }, + { + "step": 626, + "lr": 0.0001, + "sup_loss": 1.491704724136731 + }, + { + "step": 627, + "lr": 0.0001, + "sup_loss": 1.5296925248055357 + }, + { + "step": 628, + "lr": 0.0001, + "sup_loss": 1.495717675145098 + }, + { + "step": 629, + "lr": 0.0001, + "sup_loss": 1.5094886039242152 + }, + { + "step": 630, + "lr": 0.0001, + "sup_loss": 1.5537490687309552 + }, + { + "step": 631, + "lr": 0.0001, + "sup_loss": 1.5671074028980196 + }, + { + "step": 632, + "lr": 0.0001, + "sup_loss": 1.5291528396725056 + }, + { + "step": 633, + "lr": 0.0001, + "sup_loss": 1.542836361718557 + }, + { + "step": 634, + "lr": 0.0001, + "sup_loss": 1.4853697228480627 + }, + { + "step": 635, + "lr": 0.0001, + 
"sup_loss": 1.5018737111762246 + }, + { + "step": 636, + "lr": 0.0001, + "sup_loss": 1.4920718193587548 + }, + { + "step": 637, + "lr": 0.0001, + "sup_loss": 1.4889679738842707 + }, + { + "step": 638, + "lr": 0.0001, + "sup_loss": 1.5186764177161212 + }, + { + "step": 639, + "lr": 0.0001, + "sup_loss": 1.5236976351211728 + }, + { + "step": 640, + "lr": 0.0001, + "sup_loss": 1.528840155667301 + }, + { + "step": 641, + "lr": 0.0001, + "sup_loss": 1.53291243411655 + }, + { + "step": 642, + "lr": 0.0001, + "sup_loss": 1.569600070320969 + }, + { + "step": 643, + "lr": 0.0001, + "sup_loss": 1.5498230357632412 + }, + { + "step": 644, + "lr": 0.0001, + "sup_loss": 1.5296614304295055 + }, + { + "step": 645, + "lr": 0.0001, + "sup_loss": 1.5494526011339775 + }, + { + "step": 646, + "lr": 0.0001, + "sup_loss": 1.529450068928431 + }, + { + "step": 647, + "lr": 0.0001, + "sup_loss": 1.5170902165194509 + }, + { + "step": 648, + "lr": 0.0001, + "sup_loss": 1.5290718973416557 + }, + { + "step": 649, + "lr": 0.0001, + "sup_loss": 1.4815761998823769 + }, + { + "step": 650, + "lr": 0.0001, + "sup_loss": 1.5051635719305994, + "lyap1_mean": -6.6470232009887695, + "lyap1_max": -6.628139972686768, + "lyap_spec_mean": [ + -6.647023677825928, + -6.642392635345459 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 651, + "lr": 0.0001, + "sup_loss": 1.5094047529090602 + }, + { + "step": 652, + "lr": 0.0001, + "sup_loss": 1.4997054919315504 + }, + { + "step": 653, + "lr": 0.0001, + "sup_loss": 1.5162976197918159 + }, + { + "step": 654, + "lr": 0.0001, + "sup_loss": 1.4983194976232481 + }, + { + "step": 655, + "lr": 0.0001, + "sup_loss": 1.5127993812520648 + }, + { + "step": 656, + "lr": 0.0001, + "sup_loss": 1.5261014664213866 + }, + { + "step": 657, + "lr": 0.0001, + "sup_loss": 1.4951740231453452 + }, + { + "step": 658, + "lr": 0.0001, + "sup_loss": 1.4817909871854027 + }, + { + "step": 659, + "lr": 0.0001, + "sup_loss": 1.525519173918642 + }, + { + "step": 660, + "lr": 0.0001, + 
"sup_loss": 1.5023221059259964 + }, + { + "step": 661, + "lr": 0.0001, + "sup_loss": 1.5465929129836793 + }, + { + "step": 662, + "lr": 0.0001, + "sup_loss": 1.511803813875915 + }, + { + "step": 663, + "lr": 0.0001, + "sup_loss": 1.5285347352646839 + }, + { + "step": 664, + "lr": 0.0001, + "sup_loss": 1.5219787259105593 + }, + { + "step": 665, + "lr": 0.0001, + "sup_loss": 1.5184666352898202 + }, + { + "step": 666, + "lr": 0.0001, + "sup_loss": 1.512331622154555 + }, + { + "step": 667, + "lr": 0.0001, + "sup_loss": 1.5048972547747164 + }, + { + "step": 668, + "lr": 0.0001, + "sup_loss": 1.5267053968581568 + }, + { + "step": 669, + "lr": 0.0001, + "sup_loss": 1.520783856316102 + }, + { + "step": 670, + "lr": 0.0001, + "sup_loss": 1.4967002075237184 + }, + { + "step": 671, + "lr": 0.0001, + "sup_loss": 1.5185580478265714 + }, + { + "step": 672, + "lr": 0.0001, + "sup_loss": 1.5326260352431025 + }, + { + "step": 673, + "lr": 0.0001, + "sup_loss": 1.5119897806211688 + }, + { + "step": 674, + "lr": 0.0001, + "sup_loss": 1.5012395810129249 + }, + { + "step": 675, + "lr": 0.0001, + "sup_loss": 1.4716738682943544 + }, + { + "step": 676, + "lr": 0.0001, + "sup_loss": 1.531889695586606 + }, + { + "step": 677, + "lr": 0.0001, + "sup_loss": 1.5015395979322816 + }, + { + "step": 678, + "lr": 0.0001, + "sup_loss": 1.5185524883068886 + }, + { + "step": 679, + "lr": 0.0001, + "sup_loss": 1.5329890913841109 + }, + { + "step": 680, + "lr": 0.0001, + "sup_loss": 1.4862086841848878 + }, + { + "step": 681, + "lr": 0.0001, + "sup_loss": 1.5081771314593237 + }, + { + "step": 682, + "lr": 0.0001, + "sup_loss": 1.5057919255036136 + }, + { + "step": 683, + "lr": 0.0001, + "sup_loss": 1.5287738631947454 + }, + { + "step": 684, + "lr": 0.0001, + "sup_loss": 1.5412355279543468 + }, + { + "step": 685, + "lr": 0.0001, + "sup_loss": 1.5046593794438914 + }, + { + "step": 686, + "lr": 0.0001, + "sup_loss": 1.4978752595818452 + }, + { + "step": 687, + "lr": 0.0001, + "sup_loss": 1.5352642460660617 + 
}, + { + "step": 688, + "lr": 0.0001, + "sup_loss": 1.4983927779483674 + }, + { + "step": 689, + "lr": 0.0001, + "sup_loss": 1.480772186943008 + }, + { + "step": 690, + "lr": 0.0001, + "sup_loss": 1.5157947002219192 + }, + { + "step": 691, + "lr": 0.0001, + "sup_loss": 1.5287968981414712 + }, + { + "step": 692, + "lr": 0.0001, + "sup_loss": 1.5115796750350312 + }, + { + "step": 693, + "lr": 0.0001, + "sup_loss": 1.5193259959766254 + }, + { + "step": 694, + "lr": 0.0001, + "sup_loss": 1.5110918958324207 + }, + { + "step": 695, + "lr": 0.0001, + "sup_loss": 1.5326753899638526 + }, + { + "step": 696, + "lr": 0.0001, + "sup_loss": 1.505106765876873 + }, + { + "step": 697, + "lr": 0.0001, + "sup_loss": 1.500845572940783 + }, + { + "step": 698, + "lr": 0.0001, + "sup_loss": 1.5316710552295576 + }, + { + "step": 699, + "lr": 0.0001, + "sup_loss": 1.4883620179723598 + }, + { + "step": 700, + "lr": 0.0001, + "sup_loss": 1.5566363929525928, + "lyap1_mean": -6.538327217102051, + "lyap1_max": -6.495978832244873, + "lyap_spec_mean": [ + -6.538326740264893, + -6.544689655303955 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 701, + "lr": 0.0001, + "sup_loss": 1.5287810715506045 + }, + { + "step": 702, + "lr": 0.0001, + "sup_loss": 1.516066487082322 + }, + { + "step": 703, + "lr": 0.0001, + "sup_loss": 1.5137401942306308 + }, + { + "step": 704, + "lr": 0.0001, + "sup_loss": 1.5152303193706085 + }, + { + "step": 705, + "lr": 0.0001, + "sup_loss": 1.5325047690615334 + }, + { + "step": 706, + "lr": 0.0001, + "sup_loss": 1.5041348246173127 + }, + { + "step": 707, + "lr": 0.0001, + "sup_loss": 1.4751987746757644 + }, + { + "step": 708, + "lr": 0.0001, + "sup_loss": 1.5284869096887617 + }, + { + "step": 709, + "lr": 0.0001, + "sup_loss": 1.534997664041701 + }, + { + "step": 710, + "lr": 0.0001, + "sup_loss": 1.5226603736537316 + }, + { + "step": 711, + "lr": 0.0001, + "sup_loss": 1.5188651865398364 + }, + { + "step": 712, + "lr": 0.0001, + "sup_loss": 1.5112746657157887 + 
}, + { + "step": 713, + "lr": 0.0001, + "sup_loss": 1.4818948793235172 + }, + { + "step": 714, + "lr": 0.0001, + "sup_loss": 1.5122376161315374 + }, + { + "step": 715, + "lr": 0.0001, + "sup_loss": 1.5158005977584161 + }, + { + "step": 716, + "lr": 0.0001, + "sup_loss": 1.5415213410054467 + }, + { + "step": 717, + "lr": 0.0001, + "sup_loss": 1.5010530300237304 + }, + { + "step": 718, + "lr": 0.0001, + "sup_loss": 1.510064641051084 + }, + { + "step": 719, + "lr": 0.0001, + "sup_loss": 1.488125871007282 + }, + { + "step": 720, + "lr": 0.0001, + "sup_loss": 1.5183722180715826 + }, + { + "step": 721, + "lr": 0.0001, + "sup_loss": 1.496824062230423 + }, + { + "step": 722, + "lr": 0.0001, + "sup_loss": 1.5280516945962228 + }, + { + "step": 723, + "lr": 0.0001, + "sup_loss": 1.528585838141417 + }, + { + "step": 724, + "lr": 0.0001, + "sup_loss": 1.5381686652232152 + }, + { + "step": 725, + "lr": 0.0001, + "sup_loss": 1.484148119050607 + }, + { + "step": 726, + "lr": 0.0001, + "sup_loss": 1.5143109927597054 + }, + { + "step": 727, + "lr": 0.0001, + "sup_loss": 1.511654948679459 + }, + { + "step": 728, + "lr": 0.0001, + "sup_loss": 1.532639600339649 + }, + { + "step": 729, + "lr": 0.0001, + "sup_loss": 1.5156672857928635 + }, + { + "step": 730, + "lr": 0.0001, + "sup_loss": 1.515736949131998 + }, + { + "step": 731, + "lr": 0.0001, + "sup_loss": 1.5381653918029696 + }, + { + "step": 732, + "lr": 0.0001, + "sup_loss": 1.4817686566279522 + }, + { + "step": 733, + "lr": 0.0001, + "sup_loss": 1.5350881711968158 + }, + { + "step": 734, + "lr": 0.0001, + "sup_loss": 1.5283042316217763 + }, + { + "step": 735, + "lr": 0.0001, + "sup_loss": 1.5119280295262 + }, + { + "step": 736, + "lr": 0.0001, + "sup_loss": 1.502075078675215 + }, + { + "step": 737, + "lr": 0.0001, + "sup_loss": 1.5380265645418765 + }, + { + "step": 738, + "lr": 0.0001, + "sup_loss": 1.5176291152544727 + }, + { + "step": 739, + "lr": 0.0001, + "sup_loss": 1.5477875553011247 + }, + { + "step": 740, + "lr": 0.0001, + 
"sup_loss": 1.527247302561592 + }, + { + "step": 741, + "lr": 0.0001, + "sup_loss": 1.494716759296525 + }, + { + "step": 742, + "lr": 0.0001, + "sup_loss": 1.5283759340142355 + }, + { + "step": 743, + "lr": 0.0001, + "sup_loss": 1.5006663614011584 + }, + { + "step": 744, + "lr": 0.0001, + "sup_loss": 1.5210244403816247 + }, + { + "step": 745, + "lr": 0.0001, + "sup_loss": 1.5236166901633954 + }, + { + "step": 746, + "lr": 0.0001, + "sup_loss": 1.5106387834402368 + }, + { + "step": 747, + "lr": 0.0001, + "sup_loss": 1.5039683432611493 + }, + { + "step": 748, + "lr": 0.0001, + "sup_loss": 1.5093944438836167 + }, + { + "step": 749, + "lr": 0.0001, + "sup_loss": 1.5085239521803453 + }, + { + "step": 750, + "lr": 0.0001, + "sup_loss": 1.5201361058761036, + "lyap1_mean": -6.414738655090332, + "lyap1_max": -6.371342658996582, + "lyap_spec_mean": [ + -6.414738655090332, + -6.415633678436279 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 751, + "lr": 0.0001, + "sup_loss": 1.5149260923412404 + }, + { + "step": 752, + "lr": 0.0001, + "sup_loss": 1.5341902670259726 + }, + { + "step": 753, + "lr": 0.0001, + "sup_loss": 1.5147095931145356 + }, + { + "step": 754, + "lr": 0.0001, + "sup_loss": 1.512407652155099 + }, + { + "step": 755, + "lr": 0.0001, + "sup_loss": 1.4839033381321656 + }, + { + "step": 756, + "lr": 0.0001, + "sup_loss": 1.5099877524355274 + }, + { + "step": 757, + "lr": 0.0001, + "sup_loss": 1.5043745540005147 + }, + { + "step": 758, + "lr": 0.0001, + "sup_loss": 1.5015430156634932 + }, + { + "step": 759, + "lr": 0.0001, + "sup_loss": 1.5379622323258437 + }, + { + "step": 760, + "lr": 0.0001, + "sup_loss": 1.5275275436239428 + }, + { + "step": 761, + "lr": 0.0001, + "sup_loss": 1.5204969609836256 + }, + { + "step": 762, + "lr": 0.0001, + "sup_loss": 1.5145343673588703 + }, + { + "step": 763, + "lr": 0.0001, + "sup_loss": 1.520435885713137 + }, + { + "step": 764, + "lr": 0.0001, + "sup_loss": 1.5362379995653999 + }, + { + "step": 765, + "lr": 0.0001, + 
"sup_loss": 1.53280429578943 + }, + { + "step": 766, + "lr": 0.0001, + "sup_loss": 1.5430548616331885 + }, + { + "step": 767, + "lr": 0.0001, + "sup_loss": 1.5005863037222265 + }, + { + "step": 768, + "lr": 0.0001, + "sup_loss": 1.5098696165419137 + }, + { + "step": 769, + "lr": 0.0001, + "sup_loss": 1.5180128099853307 + }, + { + "step": 770, + "lr": 0.0001, + "sup_loss": 1.5169463420755118 + }, + { + "step": 771, + "lr": 0.0001, + "sup_loss": 1.5186083043919298 + }, + { + "step": 772, + "lr": 0.0001, + "sup_loss": 1.5065191618683647 + }, + { + "step": 773, + "lr": 0.0001, + "sup_loss": 1.5100622769355176 + }, + { + "step": 774, + "lr": 0.0001, + "sup_loss": 1.5401475174904948 + }, + { + "step": 775, + "lr": 0.0001, + "sup_loss": 1.4939349933729393 + }, + { + "step": 776, + "lr": 0.0001, + "sup_loss": 1.5297048457366553 + }, + { + "step": 777, + "lr": 0.0001, + "sup_loss": 1.5339535742003936 + }, + { + "step": 778, + "lr": 0.0001, + "sup_loss": 1.5257886680044794 + }, + { + "step": 779, + "lr": 0.0001, + "sup_loss": 1.4923725842943585 + }, + { + "step": 780, + "lr": 0.0001, + "sup_loss": 1.5433369771805847 + }, + { + "step": 781, + "lr": 0.0001, + "sup_loss": 1.466749345377613 + }, + { + "step": 782, + "lr": 0.0001, + "sup_loss": 1.5100988400396453 + }, + { + "step": 783, + "lr": 0.0001, + "sup_loss": 1.4935840894307528 + }, + { + "step": 784, + "lr": 0.0001, + "sup_loss": 1.5288095409000635 + }, + { + "step": 785, + "lr": 0.0001, + "sup_loss": 1.5140798793701746 + }, + { + "step": 786, + "lr": 0.0001, + "sup_loss": 1.5038278893428967 + }, + { + "step": 787, + "lr": 0.0001, + "sup_loss": 1.4897931579635242 + }, + { + "step": 788, + "lr": 0.0001, + "sup_loss": 1.510297365347192 + }, + { + "step": 789, + "lr": 0.0001, + "sup_loss": 1.5096970725912824 + }, + { + "step": 790, + "lr": 0.0001, + "sup_loss": 1.4563140072803142 + }, + { + "step": 791, + "lr": 0.0001, + "sup_loss": 1.5235519797386794 + }, + { + "step": 792, + "lr": 0.0001, + "sup_loss": 1.554555089402592 + 
}, + { + "step": 793, + "lr": 0.0001, + "sup_loss": 1.5309656221122645 + }, + { + "step": 794, + "lr": 0.0001, + "sup_loss": 1.5196128662663126 + }, + { + "step": 795, + "lr": 0.0001, + "sup_loss": 1.4970403383711195 + }, + { + "step": 796, + "lr": 0.0001, + "sup_loss": 1.5065915402287726 + }, + { + "step": 797, + "lr": 0.0001, + "sup_loss": 1.4820203727089465 + }, + { + "step": 798, + "lr": 0.0001, + "sup_loss": 1.4932871722006784 + }, + { + "step": 799, + "lr": 0.0001, + "sup_loss": 1.5271928852253274 + }, + { + "step": 800, + "lr": 0.0001, + "sup_loss": 1.5189512274214638, + "lyap1_mean": -6.264998435974121, + "lyap1_max": -6.218259811401367, + "lyap_spec_mean": [ + -6.264998435974121, + -6.257623672485352 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 801, + "lr": 0.0001, + "sup_loss": 1.4794003888566465 + }, + { + "step": 802, + "lr": 0.0001, + "sup_loss": 1.5031133014922253 + }, + { + "step": 803, + "lr": 0.0001, + "sup_loss": 1.5162473584480844 + }, + { + "step": 804, + "lr": 0.0001, + "sup_loss": 1.5171519313246833 + }, + { + "step": 805, + "lr": 0.0001, + "sup_loss": 1.5373420552897847 + }, + { + "step": 806, + "lr": 0.0001, + "sup_loss": 1.5206732390764939 + }, + { + "step": 807, + "lr": 0.0001, + "sup_loss": 1.5203877668446022 + }, + { + "step": 808, + "lr": 0.0001, + "sup_loss": 1.492586592861398 + }, + { + "step": 809, + "lr": 0.0001, + "sup_loss": 1.5039200364706633 + }, + { + "step": 810, + "lr": 0.0001, + "sup_loss": 1.4772981905774536 + }, + { + "step": 811, + "lr": 0.0001, + "sup_loss": 1.5200133260780286 + }, + { + "step": 812, + "lr": 0.0001, + "sup_loss": 1.519401738989703 + }, + { + "step": 813, + "lr": 0.0001, + "sup_loss": 1.5258032459404562 + }, + { + "step": 814, + "lr": 0.0001, + "sup_loss": 1.5377889297754885 + }, + { + "step": 815, + "lr": 0.0001, + "sup_loss": 1.531876294284268 + }, + { + "step": 816, + "lr": 0.0001, + "sup_loss": 1.4890782502081874 + }, + { + "step": 817, + "lr": 0.0001, + "sup_loss": 1.5135778456497797 
+ }, + { + "step": 818, + "lr": 0.0001, + "sup_loss": 1.4895132250118437 + }, + { + "step": 819, + "lr": 0.0001, + "sup_loss": 1.5319449438162507 + }, + { + "step": 820, + "lr": 0.0001, + "sup_loss": 1.5478730438296613 + }, + { + "step": 821, + "lr": 0.0001, + "sup_loss": 1.5234296605953752 + }, + { + "step": 822, + "lr": 0.0001, + "sup_loss": 1.495539966481088 + }, + { + "step": 823, + "lr": 0.0001, + "sup_loss": 1.4938042542202827 + }, + { + "step": 824, + "lr": 0.0001, + "sup_loss": 1.53036073010849 + }, + { + "step": 825, + "lr": 0.0001, + "sup_loss": 1.5167469429239189 + }, + { + "step": 826, + "lr": 0.0001, + "sup_loss": 1.5224439671219032 + }, + { + "step": 827, + "lr": 0.0001, + "sup_loss": 1.502935736845863 + }, + { + "step": 828, + "lr": 0.0001, + "sup_loss": 1.4923314872325504 + }, + { + "step": 829, + "lr": 0.0001, + "sup_loss": 1.5220883963647738 + }, + { + "step": 830, + "lr": 0.0001, + "sup_loss": 1.5422434343850067 + }, + { + "step": 831, + "lr": 0.0001, + "sup_loss": 1.5321838505506373 + }, + { + "step": 832, + "lr": 0.0001, + "sup_loss": 1.5238810248029964 + }, + { + "step": 833, + "lr": 0.0001, + "sup_loss": 1.506038382435152 + }, + { + "step": 834, + "lr": 0.0001, + "sup_loss": 1.516271936441931 + }, + { + "step": 835, + "lr": 0.0001, + "sup_loss": 1.5141644084304013 + }, + { + "step": 836, + "lr": 0.0001, + "sup_loss": 1.5070632624785256 + }, + { + "step": 837, + "lr": 0.0001, + "sup_loss": 1.5189506063228038 + }, + { + "step": 838, + "lr": 0.0001, + "sup_loss": 1.5222628958504518 + }, + { + "step": 839, + "lr": 0.0001, + "sup_loss": 1.539600923636482 + }, + { + "step": 840, + "lr": 0.0001, + "sup_loss": 1.486930224177299 + }, + { + "step": 841, + "lr": 0.0001, + "sup_loss": 1.5096024197238151 + }, + { + "step": 842, + "lr": 0.0001, + "sup_loss": 1.543960076377166 + }, + { + "step": 843, + "lr": 0.0001, + "sup_loss": 1.5350423459292946 + }, + { + "step": 844, + "lr": 0.0001, + "sup_loss": 1.51947082585353 + }, + { + "step": 845, + "lr": 0.0001, 
+ "sup_loss": 1.5000554733457534 + }, + { + "step": 846, + "lr": 0.0001, + "sup_loss": 1.4854979289135024 + }, + { + "step": 847, + "lr": 0.0001, + "sup_loss": 1.4528837087888837 + }, + { + "step": 848, + "lr": 0.0001, + "sup_loss": 1.4893767215001192 + }, + { + "step": 849, + "lr": 0.0001, + "sup_loss": 1.5116837147336943 + }, + { + "step": 850, + "lr": 0.0001, + "sup_loss": 1.5316996234308449, + "lyap1_mean": -5.944291114807129, + "lyap1_max": -5.860029220581055, + "lyap_spec_mean": [ + -5.944291591644287, + -5.9602861404418945 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 851, + "lr": 0.0001, + "sup_loss": 1.4662178118117368 + }, + { + "step": 852, + "lr": 0.0001, + "sup_loss": 1.496672346771726 + }, + { + "step": 853, + "lr": 0.0001, + "sup_loss": 1.4502686349180454 + }, + { + "step": 854, + "lr": 0.0001, + "sup_loss": 1.5101654747752313 + }, + { + "step": 855, + "lr": 0.0001, + "sup_loss": 1.5287651774536772 + }, + { + "step": 856, + "lr": 0.0001, + "sup_loss": 1.4887774476565343 + }, + { + "step": 857, + "lr": 0.0001, + "sup_loss": 1.5288190638328945 + }, + { + "step": 858, + "lr": 0.0001, + "sup_loss": 1.5331306595376184 + }, + { + "step": 859, + "lr": 0.0001, + "sup_loss": 1.5119847591823368 + }, + { + "step": 860, + "lr": 0.0001, + "sup_loss": 1.4939856897853803 + }, + { + "step": 861, + "lr": 0.0001, + "sup_loss": 1.5054032429188404 + }, + { + "step": 862, + "lr": 0.0001, + "sup_loss": 1.53178351688308 + }, + { + "step": 863, + "lr": 0.0001, + "sup_loss": 1.4975122146837385 + }, + { + "step": 864, + "lr": 0.0001, + "sup_loss": 1.529216706591692 + }, + { + "step": 865, + "lr": 0.0001, + "sup_loss": 1.4858391212571551 + }, + { + "step": 866, + "lr": 0.0001, + "sup_loss": 1.5067555131486585 + }, + { + "step": 867, + "lr": 0.0001, + "sup_loss": 1.4653182542665304 + }, + { + "step": 868, + "lr": 0.0001, + "sup_loss": 1.4939540326558096 + }, + { + "step": 869, + "lr": 0.0001, + "sup_loss": 1.5303096178101399 + }, + { + "step": 870, + "lr": 
0.0001, + "sup_loss": 1.516176876584087 + }, + { + "step": 871, + "lr": 0.0001, + "sup_loss": 1.5219778344422048 + }, + { + "step": 872, + "lr": 0.0001, + "sup_loss": 1.4761000122837777 + }, + { + "step": 873, + "lr": 0.0001, + "sup_loss": 1.513006833405729 + }, + { + "step": 874, + "lr": 0.0001, + "sup_loss": 1.5485589422548636 + }, + { + "step": 875, + "lr": 0.0001, + "sup_loss": 1.485695503404262 + }, + { + "step": 876, + "lr": 0.0001, + "sup_loss": 1.4890143451204383 + }, + { + "step": 877, + "lr": 0.0001, + "sup_loss": 1.5175875346328733 + }, + { + "step": 878, + "lr": 0.0001, + "sup_loss": 1.5096965232152808 + }, + { + "step": 879, + "lr": 0.0001, + "sup_loss": 1.5517101778820277 + }, + { + "step": 880, + "lr": 0.0001, + "sup_loss": 1.5209166682770523 + }, + { + "step": 881, + "lr": 0.0001, + "sup_loss": 1.5005544662351054 + }, + { + "step": 882, + "lr": 0.0001, + "sup_loss": 1.4691439649158073 + }, + { + "step": 883, + "lr": 0.0001, + "sup_loss": 1.498696126804384 + }, + { + "step": 884, + "lr": 0.0001, + "sup_loss": 1.5087921193707006 + }, + { + "step": 885, + "lr": 0.0001, + "sup_loss": 1.5023848520057832 + }, + { + "step": 886, + "lr": 0.0001, + "sup_loss": 1.5195873920924312 + }, + { + "step": 887, + "lr": 0.0001, + "sup_loss": 1.5175733846833919 + }, + { + "step": 888, + "lr": 0.0001, + "sup_loss": 1.4952592414580277 + }, + { + "step": 889, + "lr": 0.0001, + "sup_loss": 1.496644038280453 + }, + { + "step": 890, + "lr": 0.0001, + "sup_loss": 1.5191835238179954 + }, + { + "step": 891, + "lr": 0.0001, + "sup_loss": 1.514809273108044 + }, + { + "step": 892, + "lr": 0.0001, + "sup_loss": 1.4885183127445003 + }, + { + "step": 893, + "lr": 0.0001, + "sup_loss": 1.5126444543487492 + }, + { + "step": 894, + "lr": 0.0001, + "sup_loss": 1.5258446574175875 + }, + { + "step": 895, + "lr": 0.0001, + "sup_loss": 1.4691680650241499 + }, + { + "step": 896, + "lr": 0.0001, + "sup_loss": 1.5170742203735634 + }, + { + "step": 897, + "lr": 0.0001, + "sup_loss": 
1.4924416535416078 + }, + { + "step": 898, + "lr": 0.0001, + "sup_loss": 1.5363673680659329 + }, + { + "step": 899, + "lr": 0.0001, + "sup_loss": 1.536372445667659 + }, + { + "step": 900, + "lr": 0.0001, + "sup_loss": 1.4997625964904768, + "lyap1_mean": -5.652142524719238, + "lyap1_max": -5.594020366668701, + "lyap_spec_mean": [ + -5.652142524719238, + -5.6429667472839355 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 901, + "lr": 0.0001, + "sup_loss": 1.507499264067011 + }, + { + "step": 902, + "lr": 0.0001, + "sup_loss": 1.5167841992518767 + }, + { + "step": 903, + "lr": 0.0001, + "sup_loss": 1.467195067448345 + }, + { + "step": 904, + "lr": 0.0001, + "sup_loss": 1.5224154370923002 + }, + { + "step": 905, + "lr": 0.0001, + "sup_loss": 1.5317981279164885 + }, + { + "step": 906, + "lr": 0.0001, + "sup_loss": 1.5291796176202899 + }, + { + "step": 907, + "lr": 0.0001, + "sup_loss": 1.5141419183145175 + }, + { + "step": 908, + "lr": 0.0001, + "sup_loss": 1.486324712345776 + }, + { + "step": 909, + "lr": 0.0001, + "sup_loss": 1.4968871261056726 + }, + { + "step": 910, + "lr": 0.0001, + "sup_loss": 1.4828435875444144 + }, + { + "step": 911, + "lr": 0.0001, + "sup_loss": 1.4933315598782024 + }, + { + "step": 912, + "lr": 0.0001, + "sup_loss": 1.4868035687515695 + }, + { + "step": 913, + "lr": 0.0001, + "sup_loss": 1.5397040122960144 + }, + { + "step": 914, + "lr": 0.0001, + "sup_loss": 1.5136633188998756 + }, + { + "step": 915, + "lr": 0.0001, + "sup_loss": 1.4473687510979423 + }, + { + "step": 916, + "lr": 0.0001, + "sup_loss": 1.5355209072731966 + }, + { + "step": 917, + "lr": 0.0001, + "sup_loss": 1.463715133289889 + }, + { + "step": 918, + "lr": 0.0001, + "sup_loss": 1.524152528957749 + }, + { + "step": 919, + "lr": 0.0001, + "sup_loss": 1.5144340083338805 + }, + { + "step": 920, + "lr": 0.0001, + "sup_loss": 1.4936449496755215 + }, + { + "step": 921, + "lr": 0.0001, + "sup_loss": 1.551149291445036 + }, + { + "step": 922, + "lr": 0.0001, + "sup_loss": 
1.475193621978353 + }, + { + "step": 923, + "lr": 0.0001, + "sup_loss": 1.5072904697448237 + }, + { + "step": 924, + "lr": 0.0001, + "sup_loss": 1.5034295320627853 + }, + { + "step": 925, + "lr": 0.0001, + "sup_loss": 1.4687878097170466 + }, + { + "step": 926, + "lr": 0.0001, + "sup_loss": 1.497240080963438 + }, + { + "step": 927, + "lr": 0.0001, + "sup_loss": 1.453493570747067 + }, + { + "step": 928, + "lr": 0.0001, + "sup_loss": 1.5272355249073333 + }, + { + "step": 929, + "lr": 0.0001, + "sup_loss": 1.4954138710957656 + }, + { + "step": 930, + "lr": 0.0001, + "sup_loss": 1.532836496771222 + }, + { + "step": 931, + "lr": 0.0001, + "sup_loss": 1.5243129092733296 + }, + { + "step": 932, + "lr": 0.0001, + "sup_loss": 1.5095965680375445 + }, + { + "step": 933, + "lr": 0.0001, + "sup_loss": 1.5081322883135968 + }, + { + "step": 934, + "lr": 0.0001, + "sup_loss": 1.5183249592661394 + }, + { + "step": 935, + "lr": 0.0001, + "sup_loss": 1.5093233281872678 + }, + { + "step": 936, + "lr": 0.0001, + "sup_loss": 1.509689676873144 + }, + { + "step": 937, + "lr": 0.0001, + "sup_loss": 1.5201641118624953 + }, + { + "step": 938, + "lr": 0.0001, + "sup_loss": 1.434867776183133 + }, + { + "step": 939, + "lr": 0.0001, + "sup_loss": 1.498886252878764 + }, + { + "step": 940, + "lr": 0.0001, + "sup_loss": 1.5011932339312852 + }, + { + "step": 941, + "lr": 0.0001, + "sup_loss": 1.5241598202101634 + }, + { + "step": 942, + "lr": 0.0001, + "sup_loss": 1.505778298169391 + }, + { + "step": 943, + "lr": 0.0001, + "sup_loss": 1.519579321968116 + }, + { + "step": 944, + "lr": 0.0001, + "sup_loss": 1.5079361183127624 + }, + { + "step": 945, + "lr": 0.0001, + "sup_loss": 1.5037163830793818 + }, + { + "step": 946, + "lr": 0.0001, + "sup_loss": 1.4689941026742286 + }, + { + "step": 947, + "lr": 0.0001, + "sup_loss": 1.5639875285326823 + }, + { + "step": 948, + "lr": 0.0001, + "sup_loss": 1.5392894189821704 + }, + { + "step": 949, + "lr": 0.0001, + "sup_loss": 1.5055158758145308 + }, + { + "step": 
950, + "lr": 0.0001, + "sup_loss": 1.518267165943395, + "lyap1_mean": -5.448543548583984, + "lyap1_max": -5.374388694763184, + "lyap_spec_mean": [ + -5.448543548583984, + -5.42637825012207 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 951, + "lr": 0.0001, + "sup_loss": 1.4824273966788006 + }, + { + "step": 952, + "lr": 0.0001, + "sup_loss": 1.5323886198047028 + }, + { + "step": 953, + "lr": 0.0001, + "sup_loss": 1.5218753245403334 + }, + { + "step": 954, + "lr": 0.0001, + "sup_loss": 1.5106468562324793 + }, + { + "step": 955, + "lr": 0.0001, + "sup_loss": 1.4902749499708472 + }, + { + "step": 956, + "lr": 0.0001, + "sup_loss": 1.5072560962565862 + }, + { + "step": 957, + "lr": 0.0001, + "sup_loss": 1.5081858010439633 + }, + { + "step": 958, + "lr": 0.0001, + "sup_loss": 1.517830286889684 + }, + { + "step": 959, + "lr": 0.0001, + "sup_loss": 1.5292294570510734 + }, + { + "step": 960, + "lr": 0.0001, + "sup_loss": 1.5284211025221013 + }, + { + "step": 961, + "lr": 0.0001, + "sup_loss": 1.4869179701446167 + }, + { + "step": 962, + "lr": 0.0001, + "sup_loss": 1.5167554418953664 + }, + { + "step": 963, + "lr": 0.0001, + "sup_loss": 1.5022203921109658 + }, + { + "step": 964, + "lr": 0.0001, + "sup_loss": 1.5082677144024677 + }, + { + "step": 965, + "lr": 0.0001, + "sup_loss": 1.4737698001462685 + }, + { + "step": 966, + "lr": 0.0001, + "sup_loss": 1.5188335949555747 + }, + { + "step": 967, + "lr": 0.0001, + "sup_loss": 1.5274473696800166 + }, + { + "step": 968, + "lr": 0.0001, + "sup_loss": 1.514180666119481 + }, + { + "step": 969, + "lr": 0.0001, + "sup_loss": 1.5202551960844581 + }, + { + "step": 970, + "lr": 0.0001, + "sup_loss": 1.5143891182957718 + }, + { + "step": 971, + "lr": 0.0001, + "sup_loss": 1.504930148063865 + }, + { + "step": 972, + "lr": 0.0001, + "sup_loss": 1.501472242033977 + }, + { + "step": 973, + "lr": 0.0001, + "sup_loss": 1.5109138293566082 + }, + { + "step": 974, + "lr": 0.0001, + "sup_loss": 1.521006606339136 + }, + { + "step": 
975, + "lr": 0.0001, + "sup_loss": 1.4974613446060685 + }, + { + "step": 976, + "lr": 0.0001, + "sup_loss": 1.5050752003953027 + }, + { + "step": 977, + "lr": 0.0001, + "sup_loss": 1.5218882284540702 + }, + { + "step": 978, + "lr": 0.0001, + "sup_loss": 1.54280851663081 + }, + { + "step": 979, + "lr": 0.0001, + "sup_loss": 1.4553324585789031 + }, + { + "step": 980, + "lr": 0.0001, + "sup_loss": 1.5596921913721995 + }, + { + "step": 981, + "lr": 0.0001, + "sup_loss": 1.4786041536384265 + }, + { + "step": 982, + "lr": 0.0001, + "sup_loss": 1.4971809060144017 + }, + { + "step": 983, + "lr": 0.0001, + "sup_loss": 1.5295897375459258 + }, + { + "step": 984, + "lr": 0.0001, + "sup_loss": 1.5299979674474378 + }, + { + "step": 985, + "lr": 0.0001, + "sup_loss": 1.456327333071445 + }, + { + "step": 986, + "lr": 0.0001, + "sup_loss": 1.5378813472825856 + }, + { + "step": 987, + "lr": 0.0001, + "sup_loss": 1.491217848090841 + }, + { + "step": 988, + "lr": 0.0001, + "sup_loss": 1.5116251776299399 + }, + { + "step": 989, + "lr": 0.0001, + "sup_loss": 1.474147240169287 + }, + { + "step": 990, + "lr": 0.0001, + "sup_loss": 1.4907397097415611 + }, + { + "step": 991, + "lr": 0.0001, + "sup_loss": 1.4992047493859184 + }, + { + "step": 992, + "lr": 0.0001, + "sup_loss": 1.4937366874149411 + }, + { + "step": 993, + "lr": 0.0001, + "sup_loss": 1.4772547257883624 + }, + { + "step": 994, + "lr": 0.0001, + "sup_loss": 1.4871965155865938 + }, + { + "step": 995, + "lr": 0.0001, + "sup_loss": 1.547781844799699 + }, + { + "step": 996, + "lr": 0.0001, + "sup_loss": 1.4729891039962326 + }, + { + "step": 997, + "lr": 0.0001, + "sup_loss": 1.4800220846633856 + }, + { + "step": 998, + "lr": 0.0001, + "sup_loss": 1.5138811195467095 + }, + { + "step": 999, + "lr": 0.0001, + "sup_loss": 1.4800734524212662 + }, + { + "step": 1000, + "lr": 0.0001, + "sup_loss": 1.4862077489691015, + "lyap1_mean": -5.086652755737305, + "lyap1_max": -5.010838508605957, + "lyap_spec_mean": [ + -5.086652755737305, + 
-5.101796627044678 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1001, + "lr": 0.0001, + "sup_loss": 1.491190142941294 + }, + { + "step": 1002, + "lr": 0.0001, + "sup_loss": 1.5338051140823528 + }, + { + "step": 1003, + "lr": 0.0001, + "sup_loss": 1.453768709725902 + }, + { + "step": 1004, + "lr": 0.0001, + "sup_loss": 1.4950069717523864 + }, + { + "step": 1005, + "lr": 0.0001, + "sup_loss": 1.4921631368346473 + }, + { + "step": 1006, + "lr": 0.0001, + "sup_loss": 1.5298900700411682 + }, + { + "step": 1007, + "lr": 0.0001, + "sup_loss": 1.4875213403635956 + }, + { + "step": 1008, + "lr": 0.0001, + "sup_loss": 1.4811045802261913 + }, + { + "step": 1009, + "lr": 0.0001, + "sup_loss": 1.5250101180805309 + }, + { + "step": 1010, + "lr": 0.0001, + "sup_loss": 1.5033493294925102 + }, + { + "step": 1011, + "lr": 0.0001, + "sup_loss": 1.5128267721867283 + }, + { + "step": 1012, + "lr": 0.0001, + "sup_loss": 1.4947289338333436 + }, + { + "step": 1013, + "lr": 0.0001, + "sup_loss": 1.4925525129247341 + }, + { + "step": 1014, + "lr": 0.0001, + "sup_loss": 1.4996314639909738 + }, + { + "step": 1015, + "lr": 0.0001, + "sup_loss": 1.492959768370052 + }, + { + "step": 1016, + "lr": 0.0001, + "sup_loss": 1.5091553965420217 + }, + { + "step": 1017, + "lr": 0.0001, + "sup_loss": 1.523810286677666 + }, + { + "step": 1018, + "lr": 0.0001, + "sup_loss": 1.483313092948707 + }, + { + "step": 1019, + "lr": 0.0001, + "sup_loss": 1.5150976478960907 + }, + { + "step": 1020, + "lr": 0.0001, + "sup_loss": 1.475309542182523 + }, + { + "step": 1021, + "lr": 0.0001, + "sup_loss": 1.500195908589669 + }, + { + "step": 1022, + "lr": 0.0001, + "sup_loss": 1.4945939977510452 + }, + { + "step": 1023, + "lr": 0.0001, + "sup_loss": 1.4250120115069176 + }, + { + "step": 1024, + "lr": 0.0001, + "sup_loss": 1.4918787542757612 + }, + { + "step": 1025, + "lr": 0.0001, + "sup_loss": 1.4991129912983376 + }, + { + "step": 1026, + "lr": 0.0001, + "sup_loss": 1.492617385449115 + }, + { + "step": 
1027, + "lr": 0.0001, + "sup_loss": 1.5160458983109 + }, + { + "step": 1028, + "lr": 0.0001, + "sup_loss": 1.4941115004329355 + }, + { + "step": 1029, + "lr": 0.0001, + "sup_loss": 1.496451859922323 + }, + { + "step": 1030, + "lr": 0.0001, + "sup_loss": 1.4951053205183618 + }, + { + "step": 1031, + "lr": 0.0001, + "sup_loss": 1.4938560898638187 + }, + { + "step": 1032, + "lr": 0.0001, + "sup_loss": 1.4774130117810043 + }, + { + "step": 1033, + "lr": 0.0001, + "sup_loss": 1.5095015063091686 + }, + { + "step": 1034, + "lr": 0.0001, + "sup_loss": 1.5070117186327943 + }, + { + "step": 1035, + "lr": 0.0001, + "sup_loss": 1.4768238123518271 + }, + { + "step": 1036, + "lr": 0.0001, + "sup_loss": 1.476529080284256 + }, + { + "step": 1037, + "lr": 0.0001, + "sup_loss": 1.494776177357163 + }, + { + "step": 1038, + "lr": 0.0001, + "sup_loss": 1.5404689844677595 + }, + { + "step": 1039, + "lr": 0.0001, + "sup_loss": 1.5116941628376854 + }, + { + "step": 1040, + "lr": 0.0001, + "sup_loss": 1.4807356397180933 + }, + { + "step": 1041, + "lr": 0.0001, + "sup_loss": 1.498028787710557 + }, + { + "step": 1042, + "lr": 0.0001, + "sup_loss": 1.4754103357553532 + }, + { + "step": 1043, + "lr": 0.0001, + "sup_loss": 1.5053698487795235 + }, + { + "step": 1044, + "lr": 0.0001, + "sup_loss": 1.5296728770508057 + }, + { + "step": 1045, + "lr": 0.0001, + "sup_loss": 1.5086255065998997 + }, + { + "step": 1046, + "lr": 0.0001, + "sup_loss": 1.4916575167454593 + }, + { + "step": 1047, + "lr": 0.0001, + "sup_loss": 1.4932460950040167 + }, + { + "step": 1048, + "lr": 0.0001, + "sup_loss": 1.5022055807348158 + }, + { + "step": 1049, + "lr": 0.0001, + "sup_loss": 1.4846387059737416 + }, + { + "step": 1050, + "lr": 0.0001, + "sup_loss": 1.4583279112164924, + "lyap1_mean": -4.985368251800537, + "lyap1_max": -4.9488115310668945, + "lyap_spec_mean": [ + -4.985368251800537, + -5.0083394050598145 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1051, + "lr": 0.0001, + "sup_loss": 
1.5079011066488937 + }, + { + "step": 1052, + "lr": 0.0001, + "sup_loss": 1.5007515289918683 + }, + { + "step": 1053, + "lr": 0.0001, + "sup_loss": 1.534958945999648 + }, + { + "step": 1054, + "lr": 0.0001, + "sup_loss": 1.4963135998085617 + }, + { + "step": 1055, + "lr": 0.0001, + "sup_loss": 1.4930120557032553 + }, + { + "step": 1056, + "lr": 0.0001, + "sup_loss": 1.512046037240657 + }, + { + "step": 1057, + "lr": 0.0001, + "sup_loss": 1.5058272313052585 + }, + { + "step": 1058, + "lr": 0.0001, + "sup_loss": 1.481208655387988 + }, + { + "step": 1059, + "lr": 0.0001, + "sup_loss": 1.5237169406639706 + }, + { + "step": 1060, + "lr": 0.0001, + "sup_loss": 1.5153822571783004 + }, + { + "step": 1061, + "lr": 0.0001, + "sup_loss": 1.477981358976096 + }, + { + "step": 1062, + "lr": 0.0001, + "sup_loss": 1.493432202847611 + }, + { + "step": 1063, + "lr": 0.0001, + "sup_loss": 1.4770211775614548 + }, + { + "step": 1064, + "lr": 0.0001, + "sup_loss": 1.4696978837949088 + }, + { + "step": 1065, + "lr": 0.0001, + "sup_loss": 1.4943629670203953 + }, + { + "step": 1066, + "lr": 0.0001, + "sup_loss": 1.49505678933645 + }, + { + "step": 1067, + "lr": 0.0001, + "sup_loss": 1.495147677887443 + }, + { + "step": 1068, + "lr": 0.0001, + "sup_loss": 1.4844499947850718 + }, + { + "step": 1069, + "lr": 0.0001, + "sup_loss": 1.4543617916322704 + }, + { + "step": 1070, + "lr": 0.0001, + "sup_loss": 1.5000871222605547 + }, + { + "step": 1071, + "lr": 0.0001, + "sup_loss": 1.502567682384034 + }, + { + "step": 1072, + "lr": 0.0001, + "sup_loss": 1.488314001438542 + }, + { + "step": 1073, + "lr": 0.0001, + "sup_loss": 1.488304624795962 + }, + { + "step": 1074, + "lr": 0.0001, + "sup_loss": 1.494284739754046 + }, + { + "step": 1075, + "lr": 0.0001, + "sup_loss": 1.4727882874363902 + }, + { + "step": 1076, + "lr": 0.0001, + "sup_loss": 1.511807986328729 + }, + { + "step": 1077, + "lr": 0.0001, + "sup_loss": 1.4870160784126345 + }, + { + "step": 1078, + "lr": 0.0001, + "sup_loss": 
1.4837270015578496 + }, + { + "step": 1079, + "lr": 0.0001, + "sup_loss": 1.4875352025713129 + }, + { + "step": 1080, + "lr": 0.0001, + "sup_loss": 1.5024935978982221 + }, + { + "step": 1081, + "lr": 0.0001, + "sup_loss": 1.4819859872644976 + }, + { + "step": 1082, + "lr": 0.0001, + "sup_loss": 1.5043254784455866 + }, + { + "step": 1083, + "lr": 0.0001, + "sup_loss": 1.4645798123131413 + }, + { + "step": 1084, + "lr": 0.0001, + "sup_loss": 1.4915036619135214 + }, + { + "step": 1085, + "lr": 0.0001, + "sup_loss": 1.5015138581699607 + }, + { + "step": 1086, + "lr": 0.0001, + "sup_loss": 1.4957361692727997 + }, + { + "step": 1087, + "lr": 0.0001, + "sup_loss": 1.5477593496318298 + }, + { + "step": 1088, + "lr": 0.0001, + "sup_loss": 1.5067801216534944 + }, + { + "step": 1089, + "lr": 0.0001, + "sup_loss": 1.4995562337114996 + }, + { + "step": 1090, + "lr": 0.0001, + "sup_loss": 1.4612830614799217 + }, + { + "step": 1091, + "lr": 0.0001, + "sup_loss": 1.4742256666063809 + }, + { + "step": 1092, + "lr": 0.0001, + "sup_loss": 1.4724958822731793 + }, + { + "step": 1093, + "lr": 0.0001, + "sup_loss": 1.4612374227310905 + }, + { + "step": 1094, + "lr": 0.0001, + "sup_loss": 1.4505542347531546 + }, + { + "step": 1095, + "lr": 0.0001, + "sup_loss": 1.5367597739936414 + }, + { + "step": 1096, + "lr": 0.0001, + "sup_loss": 1.4695740675730589 + }, + { + "step": 1097, + "lr": 0.0001, + "sup_loss": 1.5071457895270661 + }, + { + "step": 1098, + "lr": 0.0001, + "sup_loss": 1.5137372385953056 + }, + { + "step": 1099, + "lr": 0.0001, + "sup_loss": 1.5164012149254902 + }, + { + "step": 1100, + "lr": 0.0001, + "sup_loss": 1.5074580993196545, + "lyap1_mean": -5.024776458740234, + "lyap1_max": -4.9834303855896, + "lyap_spec_mean": [ + -5.024776458740234, + -5.0234293937683105 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1101, + "lr": 0.0001, + "sup_loss": 1.4893846557301706 + }, + { + "step": 1102, + "lr": 0.0001, + "sup_loss": 1.4810311211838452 + }, + { + "step": 1103, + 
"lr": 0.0001, + "sup_loss": 1.4881358655681456 + }, + { + "step": 1104, + "lr": 0.0001, + "sup_loss": 1.4940961901532852 + }, + { + "step": 1105, + "lr": 0.0001, + "sup_loss": 1.4753095966685144 + }, + { + "step": 1106, + "lr": 0.0001, + "sup_loss": 1.5248464308547973 + }, + { + "step": 1107, + "lr": 0.0001, + "sup_loss": 1.5093487508039454 + }, + { + "step": 1108, + "lr": 0.0001, + "sup_loss": 1.5367204201189768 + }, + { + "step": 1109, + "lr": 0.0001, + "sup_loss": 1.4951622618093052 + }, + { + "step": 1110, + "lr": 0.0001, + "sup_loss": 1.4954845796681129 + }, + { + "step": 1111, + "lr": 0.0001, + "sup_loss": 1.4917942853885013 + }, + { + "step": 1112, + "lr": 0.0001, + "sup_loss": 1.4262000511817663 + }, + { + "step": 1113, + "lr": 0.0001, + "sup_loss": 1.5043712728927912 + }, + { + "step": 1114, + "lr": 0.0001, + "sup_loss": 1.484278422177646 + }, + { + "step": 1115, + "lr": 0.0001, + "sup_loss": 1.5219035891965151 + }, + { + "step": 1116, + "lr": 0.0001, + "sup_loss": 1.4950970748440897 + }, + { + "step": 1117, + "lr": 0.0001, + "sup_loss": 1.4943428543594146 + }, + { + "step": 1118, + "lr": 0.0001, + "sup_loss": 1.4871422744331029 + }, + { + "step": 1119, + "lr": 0.0001, + "sup_loss": 1.505205653908111 + }, + { + "step": 1120, + "lr": 0.0001, + "sup_loss": 1.4624580150776079 + }, + { + "step": 1121, + "lr": 0.0001, + "sup_loss": 1.478784498763538 + }, + { + "step": 1122, + "lr": 0.0001, + "sup_loss": 1.4715949945354443 + }, + { + "step": 1123, + "lr": 0.0001, + "sup_loss": 1.4957458644069577 + }, + { + "step": 1124, + "lr": 0.0001, + "sup_loss": 1.4827836699290668 + }, + { + "step": 1125, + "lr": 0.0001, + "sup_loss": 1.4743856874260983 + }, + { + "step": 1126, + "lr": 0.0001, + "sup_loss": 1.471642222687305 + }, + { + "step": 1127, + "lr": 0.0001, + "sup_loss": 1.4942140766445238 + }, + { + "step": 1128, + "lr": 0.0001, + "sup_loss": 1.4964272399481269 + }, + { + "step": 1129, + "lr": 0.0001, + "sup_loss": 1.489714735807674 + }, + { + "step": 1130, + "lr": 
0.0001, + "sup_loss": 1.4795423848023082 + }, + { + "step": 1131, + "lr": 0.0001, + "sup_loss": 1.5017106888127012 + }, + { + "step": 1132, + "lr": 0.0001, + "sup_loss": 1.4714448207224442 + }, + { + "step": 1133, + "lr": 0.0001, + "sup_loss": 1.4626417158867993 + }, + { + "step": 1134, + "lr": 0.0001, + "sup_loss": 1.4491329459413975 + }, + { + "step": 1135, + "lr": 0.0001, + "sup_loss": 1.4617050052344747 + }, + { + "step": 1136, + "lr": 0.0001, + "sup_loss": 1.4998257578902021 + }, + { + "step": 1137, + "lr": 0.0001, + "sup_loss": 1.4998440252848666 + }, + { + "step": 1138, + "lr": 0.0001, + "sup_loss": 1.4846175202054472 + }, + { + "step": 1139, + "lr": 0.0001, + "sup_loss": 1.4718216224866734 + }, + { + "step": 1140, + "lr": 0.0001, + "sup_loss": 1.5081759004682445 + }, + { + "step": 1141, + "lr": 0.0001, + "sup_loss": 1.4969522608011026 + }, + { + "step": 1142, + "lr": 0.0001, + "sup_loss": 1.4690122810816595 + }, + { + "step": 1143, + "lr": 0.0001, + "sup_loss": 1.4955443523100043 + }, + { + "step": 1144, + "lr": 0.0001, + "sup_loss": 1.4927066398111735 + }, + { + "step": 1145, + "lr": 0.0001, + "sup_loss": 1.4725230333581665 + }, + { + "step": 1146, + "lr": 0.0001, + "sup_loss": 1.4701228574799046 + }, + { + "step": 1147, + "lr": 0.0001, + "sup_loss": 1.4780445694839879 + }, + { + "step": 1148, + "lr": 0.0001, + "sup_loss": 1.4693462829861517 + }, + { + "step": 1149, + "lr": 0.0001, + "sup_loss": 1.5100726072285409 + }, + { + "step": 1150, + "lr": 0.0001, + "sup_loss": 1.480903277719113, + "lyap1_mean": -4.9455885887146, + "lyap1_max": -4.850168228149414, + "lyap_spec_mean": [ + -4.9455885887146, + -4.929689407348633 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1151, + "lr": 0.0001, + "sup_loss": 1.4507184038835605 + }, + { + "step": 1152, + "lr": 0.0001, + "sup_loss": 1.502905456958449 + }, + { + "step": 1153, + "lr": 0.0001, + "sup_loss": 1.4731891271094737 + }, + { + "step": 1154, + "lr": 0.0001, + "sup_loss": 1.5114034459966716 + }, + { 
+ "step": 1155, + "lr": 0.0001, + "sup_loss": 1.4851636061949942 + }, + { + "step": 1156, + "lr": 0.0001, + "sup_loss": 1.5153188881042223 + }, + { + "step": 1157, + "lr": 0.0001, + "sup_loss": 1.53415556606997 + }, + { + "step": 1158, + "lr": 0.0001, + "sup_loss": 1.4927789967832232 + }, + { + "step": 1159, + "lr": 0.0001, + "sup_loss": 1.4748716306954783 + }, + { + "step": 1160, + "lr": 0.0001, + "sup_loss": 1.5037391789734031 + }, + { + "step": 1161, + "lr": 0.0001, + "sup_loss": 1.4909826070120134 + }, + { + "step": 1162, + "lr": 0.0001, + "sup_loss": 1.4882852503259236 + }, + { + "step": 1163, + "lr": 0.0001, + "sup_loss": 1.476347461882321 + }, + { + "step": 1164, + "lr": 0.0001, + "sup_loss": 1.500105285760655 + }, + { + "step": 1165, + "lr": 0.0001, + "sup_loss": 1.459256843071044 + }, + { + "step": 1166, + "lr": 0.0001, + "sup_loss": 1.4948051454166613 + }, + { + "step": 1167, + "lr": 0.0001, + "sup_loss": 1.4487606220173448 + }, + { + "step": 1168, + "lr": 0.0001, + "sup_loss": 1.4773466283799845 + }, + { + "step": 1169, + "lr": 0.0001, + "sup_loss": 1.4666078537550662 + }, + { + "step": 1170, + "lr": 0.0001, + "sup_loss": 1.4845902155303574 + }, + { + "step": 1171, + "lr": 0.0001, + "sup_loss": 1.4556790254014549 + }, + { + "step": 1172, + "lr": 0.0001, + "sup_loss": 1.4850841195107127 + }, + { + "step": 1173, + "lr": 0.0001, + "sup_loss": 1.5182421729232407 + }, + { + "step": 1174, + "lr": 0.0001, + "sup_loss": 1.476394210737806 + }, + { + "step": 1175, + "lr": 0.0001, + "sup_loss": 1.4821449618214182 + }, + { + "step": 1176, + "lr": 0.0001, + "sup_loss": 1.5024094032370132 + }, + { + "step": 1177, + "lr": 0.0001, + "sup_loss": 1.5022114492114407 + }, + { + "step": 1178, + "lr": 0.0001, + "sup_loss": 1.4621831924589657 + }, + { + "step": 1179, + "lr": 0.0001, + "sup_loss": 1.4811286199451543 + }, + { + "step": 1180, + "lr": 0.0001, + "sup_loss": 1.4578099169211276 + }, + { + "step": 1181, + "lr": 0.0001, + "sup_loss": 1.456907282548804 + }, + { + 
"step": 1182, + "lr": 0.0001, + "sup_loss": 1.4740023330832586 + }, + { + "step": 1183, + "lr": 0.0001, + "sup_loss": 1.4920635428211853 + }, + { + "step": 1184, + "lr": 0.0001, + "sup_loss": 1.5024858560779706 + }, + { + "step": 1185, + "lr": 0.0001, + "sup_loss": 1.5038948187638805 + }, + { + "step": 1186, + "lr": 0.0001, + "sup_loss": 1.4676908572162626 + }, + { + "step": 1187, + "lr": 0.0001, + "sup_loss": 1.488254510144115 + }, + { + "step": 1188, + "lr": 0.0001, + "sup_loss": 1.4263772574814746 + }, + { + "step": 1189, + "lr": 0.0001, + "sup_loss": 1.4676478384293827 + }, + { + "step": 1190, + "lr": 0.0001, + "sup_loss": 1.479789183280758 + }, + { + "step": 1191, + "lr": 0.0001, + "sup_loss": 1.4711267705653097 + }, + { + "step": 1192, + "lr": 0.0001, + "sup_loss": 1.4949702804907512 + }, + { + "step": 1193, + "lr": 0.0001, + "sup_loss": 1.513007655717897 + }, + { + "step": 1194, + "lr": 0.0001, + "sup_loss": 1.4509345633164903 + }, + { + "step": 1195, + "lr": 0.0001, + "sup_loss": 1.4685760250534123 + }, + { + "step": 1196, + "lr": 0.0001, + "sup_loss": 1.480123566158829 + }, + { + "step": 1197, + "lr": 0.0001, + "sup_loss": 1.418077246836308 + }, + { + "step": 1198, + "lr": 0.0001, + "sup_loss": 1.490917450497209 + }, + { + "step": 1199, + "lr": 0.0001, + "sup_loss": 1.5074936351952484 + }, + { + "step": 1200, + "lr": 0.0001, + "sup_loss": 1.4327738160400691, + "lyap1_mean": -4.980145454406738, + "lyap1_max": -4.894585132598877, + "lyap_spec_mean": [ + -4.980145454406738, + -4.967625617980957 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1201, + "lr": 0.0001, + "sup_loss": 1.4895817401426568 + }, + { + "step": 1202, + "lr": 0.0001, + "sup_loss": 1.4899703577499632 + }, + { + "step": 1203, + "lr": 0.0001, + "sup_loss": 1.4024933659507388 + }, + { + "step": 1204, + "lr": 0.0001, + "sup_loss": 1.5022236802821578 + }, + { + "step": 1205, + "lr": 0.0001, + "sup_loss": 1.4897626062931595 + }, + { + "step": 1206, + "lr": 0.0001, + "sup_loss": 
1.5044620497089163 + }, + { + "step": 1207, + "lr": 0.0001, + "sup_loss": 1.4643798769968492 + }, + { + "step": 1208, + "lr": 0.0001, + "sup_loss": 1.4491454287091519 + }, + { + "step": 1209, + "lr": 0.0001, + "sup_loss": 1.4219676043723457 + }, + { + "step": 1210, + "lr": 0.0001, + "sup_loss": 1.4867866233353422 + }, + { + "step": 1211, + "lr": 0.0001, + "sup_loss": 1.4936261592840825 + }, + { + "step": 1212, + "lr": 0.0001, + "sup_loss": 1.493904028254398 + }, + { + "step": 1213, + "lr": 0.0001, + "sup_loss": 1.4308489828733526 + }, + { + "step": 1214, + "lr": 0.0001, + "sup_loss": 1.4677810549300505 + }, + { + "step": 1215, + "lr": 0.0001, + "sup_loss": 1.3859949750578329 + }, + { + "step": 1216, + "lr": 0.0001, + "sup_loss": 1.4505806596494422 + }, + { + "step": 1217, + "lr": 0.0001, + "sup_loss": 1.4778397790206566 + }, + { + "step": 1218, + "lr": 0.0001, + "sup_loss": 1.440790028206717 + }, + { + "step": 1219, + "lr": 0.0001, + "sup_loss": 1.4726534285928603 + }, + { + "step": 1220, + "lr": 0.0001, + "sup_loss": 1.4858277006105567 + }, + { + "step": 1221, + "lr": 0.0001, + "sup_loss": 1.4535088818412911 + }, + { + "step": 1222, + "lr": 0.0001, + "sup_loss": 1.4167877079974307 + }, + { + "step": 1223, + "lr": 0.0001, + "sup_loss": 1.4929940592460411 + }, + { + "step": 1224, + "lr": 0.0001, + "sup_loss": 1.5014798452852063 + }, + { + "step": 1225, + "lr": 0.0001, + "sup_loss": 1.4830507593952498 + }, + { + "step": 1226, + "lr": 0.0001, + "sup_loss": 1.4618634931835748 + }, + { + "step": 1227, + "lr": 0.0001, + "sup_loss": 1.5204372552744971 + }, + { + "step": 1228, + "lr": 0.0001, + "sup_loss": 1.4946793222065595 + }, + { + "step": 1229, + "lr": 0.0001, + "sup_loss": 1.483768416583394 + }, + { + "step": 1230, + "lr": 0.0001, + "sup_loss": 1.4740270725400642 + }, + { + "step": 1231, + "lr": 0.0001, + "sup_loss": 1.48345038920087 + }, + { + "step": 1232, + "lr": 0.0001, + "sup_loss": 1.4616529497138675 + }, + { + "step": 1233, + "lr": 0.0001, + "sup_loss": 
1.4716175096811974 + }, + { + "step": 1234, + "lr": 0.0001, + "sup_loss": 1.457797067601594 + }, + { + "step": 1235, + "lr": 0.0001, + "sup_loss": 1.4874279390796319 + }, + { + "step": 1236, + "lr": 0.0001, + "sup_loss": 1.4636663214377188 + }, + { + "step": 1237, + "lr": 0.0001, + "sup_loss": 1.4504392628921492 + }, + { + "step": 1238, + "lr": 0.0001, + "sup_loss": 1.4170344608345224 + }, + { + "step": 1239, + "lr": 0.0001, + "sup_loss": 1.4893669447572702 + }, + { + "step": 1240, + "lr": 0.0001, + "sup_loss": 1.480716245937543 + }, + { + "step": 1241, + "lr": 0.0001, + "sup_loss": 1.4729937213032487 + }, + { + "step": 1242, + "lr": 0.0001, + "sup_loss": 1.4760532853462685 + }, + { + "step": 1243, + "lr": 0.0001, + "sup_loss": 1.4538871862153457 + }, + { + "step": 1244, + "lr": 0.0001, + "sup_loss": 1.4691397369385832 + }, + { + "step": 1245, + "lr": 0.0001, + "sup_loss": 1.4865630523389617 + }, + { + "step": 1246, + "lr": 0.0001, + "sup_loss": 1.4951783866766888 + }, + { + "step": 1247, + "lr": 0.0001, + "sup_loss": 1.4658872212811977 + }, + { + "step": 1248, + "lr": 0.0001, + "sup_loss": 1.4641396308973371 + }, + { + "step": 1249, + "lr": 0.0001, + "sup_loss": 1.4879901314026 + }, + { + "step": 1250, + "lr": 0.0001, + "sup_loss": 1.4687423186118318, + "lyap1_mean": -4.899415969848633, + "lyap1_max": -4.83683443069458, + "lyap_spec_mean": [ + -4.899415969848633, + -4.950930595397949 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1251, + "lr": 0.0001, + "sup_loss": 1.4469572613294321 + }, + { + "step": 1252, + "lr": 0.0001, + "sup_loss": 1.4581797062249233 + }, + { + "step": 1253, + "lr": 0.0001, + "sup_loss": 1.5087221661119814 + }, + { + "step": 1254, + "lr": 0.0001, + "sup_loss": 1.4923270443285557 + }, + { + "step": 1255, + "lr": 0.0001, + "sup_loss": 1.4851067653432983 + }, + { + "step": 1256, + "lr": 0.0001, + "sup_loss": 1.4747097076636708 + }, + { + "step": 1257, + "lr": 0.0001, + "sup_loss": 1.4987728722195657 + }, + { + "step": 1258, + 
"lr": 0.0001, + "sup_loss": 1.4576866146886762 + }, + { + "step": 1259, + "lr": 0.0001, + "sup_loss": 1.4267594419820548 + }, + { + "step": 1260, + "lr": 0.0001, + "sup_loss": 1.4895388922870765 + }, + { + "step": 1261, + "lr": 0.0001, + "sup_loss": 1.445517901260158 + }, + { + "step": 1262, + "lr": 0.0001, + "sup_loss": 1.473762977426208 + }, + { + "step": 1263, + "lr": 0.0001, + "sup_loss": 1.4828652079630404 + }, + { + "step": 1264, + "lr": 0.0001, + "sup_loss": 1.4991972207034314 + }, + { + "step": 1265, + "lr": 0.0001, + "sup_loss": 1.4757345657404937 + }, + { + "step": 1266, + "lr": 0.0001, + "sup_loss": 1.5010977982497316 + }, + { + "step": 1267, + "lr": 0.0001, + "sup_loss": 1.402599351061424 + }, + { + "step": 1268, + "lr": 0.0001, + "sup_loss": 1.4980575569678551 + }, + { + "step": 1269, + "lr": 0.0001, + "sup_loss": 1.478188775275099 + }, + { + "step": 1270, + "lr": 0.0001, + "sup_loss": 1.5047462702079168 + }, + { + "step": 1271, + "lr": 0.0001, + "sup_loss": 1.453459764416147 + }, + { + "step": 1272, + "lr": 0.0001, + "sup_loss": 1.5077941281889407 + }, + { + "step": 1273, + "lr": 0.0001, + "sup_loss": 1.466721068102257 + }, + { + "step": 1274, + "lr": 0.0001, + "sup_loss": 1.4807916531840593 + }, + { + "step": 1275, + "lr": 0.0001, + "sup_loss": 1.449105744514764 + }, + { + "step": 1276, + "lr": 0.0001, + "sup_loss": 1.458009877940556 + }, + { + "step": 1277, + "lr": 0.0001, + "sup_loss": 1.4687265051814076 + }, + { + "step": 1278, + "lr": 0.0001, + "sup_loss": 1.4424690589508722 + }, + { + "step": 1279, + "lr": 0.0001, + "sup_loss": 1.4693114190501486 + }, + { + "step": 1280, + "lr": 0.0001, + "sup_loss": 1.4626151462849437 + }, + { + "step": 1281, + "lr": 0.0001, + "sup_loss": 1.4482632170416359 + }, + { + "step": 1282, + "lr": 0.0001, + "sup_loss": 1.4939683942967972 + }, + { + "step": 1283, + "lr": 0.0001, + "sup_loss": 1.4078564557088988 + }, + { + "step": 1284, + "lr": 0.0001, + "sup_loss": 1.4524933791214658 + }, + { + "step": 1285, + "lr": 
0.0001, + "sup_loss": 1.4720895571225794 + }, + { + "step": 1286, + "lr": 0.0001, + "sup_loss": 1.4614224348921792 + }, + { + "step": 1287, + "lr": 0.0001, + "sup_loss": 1.4823583720322766 + }, + { + "step": 1288, + "lr": 0.0001, + "sup_loss": 1.5181830741607076 + }, + { + "step": 1289, + "lr": 0.0001, + "sup_loss": 1.5083781865830375 + }, + { + "step": 1290, + "lr": 0.0001, + "sup_loss": 1.4343800580328865 + }, + { + "step": 1291, + "lr": 0.0001, + "sup_loss": 1.501462498856272 + }, + { + "step": 1292, + "lr": 0.0001, + "sup_loss": 1.49106349856551 + }, + { + "step": 1293, + "lr": 0.0001, + "sup_loss": 1.4634810108772984 + }, + { + "step": 1294, + "lr": 0.0001, + "sup_loss": 1.445329403388514 + }, + { + "step": 1295, + "lr": 0.0001, + "sup_loss": 1.491845201882513 + }, + { + "step": 1296, + "lr": 0.0001, + "sup_loss": 1.4986002971463355 + }, + { + "step": 1297, + "lr": 0.0001, + "sup_loss": 1.456849861066217 + }, + { + "step": 1298, + "lr": 0.0001, + "sup_loss": 1.5209139225170956 + }, + { + "step": 1299, + "lr": 0.0001, + "sup_loss": 1.4783268760775852 + }, + { + "step": 1300, + "lr": 0.0001, + "sup_loss": 1.4505696377645774, + "lyap1_mean": -4.9549150466918945, + "lyap1_max": -4.879584789276123, + "lyap_spec_mean": [ + -4.9549150466918945, + -4.968719482421875 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1301, + "lr": 0.0001, + "sup_loss": 1.460384983999942 + }, + { + "step": 1302, + "lr": 0.0001, + "sup_loss": 1.517781936006953 + }, + { + "step": 1303, + "lr": 0.0001, + "sup_loss": 1.4854018418932051 + }, + { + "step": 1304, + "lr": 0.0001, + "sup_loss": 1.4924999559426448 + }, + { + "step": 1305, + "lr": 0.0001, + "sup_loss": 1.5417023057297385 + }, + { + "step": 1306, + "lr": 0.0001, + "sup_loss": 1.4740305175611248 + }, + { + "step": 1307, + "lr": 0.0001, + "sup_loss": 1.4857927905801596 + }, + { + "step": 1308, + "lr": 0.0001, + "sup_loss": 1.415051865940042 + }, + { + "step": 1309, + "lr": 0.0001, + "sup_loss": 1.4648704114524878 + }, + { + 
"step": 1310, + "lr": 0.0001, + "sup_loss": 1.4349438325914907 + }, + { + "step": 1311, + "lr": 0.0001, + "sup_loss": 1.4585966529935726 + }, + { + "step": 1312, + "lr": 0.0001, + "sup_loss": 1.4586155395149896 + }, + { + "step": 1313, + "lr": 0.0001, + "sup_loss": 1.477302356256303 + }, + { + "step": 1314, + "lr": 0.0001, + "sup_loss": 1.5036037677806355 + }, + { + "step": 1315, + "lr": 0.0001, + "sup_loss": 1.4675150214295212 + }, + { + "step": 1316, + "lr": 0.0001, + "sup_loss": 1.4865319754601845 + }, + { + "step": 1317, + "lr": 0.0001, + "sup_loss": 1.4912920452304554 + }, + { + "step": 1318, + "lr": 0.0001, + "sup_loss": 1.4759324266604805 + }, + { + "step": 1319, + "lr": 0.0001, + "sup_loss": 1.5282260128519345 + }, + { + "step": 1320, + "lr": 0.0001, + "sup_loss": 1.4750823000308122 + }, + { + "step": 1321, + "lr": 0.0001, + "sup_loss": 1.4503966787587672 + }, + { + "step": 1322, + "lr": 0.0001, + "sup_loss": 1.4776967328834663 + }, + { + "step": 1323, + "lr": 0.0001, + "sup_loss": 1.4601244767576667 + }, + { + "step": 1324, + "lr": 0.0001, + "sup_loss": 1.4531779622048628 + }, + { + "step": 1325, + "lr": 0.0001, + "sup_loss": 1.497494493622028 + }, + { + "step": 1326, + "lr": 0.0001, + "sup_loss": 1.4553385998154538 + }, + { + "step": 1327, + "lr": 0.0001, + "sup_loss": 1.4501649768439246 + }, + { + "step": 1328, + "lr": 0.0001, + "sup_loss": 1.4325750957136725 + }, + { + "step": 1329, + "lr": 0.0001, + "sup_loss": 1.4671514805063177 + }, + { + "step": 1330, + "lr": 0.0001, + "sup_loss": 1.4455857124314446 + }, + { + "step": 1331, + "lr": 0.0001, + "sup_loss": 1.4859139999051265 + }, + { + "step": 1332, + "lr": 0.0001, + "sup_loss": 1.4598386827419108 + }, + { + "step": 1333, + "lr": 0.0001, + "sup_loss": 1.4529397783925984 + }, + { + "step": 1334, + "lr": 0.0001, + "sup_loss": 1.4755278841286046 + }, + { + "step": 1335, + "lr": 0.0001, + "sup_loss": 1.3942360789764172 + }, + { + "step": 1336, + "lr": 0.0001, + "sup_loss": 1.4987030744783227 + }, + { + 
"step": 1337, + "lr": 0.0001, + "sup_loss": 1.4462084641083286 + }, + { + "step": 1338, + "lr": 0.0001, + "sup_loss": 1.4502301667462927 + }, + { + "step": 1339, + "lr": 0.0001, + "sup_loss": 1.4737479860943496 + }, + { + "step": 1340, + "lr": 0.0001, + "sup_loss": 1.487379088274462 + }, + { + "step": 1341, + "lr": 0.0001, + "sup_loss": 1.4623950037545321 + }, + { + "step": 1342, + "lr": 0.0001, + "sup_loss": 1.447335404682255 + }, + { + "step": 1343, + "lr": 0.0001, + "sup_loss": 1.4739215994972084 + }, + { + "step": 1344, + "lr": 0.0001, + "sup_loss": 1.5032003694394298 + }, + { + "step": 1345, + "lr": 0.0001, + "sup_loss": 1.4593605151972133 + }, + { + "step": 1346, + "lr": 0.0001, + "sup_loss": 1.4460440573893067 + }, + { + "step": 1347, + "lr": 0.0001, + "sup_loss": 1.4545720742066124 + }, + { + "step": 1348, + "lr": 0.0001, + "sup_loss": 1.4920996736069647 + }, + { + "step": 1349, + "lr": 0.0001, + "sup_loss": 1.4504111254454797 + }, + { + "step": 1350, + "lr": 0.0001, + "sup_loss": 1.4689092344518515, + "lyap1_mean": -4.931465148925781, + "lyap1_max": -4.89593505859375, + "lyap_spec_mean": [ + -4.931465148925781, + -4.935503005981445 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1351, + "lr": 0.0001, + "sup_loss": 1.4630826446384495 + }, + { + "step": 1352, + "lr": 0.0001, + "sup_loss": 1.4299442977172152 + }, + { + "step": 1353, + "lr": 0.0001, + "sup_loss": 1.4421396787085525 + }, + { + "step": 1354, + "lr": 0.0001, + "sup_loss": 1.457529412612607 + }, + { + "step": 1355, + "lr": 0.0001, + "sup_loss": 1.4598766565615744 + }, + { + "step": 1356, + "lr": 0.0001, + "sup_loss": 1.467753297812535 + }, + { + "step": 1357, + "lr": 0.0001, + "sup_loss": 1.4281446846028276 + }, + { + "step": 1358, + "lr": 0.0001, + "sup_loss": 1.4391846346674546 + }, + { + "step": 1359, + "lr": 0.0001, + "sup_loss": 1.4700838855903684 + }, + { + "step": 1360, + "lr": 0.0001, + "sup_loss": 1.4692868012953695 + }, + { + "step": 1361, + "lr": 0.0001, + "sup_loss": 
1.479793221633773 + }, + { + "step": 1362, + "lr": 0.0001, + "sup_loss": 1.4854180015532912 + }, + { + "step": 1363, + "lr": 0.0001, + "sup_loss": 1.4483145014422605 + }, + { + "step": 1364, + "lr": 0.0001, + "sup_loss": 1.4606855841318707 + }, + { + "step": 1365, + "lr": 0.0001, + "sup_loss": 1.4730099942735924 + }, + { + "step": 1366, + "lr": 0.0001, + "sup_loss": 1.4549413542224845 + }, + { + "step": 1367, + "lr": 0.0001, + "sup_loss": 1.4974572226217275 + }, + { + "step": 1368, + "lr": 0.0001, + "sup_loss": 1.4834459848526274 + }, + { + "step": 1369, + "lr": 0.0001, + "sup_loss": 1.4963073446846717 + }, + { + "step": 1370, + "lr": 0.0001, + "sup_loss": 1.3891793064379003 + }, + { + "step": 1371, + "lr": 0.0001, + "sup_loss": 1.5137110719177653 + }, + { + "step": 1372, + "lr": 0.0001, + "sup_loss": 1.4540415287576216 + }, + { + "step": 1373, + "lr": 0.0001, + "sup_loss": 1.4793176702172128 + }, + { + "step": 1374, + "lr": 0.0001, + "sup_loss": 1.487491825446896 + }, + { + "step": 1375, + "lr": 0.0001, + "sup_loss": 1.5073274954065587 + }, + { + "step": 1376, + "lr": 0.0001, + "sup_loss": 1.479394108427727 + }, + { + "step": 1377, + "lr": 0.0001, + "sup_loss": 1.446787371202537 + }, + { + "step": 1378, + "lr": 0.0001, + "sup_loss": 1.435867942260591 + }, + { + "step": 1379, + "lr": 0.0001, + "sup_loss": 1.4557625878849318 + }, + { + "step": 1380, + "lr": 0.0001, + "sup_loss": 1.4732116926036665 + }, + { + "step": 1381, + "lr": 0.0001, + "sup_loss": 1.4051078634930474 + }, + { + "step": 1382, + "lr": 0.0001, + "sup_loss": 1.466746186176767 + }, + { + "step": 1383, + "lr": 0.0001, + "sup_loss": 1.46929751694308 + }, + { + "step": 1384, + "lr": 0.0001, + "sup_loss": 1.463057672518489 + }, + { + "step": 1385, + "lr": 0.0001, + "sup_loss": 1.4259808233941291 + }, + { + "step": 1386, + "lr": 0.0001, + "sup_loss": 1.455293708515534 + }, + { + "step": 1387, + "lr": 0.0001, + "sup_loss": 1.4550107447406377 + }, + { + "step": 1388, + "lr": 0.0001, + "sup_loss": 
1.454115771654303 + }, + { + "step": 1389, + "lr": 0.0001, + "sup_loss": 1.4786664280358452 + }, + { + "step": 1390, + "lr": 0.0001, + "sup_loss": 1.455955020995784 + }, + { + "step": 1391, + "lr": 0.0001, + "sup_loss": 1.4659499119455552 + }, + { + "step": 1392, + "lr": 0.0001, + "sup_loss": 1.451174394821315 + }, + { + "step": 1393, + "lr": 0.0001, + "sup_loss": 1.4465310232613278 + }, + { + "step": 1394, + "lr": 0.0001, + "sup_loss": 1.4320645250557789 + }, + { + "step": 1395, + "lr": 0.0001, + "sup_loss": 1.4676390112981756 + }, + { + "step": 1396, + "lr": 0.0001, + "sup_loss": 1.5132170845575477 + }, + { + "step": 1397, + "lr": 0.0001, + "sup_loss": 1.4234981403832874 + }, + { + "step": 1398, + "lr": 0.0001, + "sup_loss": 1.4042491299832027 + }, + { + "step": 1399, + "lr": 0.0001, + "sup_loss": 1.462284596693642 + }, + { + "step": 1400, + "lr": 0.0001, + "sup_loss": 1.445975186773351, + "lyap1_mean": -4.836701393127441, + "lyap1_max": -4.783886432647705, + "lyap_spec_mean": [ + -4.8367018699646, + -4.896517276763916 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1401, + "lr": 0.0001, + "sup_loss": 1.425526613166654 + }, + { + "step": 1402, + "lr": 0.0001, + "sup_loss": 1.4634727501607048 + }, + { + "step": 1403, + "lr": 0.0001, + "sup_loss": 1.4154321319087628 + }, + { + "step": 1404, + "lr": 0.0001, + "sup_loss": 1.468091586947968 + }, + { + "step": 1405, + "lr": 0.0001, + "sup_loss": 1.476015856226677 + }, + { + "step": 1406, + "lr": 0.0001, + "sup_loss": 1.4469313944832158 + }, + { + "step": 1407, + "lr": 0.0001, + "sup_loss": 1.4648772473230929 + }, + { + "step": 1408, + "lr": 0.0001, + "sup_loss": 1.4353985894400934 + }, + { + "step": 1409, + "lr": 0.0001, + "sup_loss": 1.471189726692836 + }, + { + "step": 1410, + "lr": 0.0001, + "sup_loss": 1.4349447640461945 + }, + { + "step": 1411, + "lr": 0.0001, + "sup_loss": 1.459744562843906 + }, + { + "step": 1412, + "lr": 0.0001, + "sup_loss": 1.4746893257621285 + }, + { + "step": 1413, + "lr": 
0.0001, + "sup_loss": 1.4643818213289568 + }, + { + "step": 1414, + "lr": 0.0001, + "sup_loss": 1.4549503817906437 + }, + { + "step": 1415, + "lr": 0.0001, + "sup_loss": 1.4391135190587492 + }, + { + "step": 1416, + "lr": 0.0001, + "sup_loss": 1.4326492161649624 + }, + { + "step": 1417, + "lr": 0.0001, + "sup_loss": 1.4720987897305684 + }, + { + "step": 1418, + "lr": 0.0001, + "sup_loss": 1.4914051889134379 + }, + { + "step": 1419, + "lr": 0.0001, + "sup_loss": 1.4495293383437353 + }, + { + "step": 1420, + "lr": 0.0001, + "sup_loss": 1.4190264207680032 + }, + { + "step": 1421, + "lr": 0.0001, + "sup_loss": 1.4440298753641012 + }, + { + "step": 1422, + "lr": 0.0001, + "sup_loss": 1.4541722232559824 + }, + { + "step": 1423, + "lr": 0.0001, + "sup_loss": 1.451851009894193 + }, + { + "step": 1424, + "lr": 0.0001, + "sup_loss": 1.4817215110014974 + }, + { + "step": 1425, + "lr": 0.0001, + "sup_loss": 1.4576574069684372 + }, + { + "step": 1426, + "lr": 0.0001, + "sup_loss": 1.4468147058934735 + }, + { + "step": 1427, + "lr": 0.0001, + "sup_loss": 1.4661241096381898 + }, + { + "step": 1428, + "lr": 0.0001, + "sup_loss": 1.4414842098636724 + }, + { + "step": 1429, + "lr": 0.0001, + "sup_loss": 1.4400921159499058 + }, + { + "step": 1430, + "lr": 0.0001, + "sup_loss": 1.4618246404247086 + }, + { + "step": 1431, + "lr": 0.0001, + "sup_loss": 1.4238758998484815 + }, + { + "step": 1432, + "lr": 0.0001, + "sup_loss": 1.4122807748429786 + }, + { + "step": 1433, + "lr": 0.0001, + "sup_loss": 1.4696382313630718 + }, + { + "step": 1434, + "lr": 0.0001, + "sup_loss": 1.4699219728407953 + }, + { + "step": 1435, + "lr": 0.0001, + "sup_loss": 1.4435760705898395 + }, + { + "step": 1436, + "lr": 0.0001, + "sup_loss": 1.4491098498119803 + }, + { + "step": 1437, + "lr": 0.0001, + "sup_loss": 1.4636085678164543 + }, + { + "step": 1438, + "lr": 0.0001, + "sup_loss": 1.435290432757181 + }, + { + "step": 1439, + "lr": 0.0001, + "sup_loss": 1.4312625206038057 + }, + { + "step": 1440, + "lr": 
0.0001, + "sup_loss": 1.4504287630939405 + }, + { + "step": 1441, + "lr": 0.0001, + "sup_loss": 1.4118009765397788 + }, + { + "step": 1442, + "lr": 0.0001, + "sup_loss": 1.4566654497466118 + }, + { + "step": 1443, + "lr": 0.0001, + "sup_loss": 1.4492776564966285 + }, + { + "step": 1444, + "lr": 0.0001, + "sup_loss": 1.3958982549544174 + }, + { + "step": 1445, + "lr": 0.0001, + "sup_loss": 1.47066031940578 + }, + { + "step": 1446, + "lr": 0.0001, + "sup_loss": 1.462014501980225 + }, + { + "step": 1447, + "lr": 0.0001, + "sup_loss": 1.430744536039194 + }, + { + "step": 1448, + "lr": 0.0001, + "sup_loss": 1.4803154673721786 + }, + { + "step": 1449, + "lr": 0.0001, + "sup_loss": 1.4390528509936122 + }, + { + "step": 1450, + "lr": 0.0001, + "sup_loss": 1.4671590561530587, + "lyap1_mean": -4.90616512298584, + "lyap1_max": -4.82309103012085, + "lyap_spec_mean": [ + -4.90616512298584, + -4.88002872467041 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1451, + "lr": 0.0001, + "sup_loss": 1.491449835409722 + }, + { + "step": 1452, + "lr": 0.0001, + "sup_loss": 1.4102554179793172 + }, + { + "step": 1453, + "lr": 0.0001, + "sup_loss": 1.3989063628407754 + }, + { + "step": 1454, + "lr": 0.0001, + "sup_loss": 1.472127869804765 + }, + { + "step": 1455, + "lr": 0.0001, + "sup_loss": 1.4634677411154382 + }, + { + "step": 1456, + "lr": 0.0001, + "sup_loss": 1.4765568745353184 + }, + { + "step": 1457, + "lr": 0.0001, + "sup_loss": 1.4573270010805905 + }, + { + "step": 1458, + "lr": 0.0001, + "sup_loss": 1.4550360625273984 + }, + { + "step": 1459, + "lr": 0.0001, + "sup_loss": 1.4136294666326934 + }, + { + "step": 1460, + "lr": 0.0001, + "sup_loss": 1.442390763243285 + }, + { + "step": 1461, + "lr": 0.0001, + "sup_loss": 1.4629446204575114 + }, + { + "step": 1462, + "lr": 0.0001, + "sup_loss": 1.4599644884978145 + }, + { + "step": 1463, + "lr": 0.0001, + "sup_loss": 1.4711858370095716 + }, + { + "step": 1464, + "lr": 0.0001, + "sup_loss": 1.4041374902763835 + }, + { + 
"step": 1465, + "lr": 0.0001, + "sup_loss": 1.4374500732754496 + }, + { + "step": 1466, + "lr": 0.0001, + "sup_loss": 1.4513513570721417 + }, + { + "step": 1467, + "lr": 0.0001, + "sup_loss": 1.4173516926917245 + }, + { + "step": 1468, + "lr": 0.0001, + "sup_loss": 1.461530739996572 + }, + { + "step": 1469, + "lr": 0.0001, + "sup_loss": 1.4226043641662203 + }, + { + "step": 1470, + "lr": 0.0001, + "sup_loss": 1.451586577456039 + }, + { + "step": 1471, + "lr": 0.0001, + "sup_loss": 1.4430202592766468 + }, + { + "step": 1472, + "lr": 0.0001, + "sup_loss": 1.4487111015767749 + }, + { + "step": 1473, + "lr": 0.0001, + "sup_loss": 1.4578467531434212 + }, + { + "step": 1474, + "lr": 0.0001, + "sup_loss": 1.4121042796802712 + }, + { + "step": 1475, + "lr": 0.0001, + "sup_loss": 1.4828702623885843 + }, + { + "step": 1476, + "lr": 0.0001, + "sup_loss": 1.4432026368424578 + }, + { + "step": 1477, + "lr": 0.0001, + "sup_loss": 1.424240077660403 + }, + { + "step": 1478, + "lr": 0.0001, + "sup_loss": 1.4356999725345851 + }, + { + "step": 1479, + "lr": 0.0001, + "sup_loss": 1.4055916905473094 + }, + { + "step": 1480, + "lr": 0.0001, + "sup_loss": 1.4637976175365572 + }, + { + "step": 1481, + "lr": 0.0001, + "sup_loss": 1.458403314810014 + }, + { + "step": 1482, + "lr": 0.0001, + "sup_loss": 1.4927108373228621 + }, + { + "step": 1483, + "lr": 0.0001, + "sup_loss": 1.4489999078809652 + }, + { + "step": 1484, + "lr": 0.0001, + "sup_loss": 1.4278160973008365 + }, + { + "step": 1485, + "lr": 0.0001, + "sup_loss": 1.4410087245834038 + }, + { + "step": 1486, + "lr": 0.0001, + "sup_loss": 1.4096582771348374 + }, + { + "step": 1487, + "lr": 0.0001, + "sup_loss": 1.4415387910578528 + }, + { + "step": 1488, + "lr": 0.0001, + "sup_loss": 1.4421186568768058 + }, + { + "step": 1489, + "lr": 0.0001, + "sup_loss": 1.4539014787301254 + }, + { + "step": 1490, + "lr": 0.0001, + "sup_loss": 1.4398542386156385 + }, + { + "step": 1491, + "lr": 0.0001, + "sup_loss": 1.4351546744594235 + }, + { + 
"step": 1492, + "lr": 0.0001, + "sup_loss": 1.4473557335491596 + }, + { + "step": 1493, + "lr": 0.0001, + "sup_loss": 1.4526134050475805 + }, + { + "step": 1494, + "lr": 0.0001, + "sup_loss": 1.449403060501902 + }, + { + "step": 1495, + "lr": 0.0001, + "sup_loss": 1.4483793022647264 + }, + { + "step": 1496, + "lr": 0.0001, + "sup_loss": 1.4505850445617374 + }, + { + "step": 1497, + "lr": 0.0001, + "sup_loss": 1.4417696126903843 + }, + { + "step": 1498, + "lr": 0.0001, + "sup_loss": 1.4573266014393726 + }, + { + "step": 1499, + "lr": 0.0001, + "sup_loss": 1.3771375701434592 + }, + { + "step": 1500, + "lr": 0.0001, + "sup_loss": 1.3766876470720837, + "lyap1_mean": -4.871829986572266, + "lyap1_max": -4.84684944152832, + "lyap_spec_mean": [ + -4.871829986572266, + -4.8450117111206055 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1501, + "lr": 0.0001, + "sup_loss": 1.3965918804816742 + }, + { + "step": 1502, + "lr": 0.0001, + "sup_loss": 1.4468647406829607 + }, + { + "step": 1503, + "lr": 0.0001, + "sup_loss": 1.4254322832396327 + }, + { + "step": 1504, + "lr": 0.0001, + "sup_loss": 1.4524092202807979 + }, + { + "step": 1505, + "lr": 0.0001, + "sup_loss": 1.442207869589765 + }, + { + "step": 1506, + "lr": 0.0001, + "sup_loss": 1.459412210629993 + }, + { + "step": 1507, + "lr": 0.0001, + "sup_loss": 1.4374820854662866 + }, + { + "step": 1508, + "lr": 0.0001, + "sup_loss": 1.4085379451374989 + }, + { + "step": 1509, + "lr": 0.0001, + "sup_loss": 1.443730493127249 + }, + { + "step": 1510, + "lr": 0.0001, + "sup_loss": 1.4674984512768257 + }, + { + "step": 1511, + "lr": 0.0001, + "sup_loss": 1.4462923080916368 + }, + { + "step": 1512, + "lr": 0.0001, + "sup_loss": 1.3202777433618544 + }, + { + "step": 1513, + "lr": 0.0001, + "sup_loss": 1.4651239241236453 + }, + { + "step": 1514, + "lr": 0.0001, + "sup_loss": 1.4437704752606575 + }, + { + "step": 1515, + "lr": 0.0001, + "sup_loss": 1.4552223612865343 + }, + { + "step": 1516, + "lr": 0.0001, + "sup_loss": 
1.4456883634826196 + }, + { + "step": 1517, + "lr": 0.0001, + "sup_loss": 1.4408543382277836 + }, + { + "step": 1518, + "lr": 0.0001, + "sup_loss": 1.4419442236808644 + }, + { + "step": 1519, + "lr": 0.0001, + "sup_loss": 1.440560727647496 + }, + { + "step": 1520, + "lr": 0.0001, + "sup_loss": 1.419030841214394 + }, + { + "step": 1521, + "lr": 0.0001, + "sup_loss": 1.4686379857966065 + }, + { + "step": 1522, + "lr": 0.0001, + "sup_loss": 1.4333427271638608 + }, + { + "step": 1523, + "lr": 0.0001, + "sup_loss": 1.4711165418108552 + }, + { + "step": 1524, + "lr": 0.0001, + "sup_loss": 1.4490470654712322 + }, + { + "step": 1525, + "lr": 0.0001, + "sup_loss": 1.4390122524656563 + }, + { + "step": 1526, + "lr": 0.0001, + "sup_loss": 1.4106675291259594 + }, + { + "step": 1527, + "lr": 0.0001, + "sup_loss": 1.4050782705555793 + }, + { + "step": 1528, + "lr": 0.0001, + "sup_loss": 1.4506016534995017 + }, + { + "step": 1529, + "lr": 0.0001, + "sup_loss": 1.4642485397074907 + }, + { + "step": 1530, + "lr": 0.0001, + "sup_loss": 1.49480964273257 + }, + { + "step": 1531, + "lr": 0.0001, + "sup_loss": 1.4415406669082693 + }, + { + "step": 1532, + "lr": 0.0001, + "sup_loss": 1.4249572828119925 + }, + { + "step": 1533, + "lr": 0.0001, + "sup_loss": 1.4758269501521326 + }, + { + "step": 1534, + "lr": 0.0001, + "sup_loss": 1.4443047736953802 + }, + { + "step": 1535, + "lr": 0.0001, + "sup_loss": 1.413901495120456 + }, + { + "step": 1536, + "lr": 0.0001, + "sup_loss": 1.4784263085091247 + }, + { + "step": 1537, + "lr": 0.0001, + "sup_loss": 1.4521063973237631 + }, + { + "step": 1538, + "lr": 0.0001, + "sup_loss": 1.4147111108265757 + }, + { + "step": 1539, + "lr": 0.0001, + "sup_loss": 1.4730862181915314 + }, + { + "step": 1540, + "lr": 0.0001, + "sup_loss": 1.4251807375381318 + }, + { + "step": 1541, + "lr": 0.0001, + "sup_loss": 1.447337750390472 + }, + { + "step": 1542, + "lr": 0.0001, + "sup_loss": 1.4213962976530492 + }, + { + "step": 1543, + "lr": 0.0001, + "sup_loss": 
1.3839344815549173 + }, + { + "step": 1544, + "lr": 0.0001, + "sup_loss": 1.438176941126204 + }, + { + "step": 1545, + "lr": 0.0001, + "sup_loss": 1.4495826050872176 + }, + { + "step": 1546, + "lr": 0.0001, + "sup_loss": 1.398046447358952 + }, + { + "step": 1547, + "lr": 0.0001, + "sup_loss": 1.4727749053136245 + }, + { + "step": 1548, + "lr": 0.0001, + "sup_loss": 1.4108699284492 + }, + { + "step": 1549, + "lr": 0.0001, + "sup_loss": 1.4307992397035778 + }, + { + "step": 1550, + "lr": 0.0001, + "sup_loss": 1.460253181661509, + "lyap1_mean": -4.80469274520874, + "lyap1_max": -4.705941677093506, + "lyap_spec_mean": [ + -4.804693222045898, + -4.79709529876709 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1551, + "lr": 0.0001, + "sup_loss": 1.4320098646343116 + }, + { + "step": 1552, + "lr": 0.0001, + "sup_loss": 1.4420628696579727 + }, + { + "step": 1553, + "lr": 0.0001, + "sup_loss": 1.4154910895199624 + }, + { + "step": 1554, + "lr": 0.0001, + "sup_loss": 1.4539745642951691 + }, + { + "step": 1555, + "lr": 0.0001, + "sup_loss": 1.4223957784583583 + }, + { + "step": 1556, + "lr": 0.0001, + "sup_loss": 1.407259798366072 + }, + { + "step": 1557, + "lr": 0.0001, + "sup_loss": 1.4089124678735199 + }, + { + "step": 1558, + "lr": 0.0001, + "sup_loss": 1.4491187104295231 + }, + { + "step": 1559, + "lr": 0.0001, + "sup_loss": 1.426229774829771 + }, + { + "step": 1560, + "lr": 0.0001, + "sup_loss": 1.4511797323531495 + }, + { + "step": 1561, + "lr": 0.0001, + "sup_loss": 1.4345490973707553 + }, + { + "step": 1562, + "lr": 0.0001, + "sup_loss": 1.423930644907223 + }, + { + "step": 1563, + "lr": 0.0001, + "sup_loss": 1.4115715099886488 + }, + { + "step": 1564, + "lr": 0.0001, + "sup_loss": 1.4624286309927732 + }, + { + "step": 1565, + "lr": 0.0001, + "sup_loss": 1.4377162691176748 + }, + { + "step": 1566, + "lr": 0.0001, + "sup_loss": 1.472821141684176 + }, + { + "step": 1567, + "lr": 0.0001, + "sup_loss": 1.441649061626242 + }, + { + "step": 1568, + "lr": 
0.0001, + "sup_loss": 1.4046182423761906 + }, + { + "step": 1569, + "lr": 0.0001, + "sup_loss": 1.4447464080357837 + }, + { + "step": 1570, + "lr": 0.0001, + "sup_loss": 1.4435588230092717 + }, + { + "step": 1571, + "lr": 0.0001, + "sup_loss": 1.4307417489848482 + }, + { + "step": 1572, + "lr": 0.0001, + "sup_loss": 1.3650688021631034 + }, + { + "step": 1573, + "lr": 0.0001, + "sup_loss": 1.452389155388695 + }, + { + "step": 1574, + "lr": 0.0001, + "sup_loss": 1.435506967538476 + }, + { + "step": 1575, + "lr": 0.0001, + "sup_loss": 1.396915401950261 + }, + { + "step": 1576, + "lr": 0.0001, + "sup_loss": 1.4191532717079836 + }, + { + "step": 1577, + "lr": 0.0001, + "sup_loss": 1.4291236792398887 + }, + { + "step": 1578, + "lr": 0.0001, + "sup_loss": 1.4428497107445395 + }, + { + "step": 1579, + "lr": 0.0001, + "sup_loss": 1.4365638826806746 + }, + { + "step": 1580, + "lr": 0.0001, + "sup_loss": 1.4386359737369656 + }, + { + "step": 1581, + "lr": 0.0001, + "sup_loss": 1.4293804073971739 + }, + { + "step": 1582, + "lr": 0.0001, + "sup_loss": 1.4037636622759355 + }, + { + "step": 1583, + "lr": 0.0001, + "sup_loss": 1.4493564261489453 + }, + { + "step": 1584, + "lr": 0.0001, + "sup_loss": 1.425427620195918 + }, + { + "step": 1585, + "lr": 0.0001, + "sup_loss": 1.4265061881917855 + }, + { + "step": 1586, + "lr": 0.0001, + "sup_loss": 1.4494213139709435 + }, + { + "step": 1587, + "lr": 0.0001, + "sup_loss": 1.4289017183547863 + }, + { + "step": 1588, + "lr": 0.0001, + "sup_loss": 1.4320519049613913 + }, + { + "step": 1589, + "lr": 0.0001, + "sup_loss": 1.4315669939151838 + }, + { + "step": 1590, + "lr": 0.0001, + "sup_loss": 1.4432898052203689 + }, + { + "step": 1591, + "lr": 0.0001, + "sup_loss": 1.4061406292705643 + }, + { + "step": 1592, + "lr": 0.0001, + "sup_loss": 1.4313613710222521 + }, + { + "step": 1593, + "lr": 0.0001, + "sup_loss": 1.4197792724377134 + }, + { + "step": 1594, + "lr": 0.0001, + "sup_loss": 1.442038832529003 + }, + { + "step": 1595, + "lr": 
0.0001, + "sup_loss": 1.378936526715241 + }, + { + "step": 1596, + "lr": 0.0001, + "sup_loss": 1.4292915904783534 + }, + { + "step": 1597, + "lr": 0.0001, + "sup_loss": 1.4338699193250708 + }, + { + "step": 1598, + "lr": 0.0001, + "sup_loss": 1.4062768140680455 + }, + { + "step": 1599, + "lr": 0.0001, + "sup_loss": 1.4390196676438947 + }, + { + "step": 1600, + "lr": 0.0001, + "sup_loss": 1.4286613750137396, + "lyap1_mean": -4.763199806213379, + "lyap1_max": -4.703543663024902, + "lyap_spec_mean": [ + -4.763199806213379, + -4.740202903747559 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1601, + "lr": 0.0001, + "sup_loss": 1.4313711013301813 + }, + { + "step": 1602, + "lr": 0.0001, + "sup_loss": 1.4231260501591987 + }, + { + "step": 1603, + "lr": 0.0001, + "sup_loss": 1.4143425399117926 + }, + { + "step": 1604, + "lr": 0.0001, + "sup_loss": 1.3952967175769726 + }, + { + "step": 1605, + "lr": 0.0001, + "sup_loss": 1.3696814367291628 + }, + { + "step": 1606, + "lr": 0.0001, + "sup_loss": 1.4499061611564183 + }, + { + "step": 1607, + "lr": 0.0001, + "sup_loss": 1.4623163085656987 + }, + { + "step": 1608, + "lr": 0.0001, + "sup_loss": 1.444351381271383 + }, + { + "step": 1609, + "lr": 0.0001, + "sup_loss": 1.481332893135834 + }, + { + "step": 1610, + "lr": 0.0001, + "sup_loss": 1.3963978987054417 + }, + { + "step": 1611, + "lr": 0.0001, + "sup_loss": 1.4446230649070155 + }, + { + "step": 1612, + "lr": 0.0001, + "sup_loss": 1.3832560736108355 + }, + { + "step": 1613, + "lr": 0.0001, + "sup_loss": 1.394415893257476 + }, + { + "step": 1614, + "lr": 0.0001, + "sup_loss": 1.398991654114705 + }, + { + "step": 1615, + "lr": 0.0001, + "sup_loss": 1.4276028124944597 + }, + { + "step": 1616, + "lr": 0.0001, + "sup_loss": 1.468078326701278 + }, + { + "step": 1617, + "lr": 0.0001, + "sup_loss": 1.430733044913817 + }, + { + "step": 1618, + "lr": 0.0001, + "sup_loss": 1.4431687698911906 + }, + { + "step": 1619, + "lr": 0.0001, + "sup_loss": 1.4145730132125411 + }, + { + 
"step": 1620, + "lr": 0.0001, + "sup_loss": 1.4416061158792244 + }, + { + "step": 1621, + "lr": 0.0001, + "sup_loss": 1.4485650497508529 + }, + { + "step": 1622, + "lr": 0.0001, + "sup_loss": 1.4362126962339543 + }, + { + "step": 1623, + "lr": 0.0001, + "sup_loss": 1.416302843311267 + }, + { + "step": 1624, + "lr": 0.0001, + "sup_loss": 1.3833771715083552 + }, + { + "step": 1625, + "lr": 0.0001, + "sup_loss": 1.4380014476274923 + }, + { + "step": 1626, + "lr": 0.0001, + "sup_loss": 1.393176127322929 + }, + { + "step": 1627, + "lr": 0.0001, + "sup_loss": 1.4463411928478747 + }, + { + "step": 1628, + "lr": 0.0001, + "sup_loss": 1.4824992297043507 + }, + { + "step": 1629, + "lr": 0.0001, + "sup_loss": 1.4585442526448107 + }, + { + "step": 1630, + "lr": 0.0001, + "sup_loss": 1.4582178907285923 + }, + { + "step": 1631, + "lr": 0.0001, + "sup_loss": 1.4075550806801504 + }, + { + "step": 1632, + "lr": 0.0001, + "sup_loss": 1.463687383620692 + }, + { + "step": 1633, + "lr": 0.0001, + "sup_loss": 1.3850792501614422 + }, + { + "step": 1634, + "lr": 0.0001, + "sup_loss": 1.4332836650862533 + }, + { + "step": 1635, + "lr": 0.0001, + "sup_loss": 1.4435765539443548 + }, + { + "step": 1636, + "lr": 0.0001, + "sup_loss": 1.4205704543582052 + }, + { + "step": 1637, + "lr": 0.0001, + "sup_loss": 1.4226421267757798 + }, + { + "step": 1638, + "lr": 0.0001, + "sup_loss": 1.3954355525190534 + }, + { + "step": 1639, + "lr": 0.0001, + "sup_loss": 1.4155729674883952 + }, + { + "step": 1640, + "lr": 0.0001, + "sup_loss": 1.418942643449481 + }, + { + "step": 1641, + "lr": 0.0001, + "sup_loss": 1.4405763550971713 + }, + { + "step": 1642, + "lr": 0.0001, + "sup_loss": 1.453842294406012 + }, + { + "step": 1643, + "lr": 0.0001, + "sup_loss": 1.4032510063544934 + }, + { + "step": 1644, + "lr": 0.0001, + "sup_loss": 1.4366442090870355 + }, + { + "step": 1645, + "lr": 0.0001, + "sup_loss": 1.4142527229832684 + }, + { + "step": 1646, + "lr": 0.0001, + "sup_loss": 1.4597409504314474 + }, + { + 
"step": 1647, + "lr": 0.0001, + "sup_loss": 1.390053620372242 + }, + { + "step": 1648, + "lr": 0.0001, + "sup_loss": 1.4452855452649864 + }, + { + "step": 1649, + "lr": 0.0001, + "sup_loss": 1.3482241767899956 + }, + { + "step": 1650, + "lr": 0.0001, + "sup_loss": 1.4379065717912682, + "lyap1_mean": -4.727723121643066, + "lyap1_max": -4.705477237701416, + "lyap_spec_mean": [ + -4.727723121643066, + -4.718955993652344 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1651, + "lr": 0.0001, + "sup_loss": 1.4321117705376714 + }, + { + "step": 1652, + "lr": 0.0001, + "sup_loss": 1.4133861409512296 + }, + { + "step": 1653, + "lr": 0.0001, + "sup_loss": 1.4176743542731363 + }, + { + "step": 1654, + "lr": 0.0001, + "sup_loss": 1.396138023143566 + }, + { + "step": 1655, + "lr": 0.0001, + "sup_loss": 1.3997407080845763 + }, + { + "step": 1656, + "lr": 0.0001, + "sup_loss": 1.427349155133362 + }, + { + "step": 1657, + "lr": 0.0001, + "sup_loss": 1.4258199824411828 + }, + { + "step": 1658, + "lr": 0.0001, + "sup_loss": 1.3965682996272144 + }, + { + "step": 1659, + "lr": 0.0001, + "sup_loss": 1.41996267365519 + }, + { + "step": 1660, + "lr": 0.0001, + "sup_loss": 1.384334905027236 + }, + { + "step": 1661, + "lr": 0.0001, + "sup_loss": 1.4192923429379514 + }, + { + "step": 1662, + "lr": 0.0001, + "sup_loss": 1.441896717427595 + }, + { + "step": 1663, + "lr": 0.0001, + "sup_loss": 1.4611603278310235 + }, + { + "step": 1664, + "lr": 0.0001, + "sup_loss": 1.4353512791182002 + }, + { + "step": 1665, + "lr": 0.0001, + "sup_loss": 1.4400298026306764 + }, + { + "step": 1666, + "lr": 0.0001, + "sup_loss": 1.4343377325640425 + }, + { + "step": 1667, + "lr": 0.0001, + "sup_loss": 1.339260407673035 + }, + { + "step": 1668, + "lr": 0.0001, + "sup_loss": 1.4315497336457232 + }, + { + "step": 1669, + "lr": 0.0001, + "sup_loss": 1.3972973322769555 + }, + { + "step": 1670, + "lr": 0.0001, + "sup_loss": 1.447709409675841 + }, + { + "step": 1671, + "lr": 0.0001, + "sup_loss": 
1.463115475220204 + }, + { + "step": 1672, + "lr": 0.0001, + "sup_loss": 1.4403629075924735 + }, + { + "step": 1673, + "lr": 0.0001, + "sup_loss": 1.4281720894559717 + }, + { + "step": 1674, + "lr": 0.0001, + "sup_loss": 1.428105523848226 + }, + { + "step": 1675, + "lr": 0.0001, + "sup_loss": 1.3686610951756117 + }, + { + "step": 1676, + "lr": 0.0001, + "sup_loss": 1.4167514372908827 + }, + { + "step": 1677, + "lr": 0.0001, + "sup_loss": 1.3994320602767463 + }, + { + "step": 1678, + "lr": 0.0001, + "sup_loss": 1.4253372446178374 + }, + { + "step": 1679, + "lr": 0.0001, + "sup_loss": 1.4026265897817207 + }, + { + "step": 1680, + "lr": 0.0001, + "sup_loss": 1.3920230830007527 + }, + { + "step": 1681, + "lr": 0.0001, + "sup_loss": 1.4095924991565152 + }, + { + "step": 1682, + "lr": 0.0001, + "sup_loss": 1.4676702581083167 + }, + { + "step": 1683, + "lr": 0.0001, + "sup_loss": 1.3992303969655753 + }, + { + "step": 1684, + "lr": 0.0001, + "sup_loss": 1.4439143679373132 + }, + { + "step": 1685, + "lr": 0.0001, + "sup_loss": 1.4034484221378796 + }, + { + "step": 1686, + "lr": 0.0001, + "sup_loss": 1.418778198822478 + }, + { + "step": 1687, + "lr": 0.0001, + "sup_loss": 1.4083344627812966 + }, + { + "step": 1688, + "lr": 0.0001, + "sup_loss": 1.4120106667129981 + }, + { + "step": 1689, + "lr": 0.0001, + "sup_loss": 1.3977457566406022 + }, + { + "step": 1690, + "lr": 0.0001, + "sup_loss": 1.422282104114155 + }, + { + "step": 1691, + "lr": 0.0001, + "sup_loss": 1.4226098030955077 + }, + { + "step": 1692, + "lr": 0.0001, + "sup_loss": 1.4197586173031582 + }, + { + "step": 1693, + "lr": 0.0001, + "sup_loss": 1.4018677006490612 + }, + { + "step": 1694, + "lr": 0.0001, + "sup_loss": 1.4733926593923452 + }, + { + "step": 1695, + "lr": 0.0001, + "sup_loss": 1.4167489364297425 + }, + { + "step": 1696, + "lr": 0.0001, + "sup_loss": 1.43865931643097 + }, + { + "step": 1697, + "lr": 0.0001, + "sup_loss": 1.4411628795526243 + }, + { + "step": 1698, + "lr": 0.0001, + "sup_loss": 
1.4263331180858603 + }, + { + "step": 1699, + "lr": 0.0001, + "sup_loss": 1.41919149827389 + }, + { + "step": 1700, + "lr": 0.0001, + "sup_loss": 1.4295963893504244, + "lyap1_mean": -4.666998386383057, + "lyap1_max": -4.567479610443115, + "lyap_spec_mean": [ + -4.666998386383057, + -4.687930107116699 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1701, + "lr": 0.0001, + "sup_loss": 1.368808215756502 + }, + { + "step": 1702, + "lr": 0.0001, + "sup_loss": 1.4008436537421505 + }, + { + "step": 1703, + "lr": 0.0001, + "sup_loss": 1.4424056708332293 + }, + { + "step": 1704, + "lr": 0.0001, + "sup_loss": 1.4039474706585189 + }, + { + "step": 1705, + "lr": 0.0001, + "sup_loss": 1.3633327161587419 + }, + { + "step": 1706, + "lr": 0.0001, + "sup_loss": 1.4025609839473128 + }, + { + "step": 1707, + "lr": 0.0001, + "sup_loss": 1.4483348634165458 + }, + { + "step": 1708, + "lr": 0.0001, + "sup_loss": 1.3753938609075513 + }, + { + "step": 1709, + "lr": 0.0001, + "sup_loss": 1.4157479029280444 + }, + { + "step": 1710, + "lr": 0.0001, + "sup_loss": 1.4666936377813222 + }, + { + "step": 1711, + "lr": 0.0001, + "sup_loss": 1.4452608477161177 + }, + { + "step": 1712, + "lr": 0.0001, + "sup_loss": 1.3974868662753643 + }, + { + "step": 1713, + "lr": 0.0001, + "sup_loss": 1.4255705364106201 + }, + { + "step": 1714, + "lr": 0.0001, + "sup_loss": 1.4152061894028702 + }, + { + "step": 1715, + "lr": 0.0001, + "sup_loss": 1.4241624227422516 + }, + { + "step": 1716, + "lr": 0.0001, + "sup_loss": 1.4129704169481392 + }, + { + "step": 1717, + "lr": 0.0001, + "sup_loss": 1.4026697305936924 + }, + { + "step": 1718, + "lr": 0.0001, + "sup_loss": 1.423574577463633 + }, + { + "step": 1719, + "lr": 0.0001, + "sup_loss": 1.4466356305993462 + }, + { + "step": 1720, + "lr": 0.0001, + "sup_loss": 1.4175193956136638 + }, + { + "step": 1721, + "lr": 0.0001, + "sup_loss": 1.4164095825015055 + }, + { + "step": 1722, + "lr": 0.0001, + "sup_loss": 1.4511173858479929 + }, + { + "step": 1723, + 
"lr": 0.0001, + "sup_loss": 1.400809494659548 + }, + { + "step": 1724, + "lr": 0.0001, + "sup_loss": 1.3742221062609383 + }, + { + "step": 1725, + "lr": 0.0001, + "sup_loss": 1.391124666874474 + }, + { + "step": 1726, + "lr": 0.0001, + "sup_loss": 1.433945069819226 + }, + { + "step": 1727, + "lr": 0.0001, + "sup_loss": 1.418308873193478 + }, + { + "step": 1728, + "lr": 0.0001, + "sup_loss": 1.390428470826234 + }, + { + "step": 1729, + "lr": 0.0001, + "sup_loss": 1.3973969970307558 + }, + { + "step": 1730, + "lr": 0.0001, + "sup_loss": 1.3900446181179833 + }, + { + "step": 1731, + "lr": 0.0001, + "sup_loss": 1.4597287572458324 + }, + { + "step": 1732, + "lr": 0.0001, + "sup_loss": 1.393457958165169 + }, + { + "step": 1733, + "lr": 0.0001, + "sup_loss": 1.381019286796775 + }, + { + "step": 1734, + "lr": 0.0001, + "sup_loss": 1.4181985526336014 + }, + { + "step": 1735, + "lr": 0.0001, + "sup_loss": 1.453825708510764 + }, + { + "step": 1736, + "lr": 0.0001, + "sup_loss": 1.4172176895689348 + }, + { + "step": 1737, + "lr": 0.0001, + "sup_loss": 1.3895565057409323 + }, + { + "step": 1738, + "lr": 0.0001, + "sup_loss": 1.3937831329283283 + }, + { + "step": 1739, + "lr": 0.0001, + "sup_loss": 1.4367301391783125 + }, + { + "step": 1740, + "lr": 0.0001, + "sup_loss": 1.3726961230187977 + }, + { + "step": 1741, + "lr": 0.0001, + "sup_loss": 1.3119534294457615 + }, + { + "step": 1742, + "lr": 0.0001, + "sup_loss": 1.3920696637967949 + }, + { + "step": 1743, + "lr": 0.0001, + "sup_loss": 1.4287212232165254 + }, + { + "step": 1744, + "lr": 0.0001, + "sup_loss": 1.3921783234162188 + }, + { + "step": 1745, + "lr": 0.0001, + "sup_loss": 1.4124704721235282 + }, + { + "step": 1746, + "lr": 0.0001, + "sup_loss": 1.399998407494116 + }, + { + "step": 1747, + "lr": 0.0001, + "sup_loss": 1.383717333187172 + }, + { + "step": 1748, + "lr": 0.0001, + "sup_loss": 1.4199211011343578 + }, + { + "step": 1749, + "lr": 0.0001, + "sup_loss": 1.4110596219218836 + }, + { + "step": 1750, + "lr": 
0.0001, + "sup_loss": 1.3852943158751683, + "lyap1_mean": -4.665495872497559, + "lyap1_max": -4.607874870300293, + "lyap_spec_mean": [ + -4.665495872497559, + -4.658141613006592 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1751, + "lr": 0.0001, + "sup_loss": 1.4154354821456574 + }, + { + "step": 1752, + "lr": 0.0001, + "sup_loss": 1.4172083339262436 + }, + { + "step": 1753, + "lr": 0.0001, + "sup_loss": 1.4135405345450556 + }, + { + "step": 1754, + "lr": 0.0001, + "sup_loss": 1.379603965541869 + }, + { + "step": 1755, + "lr": 0.0001, + "sup_loss": 1.3878612968546624 + }, + { + "step": 1756, + "lr": 0.0001, + "sup_loss": 1.4169794308392718 + }, + { + "step": 1757, + "lr": 0.0001, + "sup_loss": 1.390763599809836 + }, + { + "step": 1758, + "lr": 0.0001, + "sup_loss": 1.404710588437579 + }, + { + "step": 1759, + "lr": 0.0001, + "sup_loss": 1.3681668535604654 + }, + { + "step": 1760, + "lr": 0.0001, + "sup_loss": 1.3634273104455894 + }, + { + "step": 1761, + "lr": 0.0001, + "sup_loss": 1.404242511108502 + }, + { + "step": 1762, + "lr": 0.0001, + "sup_loss": 1.3987200136279294 + }, + { + "step": 1763, + "lr": 0.0001, + "sup_loss": 1.3850970972823995 + }, + { + "step": 1764, + "lr": 0.0001, + "sup_loss": 1.4217052902258702 + }, + { + "step": 1765, + "lr": 0.0001, + "sup_loss": 1.3811997233220383 + }, + { + "step": 1766, + "lr": 0.0001, + "sup_loss": 1.385024861362425 + }, + { + "step": 1767, + "lr": 0.0001, + "sup_loss": 1.3981786536608454 + }, + { + "step": 1768, + "lr": 0.0001, + "sup_loss": 1.4028420398945523 + }, + { + "step": 1769, + "lr": 0.0001, + "sup_loss": 1.380682339552197 + }, + { + "step": 1770, + "lr": 0.0001, + "sup_loss": 1.3938850645461112 + }, + { + "step": 1771, + "lr": 0.0001, + "sup_loss": 1.363675775688171 + }, + { + "step": 1772, + "lr": 0.0001, + "sup_loss": 1.4344493868953614 + }, + { + "step": 1773, + "lr": 0.0001, + "sup_loss": 1.402734453717096 + }, + { + "step": 1774, + "lr": 0.0001, + "sup_loss": 1.3663507661756613 + }, + { + 
"step": 1775, + "lr": 0.0001, + "sup_loss": 1.4146546193642076 + }, + { + "step": 1776, + "lr": 0.0001, + "sup_loss": 1.4156933171663155 + }, + { + "step": 1777, + "lr": 0.0001, + "sup_loss": 1.4110230298621136 + }, + { + "step": 1778, + "lr": 0.0001, + "sup_loss": 1.4096749192534321 + }, + { + "step": 1779, + "lr": 0.0001, + "sup_loss": 1.3991252063761694 + }, + { + "step": 1780, + "lr": 0.0001, + "sup_loss": 1.405071876213828 + }, + { + "step": 1781, + "lr": 0.0001, + "sup_loss": 1.4219530273745287 + }, + { + "step": 1782, + "lr": 0.0001, + "sup_loss": 1.3915157016652242 + }, + { + "step": 1783, + "lr": 0.0001, + "sup_loss": 1.3362497782625802 + }, + { + "step": 1784, + "lr": 0.0001, + "sup_loss": 1.4000166314644689 + }, + { + "step": 1785, + "lr": 0.0001, + "sup_loss": 1.4572615126400756 + }, + { + "step": 1786, + "lr": 0.0001, + "sup_loss": 1.3976051043897242 + }, + { + "step": 1787, + "lr": 0.0001, + "sup_loss": 1.417975945208371 + }, + { + "step": 1788, + "lr": 0.0001, + "sup_loss": 1.358364193911282 + }, + { + "step": 1789, + "lr": 0.0001, + "sup_loss": 1.358574264083261 + }, + { + "step": 1790, + "lr": 0.0001, + "sup_loss": 1.3813598780221896 + }, + { + "step": 1791, + "lr": 0.0001, + "sup_loss": 1.4057036055515737 + }, + { + "step": 1792, + "lr": 0.0001, + "sup_loss": 1.3790889254336423 + }, + { + "step": 1793, + "lr": 0.0001, + "sup_loss": 1.4056022920146933 + }, + { + "step": 1794, + "lr": 0.0001, + "sup_loss": 1.3881851822661222 + }, + { + "step": 1795, + "lr": 0.0001, + "sup_loss": 1.424536986619044 + }, + { + "step": 1796, + "lr": 0.0001, + "sup_loss": 1.3979382553177466 + }, + { + "step": 1797, + "lr": 0.0001, + "sup_loss": 1.3859685073661732 + }, + { + "step": 1798, + "lr": 0.0001, + "sup_loss": 1.357773099034569 + }, + { + "step": 1799, + "lr": 0.0001, + "sup_loss": 1.3601298417933319 + }, + { + "step": 1800, + "lr": 0.0001, + "sup_loss": 1.4187908267431826, + "lyap1_mean": -4.62960958480835, + "lyap1_max": -4.570792198181152, + "lyap_spec_mean": [ 
+ -4.62960958480835, + -4.651381492614746 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1801, + "lr": 0.0001, + "sup_loss": 1.3574161597477685 + }, + { + "step": 1802, + "lr": 0.0001, + "sup_loss": 1.355414399823228 + }, + { + "step": 1803, + "lr": 0.0001, + "sup_loss": 1.4024833809974402 + }, + { + "step": 1804, + "lr": 0.0001, + "sup_loss": 1.4281579382471437 + }, + { + "step": 1805, + "lr": 0.0001, + "sup_loss": 1.4105774814866274 + }, + { + "step": 1806, + "lr": 0.0001, + "sup_loss": 1.351410120821957 + }, + { + "step": 1807, + "lr": 0.0001, + "sup_loss": 1.3991370284857265 + }, + { + "step": 1808, + "lr": 0.0001, + "sup_loss": 1.396684954646597 + }, + { + "step": 1809, + "lr": 0.0001, + "sup_loss": 1.374611939318962 + }, + { + "step": 1810, + "lr": 0.0001, + "sup_loss": 1.4500494639353956 + }, + { + "step": 1811, + "lr": 0.0001, + "sup_loss": 1.4191725427228916 + }, + { + "step": 1812, + "lr": 0.0001, + "sup_loss": 1.3995994802422211 + }, + { + "step": 1813, + "lr": 0.0001, + "sup_loss": 1.410303378418079 + }, + { + "step": 1814, + "lr": 0.0001, + "sup_loss": 1.442840957922918 + }, + { + "step": 1815, + "lr": 0.0001, + "sup_loss": 1.423430399397906 + }, + { + "step": 1816, + "lr": 0.0001, + "sup_loss": 1.4162455501383775 + }, + { + "step": 1817, + "lr": 0.0001, + "sup_loss": 1.3957264635105329 + }, + { + "step": 1818, + "lr": 0.0001, + "sup_loss": 1.3883472385148203 + }, + { + "step": 1819, + "lr": 0.0001, + "sup_loss": 1.3986012840542092 + }, + { + "step": 1820, + "lr": 0.0001, + "sup_loss": 1.4149770767182999 + }, + { + "step": 1821, + "lr": 0.0001, + "sup_loss": 1.4158087284454697 + }, + { + "step": 1822, + "lr": 0.0001, + "sup_loss": 1.34151407297393 + }, + { + "step": 1823, + "lr": 0.0001, + "sup_loss": 1.429926544348806 + }, + { + "step": 1824, + "lr": 0.0001, + "sup_loss": 1.3765306548971357 + }, + { + "step": 1825, + "lr": 0.0001, + "sup_loss": 1.3793901430864282 + }, + { + "step": 1826, + "lr": 0.0001, + "sup_loss": 1.4258960331562627 + 
}, + { + "step": 1827, + "lr": 0.0001, + "sup_loss": 1.375496475255633 + }, + { + "step": 1828, + "lr": 0.0001, + "sup_loss": 1.3961845866896458 + }, + { + "step": 1829, + "lr": 0.0001, + "sup_loss": 1.3705326303916847 + }, + { + "step": 1830, + "lr": 0.0001, + "sup_loss": 1.397554361593062 + }, + { + "step": 1831, + "lr": 0.0001, + "sup_loss": 1.373649047047743 + }, + { + "step": 1832, + "lr": 0.0001, + "sup_loss": 1.4103971124850156 + }, + { + "step": 1833, + "lr": 0.0001, + "sup_loss": 1.3926332421959795 + }, + { + "step": 1834, + "lr": 0.0001, + "sup_loss": 1.3545180573066256 + }, + { + "step": 1835, + "lr": 0.0001, + "sup_loss": 1.372013159211798 + }, + { + "step": 1836, + "lr": 0.0001, + "sup_loss": 1.3798558807830417 + }, + { + "step": 1837, + "lr": 0.0001, + "sup_loss": 1.383717975292924 + }, + { + "step": 1838, + "lr": 0.0001, + "sup_loss": 1.4071825646725735 + }, + { + "step": 1839, + "lr": 0.0001, + "sup_loss": 1.3979549619438212 + }, + { + "step": 1840, + "lr": 0.0001, + "sup_loss": 1.4056753356756657 + }, + { + "step": 1841, + "lr": 0.0001, + "sup_loss": 1.4127502290037786 + }, + { + "step": 1842, + "lr": 0.0001, + "sup_loss": 1.3886539086301197 + }, + { + "step": 1843, + "lr": 0.0001, + "sup_loss": 1.4014478585827812 + }, + { + "step": 1844, + "lr": 0.0001, + "sup_loss": 1.3542597916264838 + }, + { + "step": 1845, + "lr": 0.0001, + "sup_loss": 1.4282028043211747 + }, + { + "step": 1846, + "lr": 0.0001, + "sup_loss": 1.370276096968387 + }, + { + "step": 1847, + "lr": 0.0001, + "sup_loss": 1.341062965962153 + }, + { + "step": 1848, + "lr": 0.0001, + "sup_loss": 1.3682682882270654 + }, + { + "step": 1849, + "lr": 0.0001, + "sup_loss": 1.428371940704332 + }, + { + "step": 1850, + "lr": 0.0001, + "sup_loss": 1.3711423610060844, + "lyap1_mean": -4.5718889236450195, + "lyap1_max": -4.5085368156433105, + "lyap_spec_mean": [ + -4.5718889236450195, + -4.579253196716309 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1851, + "lr": 0.0001, + 
"sup_loss": 1.348380735959665 + }, + { + "step": 1852, + "lr": 0.0001, + "sup_loss": 1.3986638988927877 + }, + { + "step": 1853, + "lr": 0.0001, + "sup_loss": 1.4137244831379143 + }, + { + "step": 1854, + "lr": 0.0001, + "sup_loss": 1.4296081719085751 + }, + { + "step": 1855, + "lr": 0.0001, + "sup_loss": 1.371001389861114 + }, + { + "step": 1856, + "lr": 0.0001, + "sup_loss": 1.3773995933767789 + }, + { + "step": 1857, + "lr": 0.0001, + "sup_loss": 1.388658394430795 + }, + { + "step": 1858, + "lr": 0.0001, + "sup_loss": 1.4196961903670395 + }, + { + "step": 1859, + "lr": 0.0001, + "sup_loss": 1.3293056408568562 + }, + { + "step": 1860, + "lr": 0.0001, + "sup_loss": 1.399139365002204 + }, + { + "step": 1861, + "lr": 0.0001, + "sup_loss": 1.3670867419921997 + }, + { + "step": 1862, + "lr": 0.0001, + "sup_loss": 1.3892964577941027 + }, + { + "step": 1863, + "lr": 0.0001, + "sup_loss": 1.3572912979517981 + }, + { + "step": 1864, + "lr": 0.0001, + "sup_loss": 1.3664007912623206 + }, + { + "step": 1865, + "lr": 0.0001, + "sup_loss": 1.3731805600850344 + }, + { + "step": 1866, + "lr": 0.0001, + "sup_loss": 1.4258346218510285 + }, + { + "step": 1867, + "lr": 0.0001, + "sup_loss": 1.416389178871045 + }, + { + "step": 1868, + "lr": 0.0001, + "sup_loss": 1.348899805158484 + }, + { + "step": 1869, + "lr": 0.0001, + "sup_loss": 1.4312741790962196 + }, + { + "step": 1870, + "lr": 0.0001, + "sup_loss": 1.3691486633477414 + }, + { + "step": 1871, + "lr": 0.0001, + "sup_loss": 1.2970178135696564 + }, + { + "step": 1872, + "lr": 0.0001, + "sup_loss": 1.3593750928806394 + }, + { + "step": 1873, + "lr": 0.0001, + "sup_loss": 1.3980547495966205 + }, + { + "step": 1874, + "lr": 0.0001, + "sup_loss": 1.3832893584152175 + }, + { + "step": 1875, + "lr": 0.0001, + "sup_loss": 1.3819986745020088 + }, + { + "step": 1876, + "lr": 0.0001, + "sup_loss": 1.351680373549221 + }, + { + "step": 1877, + "lr": 0.0001, + "sup_loss": 1.3691332268933585 + }, + { + "step": 1878, + "lr": 0.0001, + 
"sup_loss": 1.3921900335628232 + }, + { + "step": 1879, + "lr": 0.0001, + "sup_loss": 1.428229541835207 + }, + { + "step": 1880, + "lr": 0.0001, + "sup_loss": 1.4162603156116025 + }, + { + "step": 1881, + "lr": 0.0001, + "sup_loss": 1.3940852528683518 + }, + { + "step": 1882, + "lr": 0.0001, + "sup_loss": 1.400741486875141 + }, + { + "step": 1883, + "lr": 0.0001, + "sup_loss": 1.4076794064796443 + }, + { + "step": 1884, + "lr": 0.0001, + "sup_loss": 1.3663377710270712 + }, + { + "step": 1885, + "lr": 0.0001, + "sup_loss": 1.4084374549276877 + }, + { + "step": 1886, + "lr": 0.0001, + "sup_loss": 1.3710445340193365 + }, + { + "step": 1887, + "lr": 0.0001, + "sup_loss": 1.3391371076123924 + }, + { + "step": 1888, + "lr": 0.0001, + "sup_loss": 1.38349138572003 + }, + { + "step": 1889, + "lr": 0.0001, + "sup_loss": 1.376654163245699 + }, + { + "step": 1890, + "lr": 0.0001, + "sup_loss": 1.3542202794361082 + }, + { + "step": 1891, + "lr": 0.0001, + "sup_loss": 1.4102894048653727 + }, + { + "step": 1892, + "lr": 0.0001, + "sup_loss": 1.382272815501765 + }, + { + "step": 1893, + "lr": 0.0001, + "sup_loss": 1.370449816666093 + }, + { + "step": 1894, + "lr": 0.0001, + "sup_loss": 1.3641785166006943 + }, + { + "step": 1895, + "lr": 0.0001, + "sup_loss": 1.3873136818146585 + }, + { + "step": 1896, + "lr": 0.0001, + "sup_loss": 1.392690423088317 + }, + { + "step": 1897, + "lr": 0.0001, + "sup_loss": 1.3981610075934223 + }, + { + "step": 1898, + "lr": 0.0001, + "sup_loss": 1.3625417705878673 + }, + { + "step": 1899, + "lr": 0.0001, + "sup_loss": 1.4160414109306536 + }, + { + "step": 1900, + "lr": 0.0001, + "sup_loss": 1.4053043733147106, + "lyap1_mean": -4.53842830657959, + "lyap1_max": -4.492893218994141, + "lyap_spec_mean": [ + -4.53842830657959, + -4.5512495040893555 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1901, + "lr": 0.0001, + "sup_loss": 1.4107048570158751 + }, + { + "step": 1902, + "lr": 0.0001, + "sup_loss": 1.3964958672363343 + }, + { + "step": 
1903, + "lr": 0.0001, + "sup_loss": 1.3556434403984523 + }, + { + "step": 1904, + "lr": 0.0001, + "sup_loss": 1.4003847344722227 + }, + { + "step": 1905, + "lr": 0.0001, + "sup_loss": 1.3701481630931416 + }, + { + "step": 1906, + "lr": 0.0001, + "sup_loss": 1.4036731353597947 + }, + { + "step": 1907, + "lr": 0.0001, + "sup_loss": 1.3701097420037023 + }, + { + "step": 1908, + "lr": 0.0001, + "sup_loss": 1.3820651570078104 + }, + { + "step": 1909, + "lr": 0.0001, + "sup_loss": 1.351438467875153 + }, + { + "step": 1910, + "lr": 0.0001, + "sup_loss": 1.3710336553706137 + }, + { + "step": 1911, + "lr": 0.0001, + "sup_loss": 1.404502492106915 + }, + { + "step": 1912, + "lr": 0.0001, + "sup_loss": 1.349230291289015 + }, + { + "step": 1913, + "lr": 0.0001, + "sup_loss": 1.3736967708773877 + }, + { + "step": 1914, + "lr": 0.0001, + "sup_loss": 1.3658889705251354 + }, + { + "step": 1915, + "lr": 0.0001, + "sup_loss": 1.3860899697779687 + }, + { + "step": 1916, + "lr": 0.0001, + "sup_loss": 1.4095423822978685 + }, + { + "step": 1917, + "lr": 0.0001, + "sup_loss": 1.3913746743084574 + }, + { + "step": 1918, + "lr": 0.0001, + "sup_loss": 1.3960645178836417 + }, + { + "step": 1919, + "lr": 0.0001, + "sup_loss": 1.428211219191195 + }, + { + "step": 1920, + "lr": 0.0001, + "sup_loss": 1.35381152076921 + }, + { + "step": 1921, + "lr": 0.0001, + "sup_loss": 1.385247152655118 + }, + { + "step": 1922, + "lr": 0.0001, + "sup_loss": 1.384072328326964 + }, + { + "step": 1923, + "lr": 0.0001, + "sup_loss": 1.390136799768782 + }, + { + "step": 1924, + "lr": 0.0001, + "sup_loss": 1.3208568632871849 + }, + { + "step": 1925, + "lr": 0.0001, + "sup_loss": 1.3798523590951521 + }, + { + "step": 1926, + "lr": 0.0001, + "sup_loss": 1.4030636325146573 + }, + { + "step": 1927, + "lr": 0.0001, + "sup_loss": 1.3849178670786395 + }, + { + "step": 1928, + "lr": 0.0001, + "sup_loss": 1.3421631008540733 + }, + { + "step": 1929, + "lr": 0.0001, + "sup_loss": 1.3928845883950856 + }, + { + "step": 1930, + 
"lr": 0.0001, + "sup_loss": 1.4034779927886587 + }, + { + "step": 1931, + "lr": 0.0001, + "sup_loss": 1.3333934110961831 + }, + { + "step": 1932, + "lr": 0.0001, + "sup_loss": 1.398043299923857 + }, + { + "step": 1933, + "lr": 0.0001, + "sup_loss": 1.3400236050026983 + }, + { + "step": 1934, + "lr": 0.0001, + "sup_loss": 1.3553997056338085 + }, + { + "step": 1935, + "lr": 0.0001, + "sup_loss": 1.390199603919459 + }, + { + "step": 1936, + "lr": 0.0001, + "sup_loss": 1.347300077002741 + }, + { + "step": 1937, + "lr": 0.0001, + "sup_loss": 1.3652634305398639 + }, + { + "step": 1938, + "lr": 0.0001, + "sup_loss": 1.3636650235783987 + }, + { + "step": 1939, + "lr": 0.0001, + "sup_loss": 1.3530957622303976 + }, + { + "step": 1940, + "lr": 0.0001, + "sup_loss": 1.3438039199411747 + }, + { + "step": 1941, + "lr": 0.0001, + "sup_loss": 1.3657952402072888 + }, + { + "step": 1942, + "lr": 0.0001, + "sup_loss": 1.3590142963982565 + }, + { + "step": 1943, + "lr": 0.0001, + "sup_loss": 1.3876171940240831 + }, + { + "step": 1944, + "lr": 0.0001, + "sup_loss": 1.3701369302199544 + }, + { + "step": 1945, + "lr": 0.0001, + "sup_loss": 1.338177333367471 + }, + { + "step": 1946, + "lr": 0.0001, + "sup_loss": 1.3602897982810362 + }, + { + "step": 1947, + "lr": 0.0001, + "sup_loss": 1.384471648891986 + }, + { + "step": 1948, + "lr": 0.0001, + "sup_loss": 1.3604608414742663 + }, + { + "step": 1949, + "lr": 0.0001, + "sup_loss": 1.3703963081130108 + }, + { + "step": 1950, + "lr": 0.0001, + "sup_loss": 1.4018406171793427, + "lyap1_mean": -4.516169548034668, + "lyap1_max": -4.479403972625732, + "lyap_spec_mean": [ + -4.516169548034668, + -4.495737075805664 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1951, + "lr": 0.0001, + "sup_loss": 1.3500330270163146 + }, + { + "step": 1952, + "lr": 0.0001, + "sup_loss": 1.363580224458456 + }, + { + "step": 1953, + "lr": 0.0001, + "sup_loss": 1.3825467728430256 + }, + { + "step": 1954, + "lr": 0.0001, + "sup_loss": 1.3764015580633722 + 
}, + { + "step": 1955, + "lr": 0.0001, + "sup_loss": 1.3986757844247562 + }, + { + "step": 1956, + "lr": 0.0001, + "sup_loss": 1.3747816825360306 + }, + { + "step": 1957, + "lr": 0.0001, + "sup_loss": 1.4085027575932358 + }, + { + "step": 1958, + "lr": 0.0001, + "sup_loss": 1.3716618639585965 + }, + { + "step": 1959, + "lr": 0.0001, + "sup_loss": 1.3661716399809094 + }, + { + "step": 1960, + "lr": 0.0001, + "sup_loss": 1.3768941721439172 + }, + { + "step": 1961, + "lr": 0.0001, + "sup_loss": 1.3784926722056297 + }, + { + "step": 1962, + "lr": 0.0001, + "sup_loss": 1.3340065422054161 + }, + { + "step": 1963, + "lr": 0.0001, + "sup_loss": 1.3227949248290736 + }, + { + "step": 1964, + "lr": 0.0001, + "sup_loss": 1.3929235162318456 + }, + { + "step": 1965, + "lr": 0.0001, + "sup_loss": 1.3218183026505725 + }, + { + "step": 1966, + "lr": 0.0001, + "sup_loss": 1.444029727840847 + }, + { + "step": 1967, + "lr": 0.0001, + "sup_loss": 1.4018837207395192 + }, + { + "step": 1968, + "lr": 0.0001, + "sup_loss": 1.3633056906174885 + }, + { + "step": 1969, + "lr": 0.0001, + "sup_loss": 1.3967052206371235 + }, + { + "step": 1970, + "lr": 0.0001, + "sup_loss": 1.3751573463676405 + }, + { + "step": 1971, + "lr": 0.0001, + "sup_loss": 1.4010431141358848 + }, + { + "step": 1972, + "lr": 0.0001, + "sup_loss": 1.3643291837044647 + }, + { + "step": 1973, + "lr": 0.0001, + "sup_loss": 1.343893317074618 + }, + { + "step": 1974, + "lr": 0.0001, + "sup_loss": 1.363457349721676 + }, + { + "step": 1975, + "lr": 0.0001, + "sup_loss": 1.3949115992663166 + }, + { + "step": 1976, + "lr": 0.0001, + "sup_loss": 1.3893924954110135 + }, + { + "step": 1977, + "lr": 0.0001, + "sup_loss": 1.4092470410492952 + }, + { + "step": 1978, + "lr": 0.0001, + "sup_loss": 1.3345610672982104 + }, + { + "step": 1979, + "lr": 0.0001, + "sup_loss": 1.3902674244951962 + }, + { + "step": 1980, + "lr": 0.0001, + "sup_loss": 1.4075058165576269 + }, + { + "step": 1981, + "lr": 0.0001, + "sup_loss": 1.3316610519409164 + }, + 
{ + "step": 1982, + "lr": 0.0001, + "sup_loss": 1.4125409922811145 + }, + { + "step": 1983, + "lr": 0.0001, + "sup_loss": 1.3095982795860546 + }, + { + "step": 1984, + "lr": 0.0001, + "sup_loss": 1.3758205076958196 + }, + { + "step": 1985, + "lr": 0.0001, + "sup_loss": 1.3452453134603115 + }, + { + "step": 1986, + "lr": 0.0001, + "sup_loss": 1.3753948695071716 + }, + { + "step": 1987, + "lr": 0.0001, + "sup_loss": 1.377792582050537 + }, + { + "step": 1988, + "lr": 0.0001, + "sup_loss": 1.3718805125867044 + }, + { + "step": 1989, + "lr": 0.0001, + "sup_loss": 1.3778118905184646 + }, + { + "step": 1990, + "lr": 0.0001, + "sup_loss": 1.3293731493176637 + }, + { + "step": 1991, + "lr": 0.0001, + "sup_loss": 1.4046274258849833 + }, + { + "step": 1992, + "lr": 0.0001, + "sup_loss": 1.4007280834306206 + }, + { + "step": 1993, + "lr": 0.0001, + "sup_loss": 1.3563045856835036 + }, + { + "step": 1994, + "lr": 0.0001, + "sup_loss": 1.3696906464910985 + }, + { + "step": 1995, + "lr": 0.0001, + "sup_loss": 1.36281491055303 + }, + { + "step": 1996, + "lr": 0.0001, + "sup_loss": 1.3837184897117962 + }, + { + "step": 1997, + "lr": 0.0001, + "sup_loss": 1.3544101406493183 + }, + { + "step": 1998, + "lr": 0.0001, + "sup_loss": 1.3298358982086458 + }, + { + "step": 1999, + "lr": 0.0001, + "sup_loss": 1.3730415539217413 + }, + { + "step": 2000, + "lr": 0.0001, + "sup_loss": 1.3339980567745424, + "lyap1_mean": -4.510406970977783, + "lyap1_max": -4.459565162658691, + "lyap_spec_mean": [ + -4.510406970977783, + -4.500662803649902 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2001, + "lr": 0.0001, + "sup_loss": 1.3371870354264936 + }, + { + "step": 2002, + "lr": 0.0001, + "sup_loss": 1.4213587230559992 + }, + { + "step": 2003, + "lr": 0.0001, + "sup_loss": 1.3779638016660272 + }, + { + "step": 2004, + "lr": 0.0001, + "sup_loss": 1.3608991042341083 + }, + { + "step": 2005, + "lr": 0.0001, + "sup_loss": 1.3399255695591843 + }, + { + "step": 2006, + "lr": 0.0001, + "sup_loss": 
1.3752057915289775 + }, + { + "step": 2007, + "lr": 0.0001, + "sup_loss": 1.3577482652881274 + }, + { + "step": 2008, + "lr": 0.0001, + "sup_loss": 1.420962343146364 + }, + { + "step": 2009, + "lr": 0.0001, + "sup_loss": 1.3869896191585642 + }, + { + "step": 2010, + "lr": 0.0001, + "sup_loss": 1.3300569782640892 + }, + { + "step": 2011, + "lr": 0.0001, + "sup_loss": 1.3518849763589253 + }, + { + "step": 2012, + "lr": 0.0001, + "sup_loss": 1.3633507773214506 + }, + { + "step": 2013, + "lr": 0.0001, + "sup_loss": 1.3668917123030626 + }, + { + "step": 2014, + "lr": 0.0001, + "sup_loss": 1.2853489140843513 + }, + { + "step": 2015, + "lr": 0.0001, + "sup_loss": 1.3752328556622242 + }, + { + "step": 2016, + "lr": 0.0001, + "sup_loss": 1.3664280062232317 + }, + { + "step": 2017, + "lr": 0.0001, + "sup_loss": 1.3447542337864204 + }, + { + "step": 2018, + "lr": 0.0001, + "sup_loss": 1.401180823346046 + }, + { + "step": 2019, + "lr": 0.0001, + "sup_loss": 1.3402864764695757 + }, + { + "step": 2020, + "lr": 0.0001, + "sup_loss": 1.372260005720886 + }, + { + "step": 2021, + "lr": 0.0001, + "sup_loss": 1.3881757923395248 + }, + { + "step": 2022, + "lr": 0.0001, + "sup_loss": 1.359384677600057 + }, + { + "step": 2023, + "lr": 0.0001, + "sup_loss": 1.355233183048644 + }, + { + "step": 2024, + "lr": 0.0001, + "sup_loss": 1.3470691102570524 + }, + { + "step": 2025, + "lr": 0.0001, + "sup_loss": 1.4058171736607916 + }, + { + "step": 2026, + "lr": 0.0001, + "sup_loss": 1.356862271584181 + }, + { + "step": 2027, + "lr": 0.0001, + "sup_loss": 1.340608530338701 + }, + { + "step": 2028, + "lr": 0.0001, + "sup_loss": 1.3545155323893496 + }, + { + "step": 2029, + "lr": 0.0001, + "sup_loss": 1.3133003500655724 + }, + { + "step": 2030, + "lr": 0.0001, + "sup_loss": 1.3365171777295242 + }, + { + "step": 2031, + "lr": 0.0001, + "sup_loss": 1.3352465254532073 + }, + { + "step": 2032, + "lr": 0.0001, + "sup_loss": 1.3290639518194507 + }, + { + "step": 2033, + "lr": 0.0001, + "sup_loss": 
1.3632602898580546 + }, + { + "step": 2034, + "lr": 0.0001, + "sup_loss": 1.3562538715277097 + }, + { + "step": 2035, + "lr": 0.0001, + "sup_loss": 1.3295290558729622 + }, + { + "step": 2036, + "lr": 0.0001, + "sup_loss": 1.322156576770507 + }, + { + "step": 2037, + "lr": 0.0001, + "sup_loss": 1.3888129229545707 + }, + { + "step": 2038, + "lr": 0.0001, + "sup_loss": 1.3752207107333982 + }, + { + "step": 2039, + "lr": 0.0001, + "sup_loss": 1.3446958521638506 + }, + { + "step": 2040, + "lr": 0.0001, + "sup_loss": 1.3366004534173135 + }, + { + "step": 2041, + "lr": 0.0001, + "sup_loss": 1.3467699518232084 + }, + { + "step": 2042, + "lr": 0.0001, + "sup_loss": 1.3656164575958503 + }, + { + "step": 2043, + "lr": 0.0001, + "sup_loss": 1.3585792298831325 + }, + { + "step": 2044, + "lr": 0.0001, + "sup_loss": 1.325510437750258 + }, + { + "step": 2045, + "lr": 0.0001, + "sup_loss": 1.3482649999496745 + }, + { + "step": 2046, + "lr": 0.0001, + "sup_loss": 1.3387166154589614 + }, + { + "step": 2047, + "lr": 0.0001, + "sup_loss": 1.3407015457823028 + }, + { + "step": 2048, + "lr": 0.0001, + "sup_loss": 1.3371180036322672 + }, + { + "step": 2049, + "lr": 0.0001, + "sup_loss": 1.340069140539834 + }, + { + "step": 2050, + "lr": 0.0001, + "sup_loss": 1.3547002810767044, + "lyap1_mean": -4.465114116668701, + "lyap1_max": -4.41412878036499, + "lyap_spec_mean": [ + -4.465114116668701, + -4.451486587524414 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2051, + "lr": 0.0001, + "sup_loss": 1.3396033592056957 + }, + { + "step": 2052, + "lr": 0.0001, + "sup_loss": 1.352171886298948 + }, + { + "step": 2053, + "lr": 0.0001, + "sup_loss": 1.3261017469531906 + }, + { + "step": 2054, + "lr": 0.0001, + "sup_loss": 1.3929580943568656 + }, + { + "step": 2055, + "lr": 0.0001, + "sup_loss": 1.3425537789890651 + }, + { + "step": 2056, + "lr": 0.0001, + "sup_loss": 1.3701583894130607 + }, + { + "step": 2057, + "lr": 0.0001, + "sup_loss": 1.3095096645410134 + }, + { + "step": 2058, + 
"lr": 0.0001, + "sup_loss": 1.3348805390814356 + }, + { + "step": 2059, + "lr": 0.0001, + "sup_loss": 1.3426512652266922 + }, + { + "step": 2060, + "lr": 0.0001, + "sup_loss": 1.3687856290577065 + }, + { + "step": 2061, + "lr": 0.0001, + "sup_loss": 1.3560780345011156 + }, + { + "step": 2062, + "lr": 0.0001, + "sup_loss": 1.3361326136446814 + }, + { + "step": 2063, + "lr": 0.0001, + "sup_loss": 1.366525492464688 + }, + { + "step": 2064, + "lr": 0.0001, + "sup_loss": 1.3551578869803815 + }, + { + "step": 2065, + "lr": 0.0001, + "sup_loss": 1.3209476441323327 + }, + { + "step": 2066, + "lr": 0.0001, + "sup_loss": 1.316228756887557 + }, + { + "step": 2067, + "lr": 0.0001, + "sup_loss": 1.3781661060899306 + }, + { + "step": 2068, + "lr": 0.0001, + "sup_loss": 1.3328621944853336 + }, + { + "step": 2069, + "lr": 0.0001, + "sup_loss": 1.3453852084449516 + }, + { + "step": 2070, + "lr": 0.0001, + "sup_loss": 1.329907436850029 + }, + { + "step": 2071, + "lr": 0.0001, + "sup_loss": 1.3992864140100056 + }, + { + "step": 2072, + "lr": 0.0001, + "sup_loss": 1.3695568559796663 + }, + { + "step": 2073, + "lr": 0.0001, + "sup_loss": 1.3698310707279227 + }, + { + "step": 2074, + "lr": 0.0001, + "sup_loss": 1.3681777200729452 + }, + { + "step": 2075, + "lr": 0.0001, + "sup_loss": 1.3069135426602005 + }, + { + "step": 2076, + "lr": 0.0001, + "sup_loss": 1.3980712289755775 + }, + { + "step": 2077, + "lr": 0.0001, + "sup_loss": 1.2731080181536578 + }, + { + "step": 2078, + "lr": 0.0001, + "sup_loss": 1.338121006075292 + }, + { + "step": 2079, + "lr": 0.0001, + "sup_loss": 1.3497756942480432 + }, + { + "step": 2080, + "lr": 0.0001, + "sup_loss": 1.4212349587276163 + }, + { + "step": 2081, + "lr": 0.0001, + "sup_loss": 1.3632858708765143 + }, + { + "step": 2082, + "lr": 0.0001, + "sup_loss": 1.365354353732921 + }, + { + "step": 2083, + "lr": 0.0001, + "sup_loss": 1.3493640688629125 + }, + { + "step": 2084, + "lr": 0.0001, + "sup_loss": 1.3673073397610955 + }, + { + "step": 2085, + "lr": 
0.0001, + "sup_loss": 1.3180393474774583 + }, + { + "step": 2086, + "lr": 0.0001, + "sup_loss": 1.28029072671823 + }, + { + "step": 2087, + "lr": 0.0001, + "sup_loss": 1.3508910012541286 + }, + { + "step": 2088, + "lr": 0.0001, + "sup_loss": 1.335003578735963 + }, + { + "step": 2089, + "lr": 0.0001, + "sup_loss": 1.3512171262795194 + }, + { + "step": 2090, + "lr": 0.0001, + "sup_loss": 1.3447356327153654 + }, + { + "step": 2091, + "lr": 0.0001, + "sup_loss": 1.342019770746191 + }, + { + "step": 2092, + "lr": 0.0001, + "sup_loss": 1.3466366556492275 + }, + { + "step": 2093, + "lr": 0.0001, + "sup_loss": 1.3204282386642654 + }, + { + "step": 2094, + "lr": 0.0001, + "sup_loss": 1.3615720014134431 + }, + { + "step": 2095, + "lr": 0.0001, + "sup_loss": 1.3624110569628538 + }, + { + "step": 2096, + "lr": 0.0001, + "sup_loss": 1.3553442730115235 + }, + { + "step": 2097, + "lr": 0.0001, + "sup_loss": 1.3922013558258723 + }, + { + "step": 2098, + "lr": 0.0001, + "sup_loss": 1.3597693337119026 + }, + { + "step": 2099, + "lr": 0.0001, + "sup_loss": 1.3558919642597578 + }, + { + "step": 2100, + "lr": 0.0001, + "sup_loss": 1.3511568913760699, + "lyap1_mean": -4.426387310028076, + "lyap1_max": -4.368574142456055, + "lyap_spec_mean": [ + -4.426387786865234, + -4.434847831726074 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2101, + "lr": 0.0001, + "sup_loss": 1.3633861451477107 + }, + { + "step": 2102, + "lr": 0.0001, + "sup_loss": 1.3735655305801195 + }, + { + "step": 2103, + "lr": 0.0001, + "sup_loss": 1.3204364616801423 + }, + { + "step": 2104, + "lr": 0.0001, + "sup_loss": 1.3327367752349657 + }, + { + "step": 2105, + "lr": 0.0001, + "sup_loss": 1.3801328532419268 + }, + { + "step": 2106, + "lr": 0.0001, + "sup_loss": 1.3063631329299508 + }, + { + "step": 2107, + "lr": 0.0001, + "sup_loss": 1.3239978170888282 + }, + { + "step": 2108, + "lr": 0.0001, + "sup_loss": 1.3320880848681536 + }, + { + "step": 2109, + "lr": 0.0001, + "sup_loss": 1.3122370970472008 + }, + 
{ + "step": 2110, + "lr": 0.0001, + "sup_loss": 1.321575087004928 + }, + { + "step": 2111, + "lr": 0.0001, + "sup_loss": 1.290006027355005 + }, + { + "step": 2112, + "lr": 0.0001, + "sup_loss": 1.3572941458122838 + }, + { + "step": 2113, + "lr": 0.0001, + "sup_loss": 1.3556785932203983 + }, + { + "step": 2114, + "lr": 0.0001, + "sup_loss": 1.3716379227489683 + }, + { + "step": 2115, + "lr": 0.0001, + "sup_loss": 1.3391857244217897 + }, + { + "step": 2116, + "lr": 0.0001, + "sup_loss": 1.324222243327907 + }, + { + "step": 2117, + "lr": 0.0001, + "sup_loss": 1.3140086931468338 + }, + { + "step": 2118, + "lr": 0.0001, + "sup_loss": 1.3406439871418883 + }, + { + "step": 2119, + "lr": 0.0001, + "sup_loss": 1.3089188618763732 + }, + { + "step": 2120, + "lr": 0.0001, + "sup_loss": 1.3183756897757082 + }, + { + "step": 2121, + "lr": 0.0001, + "sup_loss": 1.3707324574692803 + }, + { + "step": 2122, + "lr": 0.0001, + "sup_loss": 1.360244853925411 + }, + { + "step": 2123, + "lr": 0.0001, + "sup_loss": 1.3196727535568096 + }, + { + "step": 2124, + "lr": 0.0001, + "sup_loss": 1.3134277358290887 + }, + { + "step": 2125, + "lr": 0.0001, + "sup_loss": 1.3413463563050358 + }, + { + "step": 2126, + "lr": 0.0001, + "sup_loss": 1.3295564824664874 + }, + { + "step": 2127, + "lr": 0.0001, + "sup_loss": 1.31666693383427 + }, + { + "step": 2128, + "lr": 0.0001, + "sup_loss": 1.3349042630653463 + }, + { + "step": 2129, + "lr": 0.0001, + "sup_loss": 1.349416817670363 + }, + { + "step": 2130, + "lr": 0.0001, + "sup_loss": 1.329484926157641 + }, + { + "step": 2131, + "lr": 0.0001, + "sup_loss": 1.3489910525913433 + }, + { + "step": 2132, + "lr": 0.0001, + "sup_loss": 1.3445975400784798 + }, + { + "step": 2133, + "lr": 0.0001, + "sup_loss": 1.3376441114276323 + }, + { + "step": 2134, + "lr": 0.0001, + "sup_loss": 1.256542993328879 + }, + { + "step": 2135, + "lr": 0.0001, + "sup_loss": 1.3387028657961095 + }, + { + "step": 2136, + "lr": 0.0001, + "sup_loss": 1.3055971455988815 + }, + { + 
"step": 2137, + "lr": 0.0001, + "sup_loss": 1.3168458387730453 + }, + { + "step": 2138, + "lr": 0.0001, + "sup_loss": 1.3360651781669515 + }, + { + "step": 2139, + "lr": 0.0001, + "sup_loss": 1.306980656895867 + }, + { + "step": 2140, + "lr": 0.0001, + "sup_loss": 1.3337176120239893 + }, + { + "step": 2141, + "lr": 0.0001, + "sup_loss": 1.3939107089815508 + }, + { + "step": 2142, + "lr": 0.0001, + "sup_loss": 1.328108416397912 + }, + { + "step": 2143, + "lr": 0.0001, + "sup_loss": 1.37899123264887 + }, + { + "step": 2144, + "lr": 0.0001, + "sup_loss": 1.2823953273307112 + }, + { + "step": 2145, + "lr": 0.0001, + "sup_loss": 1.3776023411104743 + }, + { + "step": 2146, + "lr": 0.0001, + "sup_loss": 1.315631824995313 + }, + { + "step": 2147, + "lr": 0.0001, + "sup_loss": 1.3805747888653808 + }, + { + "step": 2148, + "lr": 0.0001, + "sup_loss": 1.3148594965007039 + }, + { + "step": 2149, + "lr": 0.0001, + "sup_loss": 1.3334621456026423 + }, + { + "step": 2150, + "lr": 0.0001, + "sup_loss": 1.3679707923892126, + "lyap1_mean": -4.389822006225586, + "lyap1_max": -4.314357757568359, + "lyap_spec_mean": [ + -4.389822006225586, + -4.425333023071289 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2151, + "lr": 0.0001, + "sup_loss": 1.3273265694685552 + }, + { + "step": 2152, + "lr": 0.0001, + "sup_loss": 1.3517546055228877 + }, + { + "step": 2153, + "lr": 0.0001, + "sup_loss": 1.3468313820272872 + }, + { + "step": 2154, + "lr": 0.0001, + "sup_loss": 1.375585123908039 + }, + { + "step": 2155, + "lr": 0.0001, + "sup_loss": 1.3140645252511967 + }, + { + "step": 2156, + "lr": 0.0001, + "sup_loss": 1.3143960263942513 + }, + { + "step": 2157, + "lr": 0.0001, + "sup_loss": 1.3008154481749927 + }, + { + "step": 2158, + "lr": 0.0001, + "sup_loss": 1.303763768006566 + }, + { + "step": 2159, + "lr": 0.0001, + "sup_loss": 1.3435816600898083 + }, + { + "step": 2160, + "lr": 0.0001, + "sup_loss": 1.3838424955103175 + }, + { + "step": 2161, + "lr": 0.0001, + "sup_loss": 
1.3337795866598 + }, + { + "step": 2162, + "lr": 0.0001, + "sup_loss": 1.3173088087504823 + }, + { + "step": 2163, + "lr": 0.0001, + "sup_loss": 1.2535935961558726 + }, + { + "step": 2164, + "lr": 0.0001, + "sup_loss": 1.336207887569674 + }, + { + "step": 2165, + "lr": 0.0001, + "sup_loss": 1.3776271528931472 + }, + { + "step": 2166, + "lr": 0.0001, + "sup_loss": 1.3717231231434135 + }, + { + "step": 2167, + "lr": 0.0001, + "sup_loss": 1.3203665193621015 + }, + { + "step": 2168, + "lr": 0.0001, + "sup_loss": 1.3119285377262329 + }, + { + "step": 2169, + "lr": 0.0001, + "sup_loss": 1.3479718556201845 + }, + { + "step": 2170, + "lr": 0.0001, + "sup_loss": 1.362475173256755 + }, + { + "step": 2171, + "lr": 0.0001, + "sup_loss": 1.3081708880558727 + }, + { + "step": 2172, + "lr": 0.0001, + "sup_loss": 1.3457334206263774 + }, + { + "step": 2173, + "lr": 0.0001, + "sup_loss": 1.3737304003604383 + }, + { + "step": 2174, + "lr": 0.0001, + "sup_loss": 1.3785094760778036 + }, + { + "step": 2175, + "lr": 0.0001, + "sup_loss": 1.3145302411479918 + }, + { + "step": 2176, + "lr": 0.0001, + "sup_loss": 1.2889087368696637 + }, + { + "step": 2177, + "lr": 0.0001, + "sup_loss": 1.3651930000152803 + }, + { + "step": 2178, + "lr": 0.0001, + "sup_loss": 1.324622184134317 + }, + { + "step": 2179, + "lr": 0.0001, + "sup_loss": 1.3358459274740715 + }, + { + "step": 2180, + "lr": 0.0001, + "sup_loss": 1.3250251432795193 + }, + { + "step": 2181, + "lr": 0.0001, + "sup_loss": 1.3237568861540607 + }, + { + "step": 2182, + "lr": 0.0001, + "sup_loss": 1.2773238439109593 + }, + { + "step": 2183, + "lr": 0.0001, + "sup_loss": 1.3419788055225594 + }, + { + "step": 2184, + "lr": 0.0001, + "sup_loss": 1.319439250293323 + }, + { + "step": 2185, + "lr": 0.0001, + "sup_loss": 1.3383511134124657 + }, + { + "step": 2186, + "lr": 0.0001, + "sup_loss": 1.331248553676409 + }, + { + "step": 2187, + "lr": 0.0001, + "sup_loss": 1.3689994789626085 + }, + { + "step": 2188, + "lr": 0.0001, + "sup_loss": 
1.3183586671511562 + }, + { + "step": 2189, + "lr": 0.0001, + "sup_loss": 1.3825720535483856 + }, + { + "step": 2190, + "lr": 0.0001, + "sup_loss": 1.2898108296775017 + }, + { + "step": 2191, + "lr": 0.0001, + "sup_loss": 1.2683795984634725 + }, + { + "step": 2192, + "lr": 0.0001, + "sup_loss": 1.3177387219222993 + }, + { + "step": 2193, + "lr": 0.0001, + "sup_loss": 1.3266058989620944 + }, + { + "step": 2194, + "lr": 0.0001, + "sup_loss": 1.344716575426023 + }, + { + "step": 2195, + "lr": 0.0001, + "sup_loss": 1.31382421395051 + }, + { + "step": 2196, + "lr": 0.0001, + "sup_loss": 1.2283617960933486 + }, + { + "step": 2197, + "lr": 0.0001, + "sup_loss": 1.2693317218364104 + }, + { + "step": 2198, + "lr": 0.0001, + "sup_loss": 1.2688353157655747 + }, + { + "step": 2199, + "lr": 0.0001, + "sup_loss": 1.2738660730075944 + }, + { + "step": 2200, + "lr": 0.0001, + "sup_loss": 1.330733796066386, + "lyap1_mean": -4.370917797088623, + "lyap1_max": -4.299170970916748, + "lyap_spec_mean": [ + -4.370917320251465, + -4.375146865844727 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2201, + "lr": 0.0001, + "sup_loss": 1.3092836169271123 + }, + { + "step": 2202, + "lr": 0.0001, + "sup_loss": 1.3644149166789004 + }, + { + "step": 2203, + "lr": 0.0001, + "sup_loss": 1.2639815556685523 + }, + { + "step": 2204, + "lr": 0.0001, + "sup_loss": 1.3415081728215852 + }, + { + "step": 2205, + "lr": 0.0001, + "sup_loss": 1.2956755063280603 + }, + { + "step": 2206, + "lr": 0.0001, + "sup_loss": 1.3345821212434483 + }, + { + "step": 2207, + "lr": 0.0001, + "sup_loss": 1.355644881675072 + }, + { + "step": 2208, + "lr": 0.0001, + "sup_loss": 1.3000607012227765 + }, + { + "step": 2209, + "lr": 0.0001, + "sup_loss": 1.344162214182756 + }, + { + "step": 2210, + "lr": 0.0001, + "sup_loss": 1.331940077121537 + }, + { + "step": 2211, + "lr": 0.0001, + "sup_loss": 1.3348133735796532 + }, + { + "step": 2212, + "lr": 0.0001, + "sup_loss": 1.3100609876390796 + }, + { + "step": 2213, + "lr": 
0.0001, + "sup_loss": 1.3512849573840575 + }, + { + "step": 2214, + "lr": 0.0001, + "sup_loss": 1.307766064450689 + }, + { + "step": 2215, + "lr": 0.0001, + "sup_loss": 1.3768194698259641 + }, + { + "step": 2216, + "lr": 0.0001, + "sup_loss": 1.3288311600790281 + }, + { + "step": 2217, + "lr": 0.0001, + "sup_loss": 1.2884017186125263 + }, + { + "step": 2218, + "lr": 0.0001, + "sup_loss": 1.3851612050792368 + }, + { + "step": 2219, + "lr": 0.0001, + "sup_loss": 1.35625183607619 + }, + { + "step": 2220, + "lr": 0.0001, + "sup_loss": 1.3553910626121708 + }, + { + "step": 2221, + "lr": 0.0001, + "sup_loss": 1.3277680553585989 + }, + { + "step": 2222, + "lr": 0.0001, + "sup_loss": 1.316930014170969 + }, + { + "step": 2223, + "lr": 0.0001, + "sup_loss": 1.3302656840314706 + }, + { + "step": 2224, + "lr": 0.0001, + "sup_loss": 1.3171828529588672 + }, + { + "step": 2225, + "lr": 0.0001, + "sup_loss": 1.2797836066447923 + }, + { + "step": 2226, + "lr": 0.0001, + "sup_loss": 1.3426658261742412 + }, + { + "step": 2227, + "lr": 0.0001, + "sup_loss": 1.3537414298182049 + }, + { + "step": 2228, + "lr": 0.0001, + "sup_loss": 1.3010493371177732 + }, + { + "step": 2229, + "lr": 0.0001, + "sup_loss": 1.3117608979623676 + }, + { + "step": 2230, + "lr": 0.0001, + "sup_loss": 1.2627483786912177 + }, + { + "step": 2231, + "lr": 0.0001, + "sup_loss": 1.3356480976978193 + }, + { + "step": 2232, + "lr": 0.0001, + "sup_loss": 1.3459694476206499 + }, + { + "step": 2233, + "lr": 0.0001, + "sup_loss": 1.3710728329698072 + }, + { + "step": 2234, + "lr": 0.0001, + "sup_loss": 1.3033581821387938 + }, + { + "step": 2235, + "lr": 0.0001, + "sup_loss": 1.3140669878439564 + }, + { + "step": 2236, + "lr": 0.0001, + "sup_loss": 1.304058596801065 + }, + { + "step": 2237, + "lr": 0.0001, + "sup_loss": 1.2970413362349422 + }, + { + "step": 2238, + "lr": 0.0001, + "sup_loss": 1.3094246789408701 + }, + { + "step": 2239, + "lr": 0.0001, + "sup_loss": 1.3056587509706588 + }, + { + "step": 2240, + "lr": 
0.0001, + "sup_loss": 1.2962148972815346 + }, + { + "step": 2241, + "lr": 0.0001, + "sup_loss": 1.3243387376259508 + }, + { + "step": 2242, + "lr": 0.0001, + "sup_loss": 1.293375948791174 + }, + { + "step": 2243, + "lr": 0.0001, + "sup_loss": 1.357990089939213 + }, + { + "step": 2244, + "lr": 0.0001, + "sup_loss": 1.318934825510924 + }, + { + "step": 2245, + "lr": 0.0001, + "sup_loss": 1.3476487393393164 + }, + { + "step": 2246, + "lr": 0.0001, + "sup_loss": 1.3425358741698097 + }, + { + "step": 2247, + "lr": 0.0001, + "sup_loss": 1.2768490666320824 + }, + { + "step": 2248, + "lr": 0.0001, + "sup_loss": 1.2994146343239787 + }, + { + "step": 2249, + "lr": 0.0001, + "sup_loss": 1.3038492994490618 + }, + { + "step": 2250, + "lr": 0.0001, + "sup_loss": 1.3314278343756802, + "lyap1_mean": -4.340500354766846, + "lyap1_max": -4.291837215423584, + "lyap_spec_mean": [ + -4.340500354766846, + -4.350625038146973 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2251, + "lr": 0.0001, + "sup_loss": 1.3000639445656519 + }, + { + "step": 2252, + "lr": 0.0001, + "sup_loss": 1.3034281148985618 + }, + { + "step": 2253, + "lr": 0.0001, + "sup_loss": 1.3396158917665675 + }, + { + "step": 2254, + "lr": 0.0001, + "sup_loss": 1.3285482061508742 + }, + { + "step": 2255, + "lr": 0.0001, + "sup_loss": 1.297098097064154 + }, + { + "step": 2256, + "lr": 0.0001, + "sup_loss": 1.312664637711699 + }, + { + "step": 2257, + "lr": 0.0001, + "sup_loss": 1.2923063818965888 + }, + { + "step": 2258, + "lr": 0.0001, + "sup_loss": 1.3235237463312237 + }, + { + "step": 2259, + "lr": 0.0001, + "sup_loss": 1.234957318875583 + }, + { + "step": 2260, + "lr": 0.0001, + "sup_loss": 1.286953514482732 + }, + { + "step": 2261, + "lr": 0.0001, + "sup_loss": 1.3518256800519548 + }, + { + "step": 2262, + "lr": 0.0001, + "sup_loss": 1.33673899038314 + }, + { + "step": 2263, + "lr": 0.0001, + "sup_loss": 1.3243817718249924 + }, + { + "step": 2264, + "lr": 0.0001, + "sup_loss": 1.2935011632597295 + }, + { + 
"step": 2265, + "lr": 0.0001, + "sup_loss": 1.2461945702068586 + }, + { + "step": 2266, + "lr": 0.0001, + "sup_loss": 1.2963698984102026 + }, + { + "step": 2267, + "lr": 0.0001, + "sup_loss": 1.3614569599735733 + }, + { + "step": 2268, + "lr": 0.0001, + "sup_loss": 1.3250678520458063 + }, + { + "step": 2269, + "lr": 0.0001, + "sup_loss": 1.2831093288148319 + }, + { + "step": 2270, + "lr": 0.0001, + "sup_loss": 1.304212326643754 + }, + { + "step": 2271, + "lr": 0.0001, + "sup_loss": 1.2703855860986704 + }, + { + "step": 2272, + "lr": 0.0001, + "sup_loss": 1.346533419892547 + }, + { + "step": 2273, + "lr": 0.0001, + "sup_loss": 1.3059458536543742 + }, + { + "step": 2274, + "lr": 0.0001, + "sup_loss": 1.3303298092786495 + }, + { + "step": 2275, + "lr": 0.0001, + "sup_loss": 1.3028803981568586 + }, + { + "step": 2276, + "lr": 0.0001, + "sup_loss": 1.322915034185758 + }, + { + "step": 2277, + "lr": 0.0001, + "sup_loss": 1.3117877563801217 + }, + { + "step": 2278, + "lr": 0.0001, + "sup_loss": 1.3404323568512164 + }, + { + "step": 2279, + "lr": 0.0001, + "sup_loss": 1.3483420061442617 + }, + { + "step": 2280, + "lr": 0.0001, + "sup_loss": 1.288083327219387 + }, + { + "step": 2281, + "lr": 0.0001, + "sup_loss": 1.3356251847063094 + }, + { + "step": 2282, + "lr": 0.0001, + "sup_loss": 1.2898513851524769 + }, + { + "step": 2283, + "lr": 0.0001, + "sup_loss": 1.3569953548869098 + }, + { + "step": 2284, + "lr": 0.0001, + "sup_loss": 1.264187957193366 + }, + { + "step": 2285, + "lr": 0.0001, + "sup_loss": 1.2627206931312875 + }, + { + "step": 2286, + "lr": 0.0001, + "sup_loss": 1.2841785237360703 + }, + { + "step": 2287, + "lr": 0.0001, + "sup_loss": 1.3559804829356044 + }, + { + "step": 2288, + "lr": 0.0001, + "sup_loss": 1.296922265892696 + }, + { + "step": 2289, + "lr": 0.0001, + "sup_loss": 1.2987764845396852 + }, + { + "step": 2290, + "lr": 0.0001, + "sup_loss": 1.2793764593998067 + }, + { + "step": 2291, + "lr": 0.0001, + "sup_loss": 1.3221652888963566 + }, + { + "step": 
2292, + "lr": 0.0001, + "sup_loss": 1.3641623010100081 + }, + { + "step": 2293, + "lr": 0.0001, + "sup_loss": 1.213996607716935 + }, + { + "step": 2294, + "lr": 0.0001, + "sup_loss": 1.3065556275077215 + }, + { + "step": 2295, + "lr": 0.0001, + "sup_loss": 1.2874602900935868 + }, + { + "step": 2296, + "lr": 0.0001, + "sup_loss": 1.3087776584520365 + }, + { + "step": 2297, + "lr": 0.0001, + "sup_loss": 1.3401450155556218 + }, + { + "step": 2298, + "lr": 0.0001, + "sup_loss": 1.2629370985379633 + }, + { + "step": 2299, + "lr": 0.0001, + "sup_loss": 1.281192253778699 + }, + { + "step": 2300, + "lr": 0.0001, + "sup_loss": 1.3378097323120646, + "lyap1_mean": -4.279443740844727, + "lyap1_max": -4.232276916503906, + "lyap_spec_mean": [ + -4.279443740844727, + -4.32424783706665 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2301, + "lr": 0.0001, + "sup_loss": 1.3007858713339784 + }, + { + "step": 2302, + "lr": 0.0001, + "sup_loss": 1.3257725206791242 + }, + { + "step": 2303, + "lr": 0.0001, + "sup_loss": 1.2849550329691797 + }, + { + "step": 2304, + "lr": 0.0001, + "sup_loss": 1.31333355293645 + }, + { + "step": 2305, + "lr": 0.0001, + "sup_loss": 1.242690810045472 + }, + { + "step": 2306, + "lr": 0.0001, + "sup_loss": 1.2955308721126266 + }, + { + "step": 2307, + "lr": 0.0001, + "sup_loss": 1.297947357002302 + }, + { + "step": 2308, + "lr": 0.0001, + "sup_loss": 1.3009446400414433 + }, + { + "step": 2309, + "lr": 0.0001, + "sup_loss": 1.306174469920154 + }, + { + "step": 2310, + "lr": 0.0001, + "sup_loss": 1.3386318546068736 + }, + { + "step": 2311, + "lr": 0.0001, + "sup_loss": 1.297960029837302 + }, + { + "step": 2312, + "lr": 0.0001, + "sup_loss": 1.301810373709632 + }, + { + "step": 2313, + "lr": 0.0001, + "sup_loss": 1.3089081595526004 + }, + { + "step": 2314, + "lr": 0.0001, + "sup_loss": 1.3210347724118987 + }, + { + "step": 2315, + "lr": 0.0001, + "sup_loss": 1.3101104274607664 + }, + { + "step": 2316, + "lr": 0.0001, + "sup_loss": 1.320553458028757 
+ }, + { + "step": 2317, + "lr": 0.0001, + "sup_loss": 1.290975766284836 + }, + { + "step": 2318, + "lr": 0.0001, + "sup_loss": 1.316027805083375 + }, + { + "step": 2319, + "lr": 0.0001, + "sup_loss": 1.2880183877077627 + }, + { + "step": 2320, + "lr": 0.0001, + "sup_loss": 1.3236236642933614 + }, + { + "step": 2321, + "lr": 0.0001, + "sup_loss": 1.305926610724108 + }, + { + "step": 2322, + "lr": 0.0001, + "sup_loss": 1.3326279069545166 + }, + { + "step": 2323, + "lr": 0.0001, + "sup_loss": 1.2911964906673188 + }, + { + "step": 2324, + "lr": 0.0001, + "sup_loss": 1.3329212298071615 + }, + { + "step": 2325, + "lr": 0.0001, + "sup_loss": 1.2927224436214344 + }, + { + "step": 2326, + "lr": 0.0001, + "sup_loss": 1.2902418414141705 + }, + { + "step": 2327, + "lr": 0.0001, + "sup_loss": 1.2870042702625197 + }, + { + "step": 2328, + "lr": 0.0001, + "sup_loss": 1.3122620901358362 + }, + { + "step": 2329, + "lr": 0.0001, + "sup_loss": 1.3147664825114576 + }, + { + "step": 2330, + "lr": 0.0001, + "sup_loss": 1.2560404035459811 + }, + { + "step": 2331, + "lr": 0.0001, + "sup_loss": 1.262692801802689 + }, + { + "step": 2332, + "lr": 0.0001, + "sup_loss": 1.3433351122740804 + }, + { + "step": 2333, + "lr": 0.0001, + "sup_loss": 1.2864438095682962 + }, + { + "step": 2334, + "lr": 0.0001, + "sup_loss": 1.3354133653173805 + }, + { + "step": 2335, + "lr": 0.0001, + "sup_loss": 1.282173158019353 + }, + { + "step": 2336, + "lr": 0.0001, + "sup_loss": 1.2734277734282753 + }, + { + "step": 2337, + "lr": 0.0001, + "sup_loss": 1.3083462499143748 + }, + { + "step": 2338, + "lr": 0.0001, + "sup_loss": 1.2852205000510637 + }, + { + "step": 2339, + "lr": 0.0001, + "sup_loss": 1.2773747932786292 + }, + { + "step": 2340, + "lr": 0.0001, + "sup_loss": 1.3204464365240287 + }, + { + "step": 2341, + "lr": 0.0001, + "sup_loss": 1.3126080650564995 + }, + { + "step": 2342, + "lr": 0.0001, + "sup_loss": 1.296881097565329 + }, + { + "step": 2343, + "lr": 0.0001, + "sup_loss": 1.2790243381960729 + }, + 
{ + "step": 2344, + "lr": 0.0001, + "sup_loss": 1.2858772879976104 + }, + { + "step": 2345, + "lr": 0.0001, + "sup_loss": 1.2891286760926386 + }, + { + "step": 2346, + "lr": 0.0001, + "sup_loss": 1.2891443964823313 + }, + { + "step": 2347, + "lr": 0.0001, + "sup_loss": 1.2526664140698167 + }, + { + "step": 2348, + "lr": 0.0001, + "sup_loss": 1.2566865753657532 + }, + { + "step": 2349, + "lr": 0.0001, + "sup_loss": 1.2132622038014034 + }, + { + "step": 2350, + "lr": 0.0001, + "sup_loss": 1.3199532792448685, + "lyap1_mean": -4.297420024871826, + "lyap1_max": -4.208258628845215, + "lyap_spec_mean": [ + -4.297419548034668, + -4.2948503494262695 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2351, + "lr": 0.0001, + "sup_loss": 1.3260009331325753 + }, + { + "step": 2352, + "lr": 0.0001, + "sup_loss": 1.287499754615069 + }, + { + "step": 2353, + "lr": 0.0001, + "sup_loss": 1.3175281646058237 + }, + { + "step": 2354, + "lr": 0.0001, + "sup_loss": 1.3285812832935602 + }, + { + "step": 2355, + "lr": 0.0001, + "sup_loss": 1.338241712745097 + }, + { + "step": 2356, + "lr": 0.0001, + "sup_loss": 1.3458525924552727 + }, + { + "step": 2357, + "lr": 0.0001, + "sup_loss": 1.2952742371828823 + }, + { + "step": 2358, + "lr": 0.0001, + "sup_loss": 1.2742454256445066 + }, + { + "step": 2359, + "lr": 0.0001, + "sup_loss": 1.2319133298195264 + }, + { + "step": 2360, + "lr": 0.0001, + "sup_loss": 1.285986288064156 + }, + { + "step": 2361, + "lr": 0.0001, + "sup_loss": 1.3126345716440837 + }, + { + "step": 2362, + "lr": 0.0001, + "sup_loss": 1.3206846608730833 + }, + { + "step": 2363, + "lr": 0.0001, + "sup_loss": 1.3302782117074967 + }, + { + "step": 2364, + "lr": 0.0001, + "sup_loss": 1.2971833068745262 + }, + { + "step": 2365, + "lr": 0.0001, + "sup_loss": 1.3219994814629266 + }, + { + "step": 2366, + "lr": 0.0001, + "sup_loss": 1.2504207152502218 + }, + { + "step": 2367, + "lr": 0.0001, + "sup_loss": 1.2684664150500888 + }, + { + "step": 2368, + "lr": 0.0001, + 
"sup_loss": 1.285101694636452 + }, + { + "step": 2369, + "lr": 0.0001, + "sup_loss": 1.2934585618586536 + }, + { + "step": 2370, + "lr": 0.0001, + "sup_loss": 1.2777614769302308 + }, + { + "step": 2371, + "lr": 0.0001, + "sup_loss": 1.2467405513847356 + }, + { + "step": 2372, + "lr": 0.0001, + "sup_loss": 1.2657364330853962 + }, + { + "step": 2373, + "lr": 0.0001, + "sup_loss": 1.2940289240047569 + }, + { + "step": 2374, + "lr": 0.0001, + "sup_loss": 1.3017253523917187 + }, + { + "step": 2375, + "lr": 0.0001, + "sup_loss": 1.3413423609738866 + }, + { + "step": 2376, + "lr": 0.0001, + "sup_loss": 1.3098601447706604 + }, + { + "step": 2377, + "lr": 0.0001, + "sup_loss": 1.2789357624995552 + }, + { + "step": 2378, + "lr": 0.0001, + "sup_loss": 1.2458950663824655 + }, + { + "step": 2379, + "lr": 0.0001, + "sup_loss": 1.2543408363804593 + }, + { + "step": 2380, + "lr": 0.0001, + "sup_loss": 1.2470318968628917 + }, + { + "step": 2381, + "lr": 0.0001, + "sup_loss": 1.2973113604321573 + }, + { + "step": 2382, + "lr": 0.0001, + "sup_loss": 1.296711494089275 + }, + { + "step": 2383, + "lr": 0.0001, + "sup_loss": 1.298832765455335 + }, + { + "step": 2384, + "lr": 0.0001, + "sup_loss": 1.2254319481020883 + }, + { + "step": 2385, + "lr": 0.0001, + "sup_loss": 1.3381799252639806 + }, + { + "step": 2386, + "lr": 0.0001, + "sup_loss": 1.2801464966189444 + }, + { + "step": 2387, + "lr": 0.0001, + "sup_loss": 1.308961306638708 + }, + { + "step": 2388, + "lr": 0.0001, + "sup_loss": 1.225490189906738 + }, + { + "step": 2389, + "lr": 0.0001, + "sup_loss": 1.26901640122553 + }, + { + "step": 2390, + "lr": 0.0001, + "sup_loss": 1.2838795127609883 + }, + { + "step": 2391, + "lr": 0.0001, + "sup_loss": 1.3317961906134506 + }, + { + "step": 2392, + "lr": 0.0001, + "sup_loss": 1.2411537722106925 + }, + { + "step": 2393, + "lr": 0.0001, + "sup_loss": 1.2786701588518103 + }, + { + "step": 2394, + "lr": 0.0001, + "sup_loss": 1.2948194968822064 + }, + { + "step": 2395, + "lr": 0.0001, + 
"sup_loss": 1.2738479733538153 + }, + { + "step": 2396, + "lr": 0.0001, + "sup_loss": 1.2218023667057016 + }, + { + "step": 2397, + "lr": 0.0001, + "sup_loss": 1.2689629790696668 + }, + { + "step": 2398, + "lr": 0.0001, + "sup_loss": 1.280866070768354 + }, + { + "step": 2399, + "lr": 0.0001, + "sup_loss": 1.293000455540708 + }, + { + "step": 2400, + "lr": 0.0001, + "sup_loss": 1.3015417143182788, + "lyap1_mean": -4.240116596221924, + "lyap1_max": -4.1728386878967285, + "lyap_spec_mean": [ + -4.240116596221924, + -4.259872913360596 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2401, + "lr": 0.0001, + "sup_loss": 1.2946066138545067 + }, + { + "step": 2402, + "lr": 0.0001, + "sup_loss": 1.306802843214579 + }, + { + "step": 2403, + "lr": 0.0001, + "sup_loss": 1.2859214071476572 + }, + { + "step": 2404, + "lr": 0.0001, + "sup_loss": 1.2818976045991124 + }, + { + "step": 2405, + "lr": 0.0001, + "sup_loss": 1.298702635190277 + }, + { + "step": 2406, + "lr": 0.0001, + "sup_loss": 1.2683154138622867 + }, + { + "step": 2407, + "lr": 0.0001, + "sup_loss": 1.2930010072673241 + }, + { + "step": 2408, + "lr": 0.0001, + "sup_loss": 1.28000310706559 + }, + { + "step": 2409, + "lr": 0.0001, + "sup_loss": 1.2550430496003386 + }, + { + "step": 2410, + "lr": 0.0001, + "sup_loss": 1.2639766048330499 + }, + { + "step": 2411, + "lr": 0.0001, + "sup_loss": 1.254134811758989 + }, + { + "step": 2412, + "lr": 0.0001, + "sup_loss": 1.2460116320165038 + }, + { + "step": 2413, + "lr": 0.0001, + "sup_loss": 1.2728647458887183 + }, + { + "step": 2414, + "lr": 0.0001, + "sup_loss": 1.318575239374344 + }, + { + "step": 2415, + "lr": 0.0001, + "sup_loss": 1.30180953985345 + }, + { + "step": 2416, + "lr": 0.0001, + "sup_loss": 1.2474197947403924 + }, + { + "step": 2417, + "lr": 0.0001, + "sup_loss": 1.2773624807953932 + }, + { + "step": 2418, + "lr": 0.0001, + "sup_loss": 1.299110880659272 + }, + { + "step": 2419, + "lr": 0.0001, + "sup_loss": 1.2912462042806558 + }, + { + "step": 
2420, + "lr": 0.0001, + "sup_loss": 1.2781167180210447 + }, + { + "step": 2421, + "lr": 0.0001, + "sup_loss": 1.2624044367222242 + }, + { + "step": 2422, + "lr": 0.0001, + "sup_loss": 1.2567010590057595 + }, + { + "step": 2423, + "lr": 0.0001, + "sup_loss": 1.2735732058618787 + }, + { + "step": 2424, + "lr": 0.0001, + "sup_loss": 1.2617191771051555 + }, + { + "step": 2425, + "lr": 0.0001, + "sup_loss": 1.254120966278177 + }, + { + "step": 2426, + "lr": 0.0001, + "sup_loss": 1.2726287865388617 + }, + { + "step": 2427, + "lr": 0.0001, + "sup_loss": 1.296165394747546 + }, + { + "step": 2428, + "lr": 0.0001, + "sup_loss": 1.2621463093106076 + }, + { + "step": 2429, + "lr": 0.0001, + "sup_loss": 1.2831554361124158 + }, + { + "step": 2430, + "lr": 0.0001, + "sup_loss": 1.2736333565784848 + }, + { + "step": 2431, + "lr": 0.0001, + "sup_loss": 1.2457267881996474 + }, + { + "step": 2432, + "lr": 0.0001, + "sup_loss": 1.3013917129349992 + }, + { + "step": 2433, + "lr": 0.0001, + "sup_loss": 1.3168799829825315 + }, + { + "step": 2434, + "lr": 0.0001, + "sup_loss": 1.2908628853594517 + }, + { + "step": 2435, + "lr": 0.0001, + "sup_loss": 1.2259381371584663 + }, + { + "step": 2436, + "lr": 0.0001, + "sup_loss": 1.2320734466792544 + }, + { + "step": 2437, + "lr": 0.0001, + "sup_loss": 1.3015850702337257 + }, + { + "step": 2438, + "lr": 0.0001, + "sup_loss": 1.272234096262415 + }, + { + "step": 2439, + "lr": 0.0001, + "sup_loss": 1.2892698855646032 + }, + { + "step": 2440, + "lr": 0.0001, + "sup_loss": 1.2817456438351889 + }, + { + "step": 2441, + "lr": 0.0001, + "sup_loss": 1.3086320329068524 + }, + { + "step": 2442, + "lr": 0.0001, + "sup_loss": 1.2150263443886868 + }, + { + "step": 2443, + "lr": 0.0001, + "sup_loss": 1.3220695734040846 + }, + { + "step": 2444, + "lr": 0.0001, + "sup_loss": 1.2880881516594758 + }, + { + "step": 2445, + "lr": 0.0001, + "sup_loss": 1.255566127178477 + }, + { + "step": 2446, + "lr": 0.0001, + "sup_loss": 1.2918217794722304 + }, + { + "step": 2447, 
+ "lr": 0.0001, + "sup_loss": 1.265959883368338 + }, + { + "step": 2448, + "lr": 0.0001, + "sup_loss": 1.2508129313012017 + }, + { + "step": 2449, + "lr": 0.0001, + "sup_loss": 1.2788072625567455 + }, + { + "step": 2450, + "lr": 0.0001, + "sup_loss": 1.2966244264743472, + "lyap1_mean": -4.229255199432373, + "lyap1_max": -4.196020603179932, + "lyap_spec_mean": [ + -4.229255199432373, + -4.262547492980957 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2451, + "lr": 0.0001, + "sup_loss": 1.2504113864509403 + }, + { + "step": 2452, + "lr": 0.0001, + "sup_loss": 1.246732217642998 + }, + { + "step": 2453, + "lr": 0.0001, + "sup_loss": 1.2849490521020903 + }, + { + "step": 2454, + "lr": 0.0001, + "sup_loss": 1.2892670504853885 + }, + { + "step": 2455, + "lr": 0.0001, + "sup_loss": 1.2117253504047698 + }, + { + "step": 2456, + "lr": 0.0001, + "sup_loss": 1.3424819013472709 + }, + { + "step": 2457, + "lr": 0.0001, + "sup_loss": 1.289108257120321 + }, + { + "step": 2458, + "lr": 0.0001, + "sup_loss": 1.2898390222001384 + }, + { + "step": 2459, + "lr": 0.0001, + "sup_loss": 1.2827777128936115 + }, + { + "step": 2460, + "lr": 0.0001, + "sup_loss": 1.2716542126633752 + }, + { + "step": 2461, + "lr": 0.0001, + "sup_loss": 1.267684941688289 + }, + { + "step": 2462, + "lr": 0.0001, + "sup_loss": 1.3097554413412988 + }, + { + "step": 2463, + "lr": 0.0001, + "sup_loss": 1.2194675708162992 + }, + { + "step": 2464, + "lr": 0.0001, + "sup_loss": 1.3035253313827346 + }, + { + "step": 2465, + "lr": 0.0001, + "sup_loss": 1.2342608405233164 + }, + { + "step": 2466, + "lr": 0.0001, + "sup_loss": 1.2967217855370181 + }, + { + "step": 2467, + "lr": 0.0001, + "sup_loss": 1.215006770190903 + }, + { + "step": 2468, + "lr": 0.0001, + "sup_loss": 1.3502632928250051 + }, + { + "step": 2469, + "lr": 0.0001, + "sup_loss": 1.2625590826493243 + }, + { + "step": 2470, + "lr": 0.0001, + "sup_loss": 1.2648553913926301 + }, + { + "step": 2471, + "lr": 0.0001, + "sup_loss": 1.295449054032632 + 
}, + { + "step": 2472, + "lr": 0.0001, + "sup_loss": 1.3336467729727075 + }, + { + "step": 2473, + "lr": 0.0001, + "sup_loss": 1.2827146369448235 + }, + { + "step": 2474, + "lr": 0.0001, + "sup_loss": 1.269958354153474 + }, + { + "step": 2475, + "lr": 0.0001, + "sup_loss": 1.2487130240178912 + }, + { + "step": 2476, + "lr": 0.0001, + "sup_loss": 1.279820387709458 + }, + { + "step": 2477, + "lr": 0.0001, + "sup_loss": 1.3022651386449888 + }, + { + "step": 2478, + "lr": 0.0001, + "sup_loss": 1.3054360744302722 + }, + { + "step": 2479, + "lr": 0.0001, + "sup_loss": 1.2928549955115907 + }, + { + "step": 2480, + "lr": 0.0001, + "sup_loss": 1.2153522634201608 + }, + { + "step": 2481, + "lr": 0.0001, + "sup_loss": 1.2875561582938806 + }, + { + "step": 2482, + "lr": 0.0001, + "sup_loss": 1.267215253171082 + }, + { + "step": 2483, + "lr": 0.0001, + "sup_loss": 1.229934348336454 + }, + { + "step": 2484, + "lr": 0.0001, + "sup_loss": 1.213394197330342 + }, + { + "step": 2485, + "lr": 0.0001, + "sup_loss": 1.2528756305615625 + }, + { + "step": 2486, + "lr": 0.0001, + "sup_loss": 1.2667774988689504 + }, + { + "step": 2487, + "lr": 0.0001, + "sup_loss": 1.283049295912387 + }, + { + "step": 2488, + "lr": 0.0001, + "sup_loss": 1.2600793211833328 + }, + { + "step": 2489, + "lr": 0.0001, + "sup_loss": 1.2370128738967054 + }, + { + "step": 2490, + "lr": 0.0001, + "sup_loss": 1.2739358335614426 + }, + { + "step": 2491, + "lr": 0.0001, + "sup_loss": 1.2538356788654506 + }, + { + "step": 2492, + "lr": 0.0001, + "sup_loss": 1.2763594495617916 + }, + { + "step": 2493, + "lr": 0.0001, + "sup_loss": 1.258723758548922 + }, + { + "step": 2494, + "lr": 0.0001, + "sup_loss": 1.279014893825708 + }, + { + "step": 2495, + "lr": 0.0001, + "sup_loss": 1.2417430219735484 + }, + { + "step": 2496, + "lr": 0.0001, + "sup_loss": 1.2090542912444422 + }, + { + "step": 2497, + "lr": 0.0001, + "sup_loss": 1.2765879568712852 + }, + { + "step": 2498, + "lr": 0.0001, + "sup_loss": 1.291159374913085 + }, + { + 
"step": 2499, + "lr": 0.0001, + "sup_loss": 1.2514413679581802 + }, + { + "step": 2500, + "lr": 0.0001, + "sup_loss": 1.2000801420569038, + "lyap1_mean": -4.217179298400879, + "lyap1_max": -4.172418117523193, + "lyap_spec_mean": [ + -4.217179298400879, + -4.243560314178467 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2501, + "lr": 0.0001, + "sup_loss": 1.2439328700323946 + }, + { + "step": 2502, + "lr": 0.0001, + "sup_loss": 1.3143452509486122 + }, + { + "step": 2503, + "lr": 0.0001, + "sup_loss": 1.2673685129943661 + }, + { + "step": 2504, + "lr": 0.0001, + "sup_loss": 1.2808396534469415 + }, + { + "step": 2505, + "lr": 0.0001, + "sup_loss": 1.2310802179945637 + }, + { + "step": 2506, + "lr": 0.0001, + "sup_loss": 1.2173190142924624 + }, + { + "step": 2507, + "lr": 0.0001, + "sup_loss": 1.3448147837455946 + }, + { + "step": 2508, + "lr": 0.0001, + "sup_loss": 1.3251125576385232 + }, + { + "step": 2509, + "lr": 0.0001, + "sup_loss": 1.287083943906881 + }, + { + "step": 2510, + "lr": 0.0001, + "sup_loss": 1.242063283896983 + }, + { + "step": 2511, + "lr": 0.0001, + "sup_loss": 1.2420088034638834 + }, + { + "step": 2512, + "lr": 0.0001, + "sup_loss": 1.2524525098606696 + }, + { + "step": 2513, + "lr": 0.0001, + "sup_loss": 1.2946149493114563 + }, + { + "step": 2514, + "lr": 0.0001, + "sup_loss": 1.1929837765898448 + }, + { + "step": 2515, + "lr": 0.0001, + "sup_loss": 1.2787169941663579 + }, + { + "step": 2516, + "lr": 0.0001, + "sup_loss": 1.3020050720880492 + }, + { + "step": 2517, + "lr": 0.0001, + "sup_loss": 1.2479310861141053 + }, + { + "step": 2518, + "lr": 0.0001, + "sup_loss": 1.2811408140366618 + }, + { + "step": 2519, + "lr": 0.0001, + "sup_loss": 1.3105722151334374 + }, + { + "step": 2520, + "lr": 0.0001, + "sup_loss": 1.235313479271157 + }, + { + "step": 2521, + "lr": 0.0001, + "sup_loss": 1.302235660512153 + }, + { + "step": 2522, + "lr": 0.0001, + "sup_loss": 1.298328913905733 + }, + { + "step": 2523, + "lr": 0.0001, + "sup_loss": 
1.2939428741425083 + }, + { + "step": 2524, + "lr": 0.0001, + "sup_loss": 1.2890248321156432 + }, + { + "step": 2525, + "lr": 0.0001, + "sup_loss": 1.2472886043704121 + }, + { + "step": 2526, + "lr": 0.0001, + "sup_loss": 1.2207688130107575 + }, + { + "step": 2527, + "lr": 0.0001, + "sup_loss": 1.259757747898356 + }, + { + "step": 2528, + "lr": 0.0001, + "sup_loss": 1.3041591155231702 + }, + { + "step": 2529, + "lr": 0.0001, + "sup_loss": 1.2262317617995946 + }, + { + "step": 2530, + "lr": 0.0001, + "sup_loss": 1.294238339449141 + }, + { + "step": 2531, + "lr": 0.0001, + "sup_loss": 1.2852696308314464 + }, + { + "step": 2532, + "lr": 0.0001, + "sup_loss": 1.2075079080098947 + }, + { + "step": 2533, + "lr": 0.0001, + "sup_loss": 1.3465302715892369 + }, + { + "step": 2534, + "lr": 0.0001, + "sup_loss": 1.2334916595477938 + }, + { + "step": 2535, + "lr": 0.0001, + "sup_loss": 1.2522230589393377 + }, + { + "step": 2536, + "lr": 0.0001, + "sup_loss": 1.234307174978886 + }, + { + "step": 2537, + "lr": 0.0001, + "sup_loss": 1.275342292878406 + }, + { + "step": 2538, + "lr": 0.0001, + "sup_loss": 1.220077455024498 + }, + { + "step": 2539, + "lr": 0.0001, + "sup_loss": 1.2531405036580414 + }, + { + "step": 2540, + "lr": 0.0001, + "sup_loss": 1.2796458763756555 + }, + { + "step": 2541, + "lr": 0.0001, + "sup_loss": 1.2231552348355517 + }, + { + "step": 2542, + "lr": 0.0001, + "sup_loss": 1.2595762006221412 + }, + { + "step": 2543, + "lr": 0.0001, + "sup_loss": 1.2839878766508974 + }, + { + "step": 2544, + "lr": 0.0001, + "sup_loss": 1.2644498208258539 + }, + { + "step": 2545, + "lr": 0.0001, + "sup_loss": 1.257505981434681 + }, + { + "step": 2546, + "lr": 0.0001, + "sup_loss": 1.2642471513879407 + }, + { + "step": 2547, + "lr": 0.0001, + "sup_loss": 1.2607148550835343 + }, + { + "step": 2548, + "lr": 0.0001, + "sup_loss": 1.2942780475383033 + }, + { + "step": 2549, + "lr": 0.0001, + "sup_loss": 1.2503602850289803 + }, + { + "step": 2550, + "lr": 0.0001, + "sup_loss": 
1.2673350576101512, + "lyap1_mean": -4.21592903137207, + "lyap1_max": -4.153813362121582, + "lyap_spec_mean": [ + -4.21592903137207, + -4.224661350250244 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2551, + "lr": 0.0001, + "sup_loss": 1.2887762823339455 + }, + { + "step": 2552, + "lr": 0.0001, + "sup_loss": 1.241141103749948 + }, + { + "step": 2553, + "lr": 0.0001, + "sup_loss": 1.262704091041656 + }, + { + "step": 2554, + "lr": 0.0001, + "sup_loss": 1.2535835200065104 + }, + { + "step": 2555, + "lr": 0.0001, + "sup_loss": 1.2706815370886373 + }, + { + "step": 2556, + "lr": 0.0001, + "sup_loss": 1.2076767199113734 + }, + { + "step": 2557, + "lr": 0.0001, + "sup_loss": 1.2880257333903509 + }, + { + "step": 2558, + "lr": 0.0001, + "sup_loss": 1.2545560325594527 + }, + { + "step": 2559, + "lr": 0.0001, + "sup_loss": 1.187565222373949 + }, + { + "step": 2560, + "lr": 0.0001, + "sup_loss": 1.2987840760479166 + }, + { + "step": 2561, + "lr": 0.0001, + "sup_loss": 1.257779380655759 + }, + { + "step": 2562, + "lr": 0.0001, + "sup_loss": 1.2682001244281944 + }, + { + "step": 2563, + "lr": 0.0001, + "sup_loss": 1.2517458785409872 + }, + { + "step": 2564, + "lr": 0.0001, + "sup_loss": 1.2393170114115994 + }, + { + "step": 2565, + "lr": 0.0001, + "sup_loss": 1.2290434838186444 + }, + { + "step": 2566, + "lr": 0.0001, + "sup_loss": 1.2609742271144768 + }, + { + "step": 2567, + "lr": 0.0001, + "sup_loss": 1.2380305560369997 + }, + { + "step": 2568, + "lr": 0.0001, + "sup_loss": 1.262483511359272 + }, + { + "step": 2569, + "lr": 0.0001, + "sup_loss": 1.2258235396637527 + }, + { + "step": 2570, + "lr": 0.0001, + "sup_loss": 1.2685639145590661 + }, + { + "step": 2571, + "lr": 0.0001, + "sup_loss": 1.2795514178158474 + }, + { + "step": 2572, + "lr": 0.0001, + "sup_loss": 1.2661739093207907 + }, + { + "step": 2573, + "lr": 0.0001, + "sup_loss": 1.279123890841784 + }, + { + "step": 2574, + "lr": 0.0001, + "sup_loss": 1.2732338339180482 + }, + { + "step": 2575, + "lr": 
0.0001, + "sup_loss": 1.274722929177498 + }, + { + "step": 2576, + "lr": 0.0001, + "sup_loss": 1.2859501997540614 + }, + { + "step": 2577, + "lr": 0.0001, + "sup_loss": 1.2371998753680382 + }, + { + "step": 2578, + "lr": 0.0001, + "sup_loss": 1.1754687670090191 + }, + { + "step": 2579, + "lr": 0.0001, + "sup_loss": 1.2441350495107462 + }, + { + "step": 2580, + "lr": 0.0001, + "sup_loss": 1.2698504175062326 + }, + { + "step": 2581, + "lr": 0.0001, + "sup_loss": 1.1897744577146239 + }, + { + "step": 2582, + "lr": 0.0001, + "sup_loss": 1.2514506964149756 + }, + { + "step": 2583, + "lr": 0.0001, + "sup_loss": 1.2711750923048821 + }, + { + "step": 2584, + "lr": 0.0001, + "sup_loss": 1.2116420451454948 + }, + { + "step": 2585, + "lr": 0.0001, + "sup_loss": 1.2466947111136313 + }, + { + "step": 2586, + "lr": 0.0001, + "sup_loss": 1.2921165799422651 + }, + { + "step": 2587, + "lr": 0.0001, + "sup_loss": 1.2430475776854952 + }, + { + "step": 2588, + "lr": 0.0001, + "sup_loss": 1.2835425552678275 + }, + { + "step": 2589, + "lr": 0.0001, + "sup_loss": 1.2480793811438373 + }, + { + "step": 2590, + "lr": 0.0001, + "sup_loss": 1.276947118539664 + }, + { + "step": 2591, + "lr": 0.0001, + "sup_loss": 1.2519617062660808 + }, + { + "step": 2592, + "lr": 0.0001, + "sup_loss": 1.2455620593904937 + }, + { + "step": 2593, + "lr": 0.0001, + "sup_loss": 1.2547673263926888 + }, + { + "step": 2594, + "lr": 0.0001, + "sup_loss": 1.2863320880519333 + }, + { + "step": 2595, + "lr": 0.0001, + "sup_loss": 1.1976371455494406 + }, + { + "step": 2596, + "lr": 0.0001, + "sup_loss": 1.2509705637084054 + }, + { + "step": 2597, + "lr": 0.0001, + "sup_loss": 1.17630289895003 + }, + { + "step": 2598, + "lr": 0.0001, + "sup_loss": 1.2573487784809798 + }, + { + "step": 2599, + "lr": 0.0001, + "sup_loss": 1.2152991887407907 + }, + { + "step": 2600, + "lr": 0.0001, + "sup_loss": 1.266431682760794, + "lyap1_mean": -4.144527912139893, + "lyap1_max": -4.123527526855469, + "lyap_spec_mean": [ + 
-4.144528388977051, + -4.153683662414551 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2601, + "lr": 0.0001, + "sup_loss": 1.23895705764164 + }, + { + "step": 2602, + "lr": 0.0001, + "sup_loss": 1.2688821114915814 + }, + { + "step": 2603, + "lr": 0.0001, + "sup_loss": 1.2083745009795481 + }, + { + "step": 2604, + "lr": 0.0001, + "sup_loss": 1.2364987732156745 + }, + { + "step": 2605, + "lr": 0.0001, + "sup_loss": 1.232254561597169 + }, + { + "step": 2606, + "lr": 0.0001, + "sup_loss": 1.2348598758111264 + }, + { + "step": 2607, + "lr": 0.0001, + "sup_loss": 1.2353866991555655 + }, + { + "step": 2608, + "lr": 0.0001, + "sup_loss": 1.2618848504744413 + }, + { + "step": 2609, + "lr": 0.0001, + "sup_loss": 1.2672213401881256 + }, + { + "step": 2610, + "lr": 0.0001, + "sup_loss": 1.2346763322061476 + }, + { + "step": 2611, + "lr": 0.0001, + "sup_loss": 1.2315768670526297 + }, + { + "step": 2612, + "lr": 0.0001, + "sup_loss": 1.2328104890937324 + }, + { + "step": 2613, + "lr": 0.0001, + "sup_loss": 1.2395546664918784 + }, + { + "step": 2614, + "lr": 0.0001, + "sup_loss": 1.3157920437476391 + }, + { + "step": 2615, + "lr": 0.0001, + "sup_loss": 1.2821383600093852 + }, + { + "step": 2616, + "lr": 0.0001, + "sup_loss": 1.254724066414531 + }, + { + "step": 2617, + "lr": 0.0001, + "sup_loss": 1.2765015094135175 + }, + { + "step": 2618, + "lr": 0.0001, + "sup_loss": 1.2221009917192485 + }, + { + "step": 2619, + "lr": 0.0001, + "sup_loss": 1.1918406846055212 + }, + { + "step": 2620, + "lr": 0.0001, + "sup_loss": 1.199919375046413 + }, + { + "step": 2621, + "lr": 0.0001, + "sup_loss": 1.2121639153892665 + }, + { + "step": 2622, + "lr": 0.0001, + "sup_loss": 1.2493638022090487 + }, + { + "step": 2623, + "lr": 0.0001, + "sup_loss": 1.269026483053199 + }, + { + "step": 2624, + "lr": 0.0001, + "sup_loss": 1.1964097100302515 + }, + { + "step": 2625, + "lr": 0.0001, + "sup_loss": 1.227426960907999 + }, + { + "step": 2626, + "lr": 0.0001, + "sup_loss": 1.239799090813128 
+ }, + { + "step": 2627, + "lr": 0.0001, + "sup_loss": 1.215106039608587 + }, + { + "step": 2628, + "lr": 0.0001, + "sup_loss": 1.2362285599636007 + }, + { + "step": 2629, + "lr": 0.0001, + "sup_loss": 1.2645066298795702 + }, + { + "step": 2630, + "lr": 0.0001, + "sup_loss": 1.2202733650143411 + }, + { + "step": 2631, + "lr": 0.0001, + "sup_loss": 1.193141760689098 + }, + { + "step": 2632, + "lr": 0.0001, + "sup_loss": 1.2199125824204022 + }, + { + "step": 2633, + "lr": 0.0001, + "sup_loss": 1.2497159378308464 + }, + { + "step": 2634, + "lr": 0.0001, + "sup_loss": 1.2403857720403721 + }, + { + "step": 2635, + "lr": 0.0001, + "sup_loss": 1.2408493330085664 + }, + { + "step": 2636, + "lr": 0.0001, + "sup_loss": 1.2050154032179226 + }, + { + "step": 2637, + "lr": 0.0001, + "sup_loss": 1.2207398094125943 + }, + { + "step": 2638, + "lr": 0.0001, + "sup_loss": 1.2335285524949073 + }, + { + "step": 2639, + "lr": 0.0001, + "sup_loss": 1.2272963920441922 + }, + { + "step": 2640, + "lr": 0.0001, + "sup_loss": 1.2512620944172685 + }, + { + "step": 2641, + "lr": 0.0001, + "sup_loss": 1.2681297024570186 + }, + { + "step": 2642, + "lr": 0.0001, + "sup_loss": 1.2275861884123622 + }, + { + "step": 2643, + "lr": 0.0001, + "sup_loss": 1.2321183409350265 + }, + { + "step": 2644, + "lr": 0.0001, + "sup_loss": 1.233677161945556 + }, + { + "step": 2645, + "lr": 0.0001, + "sup_loss": 1.24285375712624 + }, + { + "step": 2646, + "lr": 0.0001, + "sup_loss": 1.229616931781458 + }, + { + "step": 2647, + "lr": 0.0001, + "sup_loss": 1.2251907950513785 + }, + { + "step": 2648, + "lr": 0.0001, + "sup_loss": 1.2131537689713905 + }, + { + "step": 2649, + "lr": 0.0001, + "sup_loss": 1.184480968859997 + }, + { + "step": 2650, + "lr": 0.0001, + "sup_loss": 1.2991780523945282, + "lyap1_mean": -4.168248176574707, + "lyap1_max": -4.102604866027832, + "lyap_spec_mean": [ + -4.168248176574707, + -4.166050910949707 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2651, + "lr": 0.0001, + 
"sup_loss": 1.2142627714607295 + }, + { + "step": 2652, + "lr": 0.0001, + "sup_loss": 1.2013653125026322 + }, + { + "step": 2653, + "lr": 0.0001, + "sup_loss": 1.2262574699294047 + }, + { + "step": 2654, + "lr": 0.0001, + "sup_loss": 1.2792890708553761 + }, + { + "step": 2655, + "lr": 0.0001, + "sup_loss": 1.2300058791782786 + }, + { + "step": 2656, + "lr": 0.0001, + "sup_loss": 1.2325860259605779 + }, + { + "step": 2657, + "lr": 0.0001, + "sup_loss": 1.2696026453761364 + }, + { + "step": 2658, + "lr": 0.0001, + "sup_loss": 1.25776689775061 + }, + { + "step": 2659, + "lr": 0.0001, + "sup_loss": 1.2688541152812087 + }, + { + "step": 2660, + "lr": 0.0001, + "sup_loss": 1.2317321669170054 + }, + { + "step": 2661, + "lr": 0.0001, + "sup_loss": 1.2573901181549034 + }, + { + "step": 2662, + "lr": 0.0001, + "sup_loss": 1.2595307405289533 + }, + { + "step": 2663, + "lr": 0.0001, + "sup_loss": 1.2617900769054098 + }, + { + "step": 2664, + "lr": 0.0001, + "sup_loss": 1.25702116125521 + }, + { + "step": 2665, + "lr": 0.0001, + "sup_loss": 1.2479849497406827 + }, + { + "step": 2666, + "lr": 0.0001, + "sup_loss": 1.2364926991864176 + }, + { + "step": 2667, + "lr": 0.0001, + "sup_loss": 1.2634144619841137 + }, + { + "step": 2668, + "lr": 0.0001, + "sup_loss": 1.2155289446713848 + }, + { + "step": 2669, + "lr": 0.0001, + "sup_loss": 1.210210627144147 + }, + { + "step": 2670, + "lr": 0.0001, + "sup_loss": 1.2645819816297246 + }, + { + "step": 2671, + "lr": 0.0001, + "sup_loss": 1.1652595961388679 + }, + { + "step": 2672, + "lr": 0.0001, + "sup_loss": 1.2211857736000638 + }, + { + "step": 2673, + "lr": 0.0001, + "sup_loss": 1.1928454166637739 + }, + { + "step": 2674, + "lr": 0.0001, + "sup_loss": 1.2192903549810428 + }, + { + "step": 2675, + "lr": 0.0001, + "sup_loss": 1.2613312822898783 + }, + { + "step": 2676, + "lr": 0.0001, + "sup_loss": 1.2030221459941097 + }, + { + "step": 2677, + "lr": 0.0001, + "sup_loss": 1.1617538199626793 + }, + { + "step": 2678, + "lr": 0.0001, + 
"sup_loss": 1.2262931189792678 + }, + { + "step": 2679, + "lr": 0.0001, + "sup_loss": 1.2605967333344523 + }, + { + "step": 2680, + "lr": 0.0001, + "sup_loss": 1.1678725193088018 + }, + { + "step": 2681, + "lr": 0.0001, + "sup_loss": 1.238400743383536 + }, + { + "step": 2682, + "lr": 0.0001, + "sup_loss": 1.261504046919922 + }, + { + "step": 2683, + "lr": 0.0001, + "sup_loss": 1.2479996616430884 + }, + { + "step": 2684, + "lr": 0.0001, + "sup_loss": 1.2452577420293118 + }, + { + "step": 2685, + "lr": 0.0001, + "sup_loss": 1.1630908441120917 + }, + { + "step": 2686, + "lr": 0.0001, + "sup_loss": 1.236884288510307 + }, + { + "step": 2687, + "lr": 0.0001, + "sup_loss": 1.257547052127402 + }, + { + "step": 2688, + "lr": 0.0001, + "sup_loss": 1.2389862907354807 + }, + { + "step": 2689, + "lr": 0.0001, + "sup_loss": 1.210508130840378 + }, + { + "step": 2690, + "lr": 0.0001, + "sup_loss": 1.2146456800665766 + }, + { + "step": 2691, + "lr": 0.0001, + "sup_loss": 1.1833097620584363 + }, + { + "step": 2692, + "lr": 0.0001, + "sup_loss": 1.2303068127169816 + }, + { + "step": 2693, + "lr": 0.0001, + "sup_loss": 1.2233629934785821 + }, + { + "step": 2694, + "lr": 0.0001, + "sup_loss": 1.214829726705529 + }, + { + "step": 2695, + "lr": 0.0001, + "sup_loss": 1.1909151715528141 + }, + { + "step": 2696, + "lr": 0.0001, + "sup_loss": 1.2196554054468436 + }, + { + "step": 2697, + "lr": 0.0001, + "sup_loss": 1.2453942550271437 + }, + { + "step": 2698, + "lr": 0.0001, + "sup_loss": 1.209229778147698 + }, + { + "step": 2699, + "lr": 0.0001, + "sup_loss": 1.1766958675207015 + }, + { + "step": 2700, + "lr": 0.0001, + "sup_loss": 1.2198236073581505, + "lyap1_mean": -4.145177364349365, + "lyap1_max": -4.0913896560668945, + "lyap_spec_mean": [ + -4.145176887512207, + -4.136057376861572 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2701, + "lr": 0.0001, + "sup_loss": 1.2880146954173164 + }, + { + "step": 2702, + "lr": 0.0001, + "sup_loss": 1.176585073018046 + }, + { + "step": 
2703, + "lr": 0.0001, + "sup_loss": 1.2597035952353128 + }, + { + "step": 2704, + "lr": 0.0001, + "sup_loss": 1.1913034085610907 + }, + { + "step": 2705, + "lr": 0.0001, + "sup_loss": 1.2189204630389974 + }, + { + "step": 2706, + "lr": 0.0001, + "sup_loss": 1.2958042206729246 + }, + { + "step": 2707, + "lr": 0.0001, + "sup_loss": 1.2179067928281493 + }, + { + "step": 2708, + "lr": 0.0001, + "sup_loss": 1.1740395520447249 + }, + { + "step": 2709, + "lr": 0.0001, + "sup_loss": 1.2405722413173224 + }, + { + "step": 2710, + "lr": 0.0001, + "sup_loss": 1.2582204092738973 + }, + { + "step": 2711, + "lr": 0.0001, + "sup_loss": 1.185800269719854 + }, + { + "step": 2712, + "lr": 0.0001, + "sup_loss": 1.2425739850163413 + }, + { + "step": 2713, + "lr": 0.0001, + "sup_loss": 1.2494519796577779 + }, + { + "step": 2714, + "lr": 0.0001, + "sup_loss": 1.1771091277437487 + }, + { + "step": 2715, + "lr": 0.0001, + "sup_loss": 1.23497752042563 + }, + { + "step": 2716, + "lr": 0.0001, + "sup_loss": 1.1898585780165514 + }, + { + "step": 2717, + "lr": 0.0001, + "sup_loss": 1.1633061664767241 + }, + { + "step": 2718, + "lr": 0.0001, + "sup_loss": 1.1796349814599567 + }, + { + "step": 2719, + "lr": 0.0001, + "sup_loss": 1.2305618166506958 + }, + { + "step": 2720, + "lr": 0.0001, + "sup_loss": 1.1893667840347706 + }, + { + "step": 2721, + "lr": 0.0001, + "sup_loss": 1.2037491957803592 + }, + { + "step": 2722, + "lr": 0.0001, + "sup_loss": 1.2198712561133827 + }, + { + "step": 2723, + "lr": 0.0001, + "sup_loss": 1.2329404923248257 + }, + { + "step": 2724, + "lr": 0.0001, + "sup_loss": 1.2189166656131465 + }, + { + "step": 2725, + "lr": 0.0001, + "sup_loss": 1.1933895007474031 + }, + { + "step": 2726, + "lr": 0.0001, + "sup_loss": 1.2004903308178951 + }, + { + "step": 2727, + "lr": 0.0001, + "sup_loss": 1.2152784105541112 + }, + { + "step": 2728, + "lr": 0.0001, + "sup_loss": 1.2430411043509662 + }, + { + "step": 2729, + "lr": 0.0001, + "sup_loss": 1.2199430103046758 + }, + { + "step": 
2730, + "lr": 0.0001, + "sup_loss": 1.1434355247339236 + }, + { + "step": 2731, + "lr": 0.0001, + "sup_loss": 1.1955526919386807 + }, + { + "step": 2732, + "lr": 0.0001, + "sup_loss": 1.209411240231424 + }, + { + "step": 2733, + "lr": 0.0001, + "sup_loss": 1.1610627539057328 + }, + { + "step": 2734, + "lr": 0.0001, + "sup_loss": 1.265616125904369 + }, + { + "step": 2735, + "lr": 0.0001, + "sup_loss": 1.233067209210319 + }, + { + "step": 2736, + "lr": 0.0001, + "sup_loss": 1.1349202049704068 + }, + { + "step": 2737, + "lr": 0.0001, + "sup_loss": 1.243318356390707 + }, + { + "step": 2738, + "lr": 0.0001, + "sup_loss": 1.2091210087374296 + }, + { + "step": 2739, + "lr": 0.0001, + "sup_loss": 1.204178753701558 + }, + { + "step": 2740, + "lr": 0.0001, + "sup_loss": 1.2301383137613766 + }, + { + "step": 2741, + "lr": 0.0001, + "sup_loss": 1.194941001323939 + }, + { + "step": 2742, + "lr": 0.0001, + "sup_loss": 1.2801947029592953 + }, + { + "step": 2743, + "lr": 0.0001, + "sup_loss": 1.2673226855160413 + }, + { + "step": 2744, + "lr": 0.0001, + "sup_loss": 1.1574369637623212 + }, + { + "step": 2745, + "lr": 0.0001, + "sup_loss": 1.2059445530420552 + }, + { + "step": 2746, + "lr": 0.0001, + "sup_loss": 1.1969309760339402 + }, + { + "step": 2747, + "lr": 0.0001, + "sup_loss": 1.2148660666052427 + }, + { + "step": 2748, + "lr": 0.0001, + "sup_loss": 1.2679244712600848 + }, + { + "step": 2749, + "lr": 0.0001, + "sup_loss": 1.2099251915517164 + }, + { + "step": 2750, + "lr": 0.0001, + "sup_loss": 1.2394785717424335, + "lyap1_mean": -4.143750190734863, + "lyap1_max": -4.113030433654785, + "lyap_spec_mean": [ + -4.143750190734863, + -4.121489524841309 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2751, + "lr": 0.0001, + "sup_loss": 1.284863019727325 + }, + { + "step": 2752, + "lr": 0.0001, + "sup_loss": 1.233550625109153 + }, + { + "step": 2753, + "lr": 0.0001, + "sup_loss": 1.2208482321052476 + }, + { + "step": 2754, + "lr": 0.0001, + "sup_loss": 
1.2380220206520889 + }, + { + "step": 2755, + "lr": 0.0001, + "sup_loss": 1.2261492601117994 + }, + { + "step": 2756, + "lr": 0.0001, + "sup_loss": 1.2237059238576027 + }, + { + "step": 2757, + "lr": 0.0001, + "sup_loss": 1.2285876218270264 + }, + { + "step": 2758, + "lr": 0.0001, + "sup_loss": 1.2275381953624167 + }, + { + "step": 2759, + "lr": 0.0001, + "sup_loss": 1.2361166117305022 + }, + { + "step": 2760, + "lr": 0.0001, + "sup_loss": 1.1846329634607293 + }, + { + "step": 2761, + "lr": 0.0001, + "sup_loss": 1.2236013369391563 + }, + { + "step": 2762, + "lr": 0.0001, + "sup_loss": 1.2273579297738417 + }, + { + "step": 2763, + "lr": 0.0001, + "sup_loss": 1.2187209130458858 + }, + { + "step": 2764, + "lr": 0.0001, + "sup_loss": 1.1774631009643235 + }, + { + "step": 2765, + "lr": 0.0001, + "sup_loss": 1.2236323995349703 + }, + { + "step": 2766, + "lr": 0.0001, + "sup_loss": 1.1818180680383588 + }, + { + "step": 2767, + "lr": 0.0001, + "sup_loss": 1.2034424250363023 + }, + { + "step": 2768, + "lr": 0.0001, + "sup_loss": 1.18371561688779 + }, + { + "step": 2769, + "lr": 0.0001, + "sup_loss": 1.2024557460047902 + }, + { + "step": 2770, + "lr": 0.0001, + "sup_loss": 1.1986742839325801 + }, + { + "step": 2771, + "lr": 0.0001, + "sup_loss": 1.1790561146746892 + }, + { + "step": 2772, + "lr": 0.0001, + "sup_loss": 1.2084146659008368 + }, + { + "step": 2773, + "lr": 0.0001, + "sup_loss": 1.1759226899441422 + }, + { + "step": 2774, + "lr": 0.0001, + "sup_loss": 1.1524661896672563 + }, + { + "step": 2775, + "lr": 0.0001, + "sup_loss": 1.2029550253797614 + }, + { + "step": 2776, + "lr": 0.0001, + "sup_loss": 1.2122894439884144 + }, + { + "step": 2777, + "lr": 0.0001, + "sup_loss": 1.1750197155974214 + }, + { + "step": 2778, + "lr": 0.0001, + "sup_loss": 1.2057003453425166 + }, + { + "step": 2779, + "lr": 0.0001, + "sup_loss": 1.1974536913271807 + }, + { + "step": 2780, + "lr": 0.0001, + "sup_loss": 1.1967415293632362 + }, + { + "step": 2781, + "lr": 0.0001, + "sup_loss": 
1.2473728593308882 + }, + { + "step": 2782, + "lr": 0.0001, + "sup_loss": 1.20178185044411 + }, + { + "step": 2783, + "lr": 0.0001, + "sup_loss": 1.1928275692126933 + }, + { + "step": 2784, + "lr": 0.0001, + "sup_loss": 1.1893599898107874 + }, + { + "step": 2785, + "lr": 0.0001, + "sup_loss": 1.1873089050891608 + }, + { + "step": 2786, + "lr": 0.0001, + "sup_loss": 1.2415463692069302 + }, + { + "step": 2787, + "lr": 0.0001, + "sup_loss": 1.2373799716808946 + }, + { + "step": 2788, + "lr": 0.0001, + "sup_loss": 1.1974427804739871 + }, + { + "step": 2789, + "lr": 0.0001, + "sup_loss": 1.1886924107918198 + }, + { + "step": 2790, + "lr": 0.0001, + "sup_loss": 1.1527571170396032 + }, + { + "step": 2791, + "lr": 0.0001, + "sup_loss": 1.1912547398088777 + }, + { + "step": 2792, + "lr": 0.0001, + "sup_loss": 1.2329055287117536 + }, + { + "step": 2793, + "lr": 0.0001, + "sup_loss": 1.2618365797929392 + }, + { + "step": 2794, + "lr": 0.0001, + "sup_loss": 1.1751907376176824 + }, + { + "step": 2795, + "lr": 0.0001, + "sup_loss": 1.1724260160125624 + }, + { + "step": 2796, + "lr": 0.0001, + "sup_loss": 1.2392439352573679 + }, + { + "step": 2797, + "lr": 0.0001, + "sup_loss": 1.209633508685113 + }, + { + "step": 2798, + "lr": 0.0001, + "sup_loss": 1.2019066963454212 + }, + { + "step": 2799, + "lr": 0.0001, + "sup_loss": 1.2139447758952797 + }, + { + "step": 2800, + "lr": 0.0001, + "sup_loss": 1.1932710711203705, + "lyap1_mean": -4.098666667938232, + "lyap1_max": -4.023406505584717, + "lyap_spec_mean": [ + -4.098666667938232, + -4.096119403839111 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2801, + "lr": 0.0001, + "sup_loss": 1.1992359788428548 + }, + { + "step": 2802, + "lr": 0.0001, + "sup_loss": 1.224443586595954 + }, + { + "step": 2803, + "lr": 0.0001, + "sup_loss": 1.1886977014484197 + }, + { + "step": 2804, + "lr": 0.0001, + "sup_loss": 1.2124679468171327 + }, + { + "step": 2805, + "lr": 0.0001, + "sup_loss": 1.226693078449122 + }, + { + "step": 2806, + 
"lr": 0.0001, + "sup_loss": 1.1944266377460955 + }, + { + "step": 2807, + "lr": 0.0001, + "sup_loss": 1.192333851939071 + }, + { + "step": 2808, + "lr": 0.0001, + "sup_loss": 1.2353895003405737 + }, + { + "step": 2809, + "lr": 0.0001, + "sup_loss": 1.2131763642554134 + }, + { + "step": 2810, + "lr": 0.0001, + "sup_loss": 1.1203792874720453 + }, + { + "step": 2811, + "lr": 0.0001, + "sup_loss": 1.18498464537838 + }, + { + "step": 2812, + "lr": 0.0001, + "sup_loss": 1.207462564988826 + }, + { + "step": 2813, + "lr": 0.0001, + "sup_loss": 1.2474337865979834 + }, + { + "step": 2814, + "lr": 0.0001, + "sup_loss": 1.1924465453378905 + }, + { + "step": 2815, + "lr": 0.0001, + "sup_loss": 1.2341939677174314 + }, + { + "step": 2816, + "lr": 0.0001, + "sup_loss": 1.204472869145486 + }, + { + "step": 2817, + "lr": 0.0001, + "sup_loss": 1.1703876157210666 + }, + { + "step": 2818, + "lr": 0.0001, + "sup_loss": 1.2040041039826734 + }, + { + "step": 2819, + "lr": 0.0001, + "sup_loss": 1.174052402058899 + }, + { + "step": 2820, + "lr": 0.0001, + "sup_loss": 1.1424810811321462 + }, + { + "step": 2821, + "lr": 0.0001, + "sup_loss": 1.1688515694095583 + }, + { + "step": 2822, + "lr": 0.0001, + "sup_loss": 1.1887108821416759 + }, + { + "step": 2823, + "lr": 0.0001, + "sup_loss": 1.1892404297536572 + }, + { + "step": 2824, + "lr": 0.0001, + "sup_loss": 1.2046011036775555 + }, + { + "step": 2825, + "lr": 0.0001, + "sup_loss": 1.1858145615309514 + }, + { + "step": 2826, + "lr": 0.0001, + "sup_loss": 1.1897495943902472 + }, + { + "step": 2827, + "lr": 0.0001, + "sup_loss": 1.1883187262599395 + }, + { + "step": 2828, + "lr": 0.0001, + "sup_loss": 1.2334616058181576 + }, + { + "step": 2829, + "lr": 0.0001, + "sup_loss": 1.2340482098484336 + }, + { + "step": 2830, + "lr": 0.0001, + "sup_loss": 1.262314854804658 + }, + { + "step": 2831, + "lr": 0.0001, + "sup_loss": 1.1894816058362065 + }, + { + "step": 2832, + "lr": 0.0001, + "sup_loss": 1.1948407795911669 + }, + { + "step": 2833, + "lr": 
0.0001, + "sup_loss": 1.1159364647106889 + }, + { + "step": 2834, + "lr": 0.0001, + "sup_loss": 1.2129146843921843 + }, + { + "step": 2835, + "lr": 0.0001, + "sup_loss": 1.2269511721615434 + }, + { + "step": 2836, + "lr": 0.0001, + "sup_loss": 1.2122383039764555 + }, + { + "step": 2837, + "lr": 0.0001, + "sup_loss": 1.1458994949785084 + }, + { + "step": 2838, + "lr": 0.0001, + "sup_loss": 1.2040875109310762 + }, + { + "step": 2839, + "lr": 0.0001, + "sup_loss": 1.20474349329665 + }, + { + "step": 2840, + "lr": 0.0001, + "sup_loss": 1.1394442555182653 + }, + { + "step": 2841, + "lr": 0.0001, + "sup_loss": 1.1840842843101966 + }, + { + "step": 2842, + "lr": 0.0001, + "sup_loss": 1.141838865386241 + }, + { + "step": 2843, + "lr": 0.0001, + "sup_loss": 1.16604813166901 + }, + { + "step": 2844, + "lr": 0.0001, + "sup_loss": 1.155479261348026 + }, + { + "step": 2845, + "lr": 0.0001, + "sup_loss": 1.1899330566387052 + }, + { + "step": 2846, + "lr": 0.0001, + "sup_loss": 1.2027384784506403 + }, + { + "step": 2847, + "lr": 0.0001, + "sup_loss": 1.1849872937838002 + }, + { + "step": 2848, + "lr": 0.0001, + "sup_loss": 1.1241586801436854 + }, + { + "step": 2849, + "lr": 0.0001, + "sup_loss": 1.2227660670211045 + }, + { + "step": 2850, + "lr": 0.0001, + "sup_loss": 1.1946299294714526, + "lyap1_mean": -4.078210353851318, + "lyap1_max": -4.034915924072266, + "lyap_spec_mean": [ + -4.07820987701416, + -4.045197010040283 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2851, + "lr": 0.0001, + "sup_loss": 1.2158997966725866 + }, + { + "step": 2852, + "lr": 0.0001, + "sup_loss": 1.1656405885988264 + }, + { + "step": 2853, + "lr": 0.0001, + "sup_loss": 1.155922532205952 + }, + { + "step": 2854, + "lr": 0.0001, + "sup_loss": 1.1890076736471467 + }, + { + "step": 2855, + "lr": 0.0001, + "sup_loss": 1.1692176886947816 + }, + { + "step": 2856, + "lr": 0.0001, + "sup_loss": 1.214723730707917 + }, + { + "step": 2857, + "lr": 0.0001, + "sup_loss": 1.1489284975809753 + }, + { + 
"step": 2858, + "lr": 0.0001, + "sup_loss": 1.1874928221748196 + }, + { + "step": 2859, + "lr": 0.0001, + "sup_loss": 1.215204873387386 + }, + { + "step": 2860, + "lr": 0.0001, + "sup_loss": 1.1612307816047192 + }, + { + "step": 2861, + "lr": 0.0001, + "sup_loss": 1.1891047610043515 + }, + { + "step": 2862, + "lr": 0.0001, + "sup_loss": 1.2012892688969983 + }, + { + "step": 2863, + "lr": 0.0001, + "sup_loss": 1.1426133608447897 + }, + { + "step": 2864, + "lr": 0.0001, + "sup_loss": 1.1617679128780294 + }, + { + "step": 2865, + "lr": 0.0001, + "sup_loss": 1.1408589656897925 + }, + { + "step": 2866, + "lr": 0.0001, + "sup_loss": 1.146708760416167 + }, + { + "step": 2867, + "lr": 0.0001, + "sup_loss": 1.1684592155412288 + }, + { + "step": 2868, + "lr": 0.0001, + "sup_loss": 1.1647948119819325 + }, + { + "step": 2869, + "lr": 0.0001, + "sup_loss": 1.1751339515984083 + }, + { + "step": 2870, + "lr": 0.0001, + "sup_loss": 1.1939843035738116 + }, + { + "step": 2871, + "lr": 0.0001, + "sup_loss": 1.152120812532805 + }, + { + "step": 2872, + "lr": 0.0001, + "sup_loss": 1.1790699006620267 + }, + { + "step": 2873, + "lr": 0.0001, + "sup_loss": 1.1816362228296602 + }, + { + "step": 2874, + "lr": 0.0001, + "sup_loss": 1.154230955305328 + }, + { + "step": 2875, + "lr": 0.0001, + "sup_loss": 1.1869670145034565 + }, + { + "step": 2876, + "lr": 0.0001, + "sup_loss": 1.198217257954748 + }, + { + "step": 2877, + "lr": 0.0001, + "sup_loss": 1.178907155494454 + }, + { + "step": 2878, + "lr": 0.0001, + "sup_loss": 1.200648403269875 + }, + { + "step": 2879, + "lr": 0.0001, + "sup_loss": 1.1860254880272565 + }, + { + "step": 2880, + "lr": 0.0001, + "sup_loss": 1.1733465436563406 + }, + { + "step": 2881, + "lr": 0.0001, + "sup_loss": 1.096949323573581 + }, + { + "step": 2882, + "lr": 0.0001, + "sup_loss": 1.1242257301280816 + }, + { + "step": 2883, + "lr": 0.0001, + "sup_loss": 1.1987406168920336 + }, + { + "step": 2884, + "lr": 0.0001, + "sup_loss": 1.1899407768330346 + }, + { + "step": 
2885, + "lr": 0.0001, + "sup_loss": 1.1901822490716116 + }, + { + "step": 2886, + "lr": 0.0001, + "sup_loss": 1.1889308342205704 + }, + { + "step": 2887, + "lr": 0.0001, + "sup_loss": 1.207253840898637 + }, + { + "step": 2888, + "lr": 0.0001, + "sup_loss": 1.0427842523974111 + }, + { + "step": 2889, + "lr": 0.0001, + "sup_loss": 1.252588670479281 + }, + { + "step": 2890, + "lr": 0.0001, + "sup_loss": 1.18839651161764 + }, + { + "step": 2891, + "lr": 0.0001, + "sup_loss": 1.132269866290512 + }, + { + "step": 2892, + "lr": 0.0001, + "sup_loss": 1.1798659901783823 + }, + { + "step": 2893, + "lr": 0.0001, + "sup_loss": 1.1298951990330095 + }, + { + "step": 2894, + "lr": 0.0001, + "sup_loss": 1.1971495758809165 + }, + { + "step": 2895, + "lr": 0.0001, + "sup_loss": 1.1781393181666764 + }, + { + "step": 2896, + "lr": 0.0001, + "sup_loss": 1.200866593014584 + }, + { + "step": 2897, + "lr": 0.0001, + "sup_loss": 1.2609272796460058 + }, + { + "step": 2898, + "lr": 0.0001, + "sup_loss": 1.1340103707874907 + }, + { + "step": 2899, + "lr": 0.0001, + "sup_loss": 1.1758957142742377 + }, + { + "step": 2900, + "lr": 0.0001, + "sup_loss": 1.2324421750710768, + "lyap1_mean": -4.075942039489746, + "lyap1_max": -4.031743049621582, + "lyap_spec_mean": [ + -4.075942039489746, + -4.056901931762695 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2901, + "lr": 0.0001, + "sup_loss": 1.2187291516481937 + }, + { + "step": 2902, + "lr": 0.0001, + "sup_loss": 1.2261938936775971 + }, + { + "step": 2903, + "lr": 0.0001, + "sup_loss": 1.1872528217774525 + }, + { + "step": 2904, + "lr": 0.0001, + "sup_loss": 1.188666017764403 + }, + { + "step": 2905, + "lr": 0.0001, + "sup_loss": 1.1238364290538336 + }, + { + "step": 2906, + "lr": 0.0001, + "sup_loss": 1.1120989900587106 + }, + { + "step": 2907, + "lr": 0.0001, + "sup_loss": 1.2277774401355726 + }, + { + "step": 2908, + "lr": 0.0001, + "sup_loss": 1.1857180597484058 + }, + { + "step": 2909, + "lr": 0.0001, + "sup_loss": 
1.1795244650958037 + }, + { + "step": 2910, + "lr": 0.0001, + "sup_loss": 1.1669530044039547 + }, + { + "step": 2911, + "lr": 0.0001, + "sup_loss": 1.1706363013080479 + }, + { + "step": 2912, + "lr": 0.0001, + "sup_loss": 1.100990712193374 + }, + { + "step": 2913, + "lr": 0.0001, + "sup_loss": 1.1588804353463202 + }, + { + "step": 2914, + "lr": 0.0001, + "sup_loss": 1.2126128018682034 + }, + { + "step": 2915, + "lr": 0.0001, + "sup_loss": 1.1778418880363921 + }, + { + "step": 2916, + "lr": 0.0001, + "sup_loss": 1.155806804963178 + }, + { + "step": 2917, + "lr": 0.0001, + "sup_loss": 1.1762411167879385 + }, + { + "step": 2918, + "lr": 0.0001, + "sup_loss": 1.1179291698849965 + }, + { + "step": 2919, + "lr": 0.0001, + "sup_loss": 1.170109204444336 + }, + { + "step": 2920, + "lr": 0.0001, + "sup_loss": 1.1981055816241923 + }, + { + "step": 2921, + "lr": 0.0001, + "sup_loss": 1.17888707374863 + }, + { + "step": 2922, + "lr": 0.0001, + "sup_loss": 1.2015338757936609 + }, + { + "step": 2923, + "lr": 0.0001, + "sup_loss": 1.1444368246317362 + }, + { + "step": 2924, + "lr": 0.0001, + "sup_loss": 1.150511402385754 + }, + { + "step": 2925, + "lr": 0.0001, + "sup_loss": 1.1897001767833921 + }, + { + "step": 2926, + "lr": 0.0001, + "sup_loss": 1.197184738845019 + }, + { + "step": 2927, + "lr": 0.0001, + "sup_loss": 1.1928740911563882 + }, + { + "step": 2928, + "lr": 0.0001, + "sup_loss": 1.1444655352074873 + }, + { + "step": 2929, + "lr": 0.0001, + "sup_loss": 1.1625666361192932 + }, + { + "step": 2930, + "lr": 0.0001, + "sup_loss": 1.1688287682219136 + }, + { + "step": 2931, + "lr": 0.0001, + "sup_loss": 1.1409571925999333 + }, + { + "step": 2932, + "lr": 0.0001, + "sup_loss": 1.1979738588901685 + }, + { + "step": 2933, + "lr": 0.0001, + "sup_loss": 1.136123197871838 + }, + { + "step": 2934, + "lr": 0.0001, + "sup_loss": 1.2242667976317398 + }, + { + "step": 2935, + "lr": 0.0001, + "sup_loss": 1.173073780174113 + }, + { + "step": 2936, + "lr": 0.0001, + "sup_loss": 
1.157782913928542 + }, + { + "step": 2937, + "lr": 0.0001, + "sup_loss": 1.2040865487659438 + }, + { + "step": 2938, + "lr": 0.0001, + "sup_loss": 1.1856106809552456 + }, + { + "step": 2939, + "lr": 0.0001, + "sup_loss": 1.1675539629425669 + }, + { + "step": 2940, + "lr": 0.0001, + "sup_loss": 1.1483279284979238 + }, + { + "step": 2941, + "lr": 0.0001, + "sup_loss": 1.1564708697313058 + }, + { + "step": 2942, + "lr": 0.0001, + "sup_loss": 1.2134473587962367 + }, + { + "step": 2943, + "lr": 0.0001, + "sup_loss": 1.1816269042600522 + }, + { + "step": 2944, + "lr": 0.0001, + "sup_loss": 1.1426613519069986 + }, + { + "step": 2945, + "lr": 0.0001, + "sup_loss": 1.1792052165579912 + }, + { + "step": 2946, + "lr": 0.0001, + "sup_loss": 1.167843994471431 + }, + { + "step": 2947, + "lr": 0.0001, + "sup_loss": 1.183089975029637 + }, + { + "step": 2948, + "lr": 0.0001, + "sup_loss": 1.170864852254815 + }, + { + "step": 2949, + "lr": 0.0001, + "sup_loss": 1.1590587537602042 + }, + { + "step": 2950, + "lr": 0.0001, + "sup_loss": 1.1370805382976412, + "lyap1_mean": -4.0443925857543945, + "lyap1_max": -3.981407642364502, + "lyap_spec_mean": [ + -4.0443925857543945, + -4.064511299133301 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 2951, + "lr": 0.0001, + "sup_loss": 1.1491813169557314 + }, + { + "step": 2952, + "lr": 0.0001, + "sup_loss": 1.1615139663771925 + }, + { + "step": 2953, + "lr": 0.0001, + "sup_loss": 1.1525536184712686 + }, + { + "step": 2954, + "lr": 0.0001, + "sup_loss": 1.1680880258820185 + }, + { + "step": 2955, + "lr": 0.0001, + "sup_loss": 1.2009458917465787 + }, + { + "step": 2956, + "lr": 0.0001, + "sup_loss": 1.1734697854098128 + }, + { + "step": 2957, + "lr": 0.0001, + "sup_loss": 1.1449008867426098 + }, + { + "step": 2958, + "lr": 0.0001, + "sup_loss": 1.1457298844734047 + }, + { + "step": 2959, + "lr": 0.0001, + "sup_loss": 1.148402188629566 + }, + { + "step": 2960, + "lr": 0.0001, + "sup_loss": 1.147558309889396 + }, + { + "step": 2961, + 
"lr": 0.0001, + "sup_loss": 1.1788514939392485 + }, + { + "step": 2962, + "lr": 0.0001, + "sup_loss": 1.190776967578652 + }, + { + "step": 2963, + "lr": 0.0001, + "sup_loss": 1.1768656677437581 + }, + { + "step": 2964, + "lr": 0.0001, + "sup_loss": 1.196892056701697 + }, + { + "step": 2965, + "lr": 0.0001, + "sup_loss": 1.1571320471255018 + }, + { + "step": 2966, + "lr": 0.0001, + "sup_loss": 1.1264738882019996 + }, + { + "step": 2967, + "lr": 0.0001, + "sup_loss": 1.1526100889005635 + }, + { + "step": 2968, + "lr": 0.0001, + "sup_loss": 1.163735278966188 + }, + { + "step": 2969, + "lr": 0.0001, + "sup_loss": 1.2055233338188271 + }, + { + "step": 2970, + "lr": 0.0001, + "sup_loss": 1.1285352656067338 + }, + { + "step": 2971, + "lr": 0.0001, + "sup_loss": 1.1858702061614081 + }, + { + "step": 2972, + "lr": 0.0001, + "sup_loss": 1.1757114354599536 + }, + { + "step": 2973, + "lr": 0.0001, + "sup_loss": 1.1392801986571524 + }, + { + "step": 2974, + "lr": 0.0001, + "sup_loss": 1.1286656941304138 + }, + { + "step": 2975, + "lr": 0.0001, + "sup_loss": 1.158593817334657 + }, + { + "step": 2976, + "lr": 0.0001, + "sup_loss": 1.117415718219704 + }, + { + "step": 2977, + "lr": 0.0001, + "sup_loss": 1.1336880365905515 + }, + { + "step": 2978, + "lr": 0.0001, + "sup_loss": 1.1900292231288252 + }, + { + "step": 2979, + "lr": 0.0001, + "sup_loss": 1.1893053077115456 + }, + { + "step": 2980, + "lr": 0.0001, + "sup_loss": 1.1652481772555807 + }, + { + "step": 2981, + "lr": 0.0001, + "sup_loss": 1.174186948480565 + }, + { + "step": 2982, + "lr": 0.0001, + "sup_loss": 1.1269489485253894 + }, + { + "step": 2983, + "lr": 0.0001, + "sup_loss": 1.1743589914590937 + }, + { + "step": 2984, + "lr": 0.0001, + "sup_loss": 1.1596524248955662 + }, + { + "step": 2985, + "lr": 0.0001, + "sup_loss": 1.0697048282521715 + }, + { + "step": 2986, + "lr": 0.0001, + "sup_loss": 1.1886016550622456 + }, + { + "step": 2987, + "lr": 0.0001, + "sup_loss": 1.1834043850315474 + }, + { + "step": 2988, + "lr": 
0.0001, + "sup_loss": 1.1675571433591352 + }, + { + "step": 2989, + "lr": 0.0001, + "sup_loss": 1.1720413201939002 + }, + { + "step": 2990, + "lr": 0.0001, + "sup_loss": 1.1959874962050194 + }, + { + "step": 2991, + "lr": 0.0001, + "sup_loss": 1.1215594680166399 + }, + { + "step": 2992, + "lr": 0.0001, + "sup_loss": 1.1600075908147822 + }, + { + "step": 2993, + "lr": 0.0001, + "sup_loss": 1.1364626191809135 + }, + { + "step": 2994, + "lr": 0.0001, + "sup_loss": 1.2316981120958004 + }, + { + "step": 2995, + "lr": 0.0001, + "sup_loss": 1.1565516832981928 + }, + { + "step": 2996, + "lr": 0.0001, + "sup_loss": 1.1648822632412152 + }, + { + "step": 2997, + "lr": 0.0001, + "sup_loss": 1.0539961582229516 + }, + { + "step": 2998, + "lr": 0.0001, + "sup_loss": 1.144950537758599 + }, + { + "step": 2999, + "lr": 0.0001, + "sup_loss": 1.1471636485820178 + } + ], + "evals": [ + { + "step": 0, + "acc": 0.0, + "tok_acc": 0.11108699845679013 + }, + { + "step": 300, + "acc": 0.0, + "tok_acc": 0.38797260802469136 + }, + { + "step": 600, + "acc": 0.0, + "tok_acc": 0.38915412808641975 + }, + { + "step": 900, + "acc": 0.0, + "tok_acc": 0.40340470679012347 + }, + { + "step": 1200, + "acc": 0.0, + "tok_acc": 0.4230324074074074 + }, + { + "step": 1500, + "acc": 0.0, + "tok_acc": 0.4381269290123457 + }, + { + "step": 1800, + "acc": 0.0, + "tok_acc": 0.45703125 + }, + { + "step": 2100, + "acc": 0.0, + "tok_acc": 0.47528452932098764 + }, + { + "step": 2400, + "acc": 0.0, + "tok_acc": 0.49129533179012347 + }, + { + "step": 2700, + "acc": 0.0, + "tok_acc": 0.5081018518518519 + }, + { + "step": 3000, + "acc": 0.0, + "tok_acc": 0.5225935570987654 + } + ], + "final_acc": 0.0, + "final_tok_acc": 0.5225935570987654 +}
\ No newline at end of file diff --git a/runs/srm_smoke_500_h256.json b/runs/srm_smoke_500_h256.json new file mode 100644 index 0000000..728ef05 --- /dev/null +++ b/runs/srm_smoke_500_h256.json @@ -0,0 +1,2712 @@ +{ + "args": { + "data_path": "/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000", + "n_steps": 500, + "batch_size": 8, + "lr": 0.0001, + "puzzle_emb_lr": 0.0001, + "warmup_steps": 100, + "weight_decay": 1.0, + "hidden_size": 256, + "n_iters": 6, + "n_aol_layers": 2, + "kappa": 0.9, + "eta": 1.0, + "alpha": 1.0, + "k_lyap": 2, + "lyap_iters": 4, + "lyap_every": 25, + "seed": 42, + "eval_every": 100, + "eval_n": 256, + "eval_batch_size": 32, + "out": "runs/srm_smoke_500_h256.json", + "save_ckpt": "" + }, + "n_params": 939314, + "initial_acc": 0.0, + "initial_tok_acc": 0.0, + "initial_lip": { + "lip_emp_mean": 0.1357038915157318, + "lip_emp_max": 0.17765390872955322, + "lip_emp_99p": 0.17537692189216614, + "lip_theoretical_bound": 0.9, + "passes_bound": true + }, + "steps": [ + { + "step": 0, + "lr": 0.0, + "sup_loss": 2.8284496844095326, + "lyap1_mean": -6.2286057472229, + "lyap1_max": -6.200310230255127, + "lyap_spec_mean": [ + -6.2286057472229, + -6.228340148925781 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1, + "lr": 1e-06, + "sup_loss": 2.838926445513094 + }, + { + "step": 2, + "lr": 2e-06, + "sup_loss": 2.834020958290193 + }, + { + "step": 3, + "lr": 3e-06, + "sup_loss": 2.8271419600543934 + }, + { + "step": 4, + "lr": 4e-06, + "sup_loss": 2.8336220039837565 + }, + { + "step": 5, + "lr": 5e-06, + "sup_loss": 2.847244138413309 + }, + { + "step": 6, + "lr": 6e-06, + "sup_loss": 2.8296374251845307 + }, + { + "step": 7, + "lr": 7e-06, + "sup_loss": 2.816413976469476 + }, + { + "step": 8, + "lr": 8e-06, + "sup_loss": 2.8221126965976864 + }, + { + "step": 9, + "lr": 9e-06, + "sup_loss": 2.8106848008287297 + }, + { + "step": 10, + "lr": 1e-05, + "sup_loss": 2.8203775573541052 + }, + { + "step": 11, + "lr": 1.1000000000000001e-05, + 
"sup_loss": 2.8181432855784427 + }, + { + "step": 12, + "lr": 1.2e-05, + "sup_loss": 2.8234647595703923 + }, + { + "step": 13, + "lr": 1.3000000000000001e-05, + "sup_loss": 2.819964976625202 + }, + { + "step": 14, + "lr": 1.4e-05, + "sup_loss": 2.777240835924918 + }, + { + "step": 15, + "lr": 1.5e-05, + "sup_loss": 2.796632874074624 + }, + { + "step": 16, + "lr": 1.6e-05, + "sup_loss": 2.80353484864027 + }, + { + "step": 17, + "lr": 1.7e-05, + "sup_loss": 2.7731115467511405 + }, + { + "step": 18, + "lr": 1.8e-05, + "sup_loss": 2.776553477441689 + }, + { + "step": 19, + "lr": 1.9e-05, + "sup_loss": 2.764454348935523 + }, + { + "step": 20, + "lr": 2e-05, + "sup_loss": 2.763660193795498 + }, + { + "step": 21, + "lr": 2.1000000000000002e-05, + "sup_loss": 2.7644818192133025 + }, + { + "step": 22, + "lr": 2.2000000000000003e-05, + "sup_loss": 2.720537518373678 + }, + { + "step": 23, + "lr": 2.3e-05, + "sup_loss": 2.7332803327454616 + }, + { + "step": 24, + "lr": 2.4e-05, + "sup_loss": 2.7274532837280776 + }, + { + "step": 25, + "lr": 2.5e-05, + "sup_loss": 2.7009521547955337, + "lyap1_mean": -6.240983009338379, + "lyap1_max": -6.22477388381958, + "lyap_spec_mean": [ + -6.240983009338379, + -6.240518093109131 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 26, + "lr": 2.6000000000000002e-05, + "sup_loss": 2.7118575888452168 + }, + { + "step": 27, + "lr": 2.7000000000000002e-05, + "sup_loss": 2.676175428796176 + }, + { + "step": 28, + "lr": 2.8e-05, + "sup_loss": 2.693807866858765 + }, + { + "step": 29, + "lr": 2.9000000000000004e-05, + "sup_loss": 2.682955672998549 + }, + { + "step": 30, + "lr": 3e-05, + "sup_loss": 2.654173913401533 + }, + { + "step": 31, + "lr": 3.1e-05, + "sup_loss": 2.6763871059258495 + }, + { + "step": 32, + "lr": 3.2e-05, + "sup_loss": 2.644394835134615 + }, + { + "step": 33, + "lr": 3.3e-05, + "sup_loss": 2.6299395433077652 + }, + { + "step": 34, + "lr": 3.4e-05, + "sup_loss": 2.6242866722739664 + }, + { + "step": 35, + "lr": 
3.5000000000000004e-05, + "sup_loss": 2.616557927761839 + }, + { + "step": 36, + "lr": 3.6e-05, + "sup_loss": 2.5994089813723797 + }, + { + "step": 37, + "lr": 3.7000000000000005e-05, + "sup_loss": 2.5626432829550247 + }, + { + "step": 38, + "lr": 3.8e-05, + "sup_loss": 2.5606932168456327 + }, + { + "step": 39, + "lr": 3.9e-05, + "sup_loss": 2.546780039790928 + }, + { + "step": 40, + "lr": 4e-05, + "sup_loss": 2.520369392127516 + }, + { + "step": 41, + "lr": 4.1e-05, + "sup_loss": 2.5127611686076583 + }, + { + "step": 42, + "lr": 4.2000000000000004e-05, + "sup_loss": 2.5208038257044745 + }, + { + "step": 43, + "lr": 4.3e-05, + "sup_loss": 2.493714124425292 + }, + { + "step": 44, + "lr": 4.4000000000000006e-05, + "sup_loss": 2.480158985877274 + }, + { + "step": 45, + "lr": 4.5e-05, + "sup_loss": 2.4361940027334783 + }, + { + "step": 46, + "lr": 4.6e-05, + "sup_loss": 2.4460679490083947 + }, + { + "step": 47, + "lr": 4.7000000000000004e-05, + "sup_loss": 2.4495761120800132 + }, + { + "step": 48, + "lr": 4.8e-05, + "sup_loss": 2.4131285345159026 + }, + { + "step": 49, + "lr": 4.9e-05, + "sup_loss": 2.3999822324103657 + }, + { + "step": 50, + "lr": 5e-05, + "sup_loss": 2.4029195286821534, + "lyap1_mean": -6.2544169425964355, + "lyap1_max": -6.231961250305176, + "lyap_spec_mean": [ + -6.2544169425964355, + -6.252962112426758 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 51, + "lr": 5.1000000000000006e-05, + "sup_loss": 2.366847464160111 + }, + { + "step": 52, + "lr": 5.2000000000000004e-05, + "sup_loss": 2.33094637470472 + }, + { + "step": 53, + "lr": 5.3e-05, + "sup_loss": 2.329777112486255 + }, + { + "step": 54, + "lr": 5.4000000000000005e-05, + "sup_loss": 2.334095543782154 + }, + { + "step": 55, + "lr": 5.500000000000001e-05, + "sup_loss": 2.3051139076185523 + }, + { + "step": 56, + "lr": 5.6e-05, + "sup_loss": 2.3167502882455797 + }, + { + "step": 57, + "lr": 5.7e-05, + "sup_loss": 2.2903970820408137 + }, + { + "step": 58, + "lr": 
5.800000000000001e-05, + "sup_loss": 2.274886117171231 + }, + { + "step": 59, + "lr": 5.9e-05, + "sup_loss": 2.2313597888611825 + }, + { + "step": 60, + "lr": 6e-05, + "sup_loss": 2.2339985174568273 + }, + { + "step": 61, + "lr": 6.1000000000000005e-05, + "sup_loss": 2.223536861505 + }, + { + "step": 62, + "lr": 6.2e-05, + "sup_loss": 2.201599151730311 + }, + { + "step": 63, + "lr": 6.3e-05, + "sup_loss": 2.1832543882469557 + }, + { + "step": 64, + "lr": 6.4e-05, + "sup_loss": 2.1580907568451795 + }, + { + "step": 65, + "lr": 6.500000000000001e-05, + "sup_loss": 2.1679979832095837 + }, + { + "step": 66, + "lr": 6.6e-05, + "sup_loss": 2.151159814374817 + }, + { + "step": 67, + "lr": 6.7e-05, + "sup_loss": 2.132176910649508 + }, + { + "step": 68, + "lr": 6.8e-05, + "sup_loss": 2.123453724015459 + }, + { + "step": 69, + "lr": 6.900000000000001e-05, + "sup_loss": 2.100389289485579 + }, + { + "step": 70, + "lr": 7.000000000000001e-05, + "sup_loss": 2.100101099728348 + }, + { + "step": 71, + "lr": 7.1e-05, + "sup_loss": 2.082929395622361 + }, + { + "step": 72, + "lr": 7.2e-05, + "sup_loss": 2.0613085193351983 + }, + { + "step": 73, + "lr": 7.3e-05, + "sup_loss": 2.043758664648479 + }, + { + "step": 74, + "lr": 7.400000000000001e-05, + "sup_loss": 2.0471150778750125 + }, + { + "step": 75, + "lr": 7.500000000000001e-05, + "sup_loss": 2.043438499353482, + "lyap1_mean": -6.26137638092041, + "lyap1_max": -6.241876125335693, + "lyap_spec_mean": [ + -6.261376857757568, + -6.260560035705566 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 76, + "lr": 7.6e-05, + "sup_loss": 2.0290594292513395 + }, + { + "step": 77, + "lr": 7.7e-05, + "sup_loss": 2.017350256019743 + }, + { + "step": 78, + "lr": 7.8e-05, + "sup_loss": 2.010315789581182 + }, + { + "step": 79, + "lr": 7.900000000000001e-05, + "sup_loss": 2.007747901168263 + }, + { + "step": 80, + "lr": 8e-05, + "sup_loss": 1.9878722973916485 + }, + { + "step": 81, + "lr": 8.099999999999999e-05, + "sup_loss": 
1.9820387430330704 + }, + { + "step": 82, + "lr": 8.2e-05, + "sup_loss": 1.960933098122315 + }, + { + "step": 83, + "lr": 8.3e-05, + "sup_loss": 1.9703326768508003 + }, + { + "step": 84, + "lr": 8.400000000000001e-05, + "sup_loss": 1.9608279961175652 + }, + { + "step": 85, + "lr": 8.5e-05, + "sup_loss": 1.9439071529032013 + }, + { + "step": 86, + "lr": 8.6e-05, + "sup_loss": 1.940231741846871 + }, + { + "step": 87, + "lr": 8.700000000000001e-05, + "sup_loss": 1.9256818466494516 + }, + { + "step": 88, + "lr": 8.800000000000001e-05, + "sup_loss": 1.9422044059735057 + }, + { + "step": 89, + "lr": 8.9e-05, + "sup_loss": 1.9265779568349395 + }, + { + "step": 90, + "lr": 9e-05, + "sup_loss": 1.9107046426157344 + }, + { + "step": 91, + "lr": 9.1e-05, + "sup_loss": 1.9120888307606432 + }, + { + "step": 92, + "lr": 9.2e-05, + "sup_loss": 1.9039226834805385 + }, + { + "step": 93, + "lr": 9.300000000000001e-05, + "sup_loss": 1.8859638864585686 + }, + { + "step": 94, + "lr": 9.400000000000001e-05, + "sup_loss": 1.8935103237832633 + }, + { + "step": 95, + "lr": 9.499999999999999e-05, + "sup_loss": 1.8780442206074615 + }, + { + "step": 96, + "lr": 9.6e-05, + "sup_loss": 1.880738136494954 + }, + { + "step": 97, + "lr": 9.7e-05, + "sup_loss": 1.8727563337733835 + }, + { + "step": 98, + "lr": 9.8e-05, + "sup_loss": 1.8747788248049444 + }, + { + "step": 99, + "lr": 9.900000000000001e-05, + "sup_loss": 1.8535151587817753 + }, + { + "step": 100, + "lr": 0.0001, + "sup_loss": 1.8369216468190068, + "lyap1_mean": -6.2764668464660645, + "lyap1_max": -6.253869533538818, + "lyap_spec_mean": [ + -6.2764668464660645, + -6.276641368865967 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 101, + "lr": 0.0001, + "sup_loss": 1.8375981252343498 + }, + { + "step": 102, + "lr": 0.0001, + "sup_loss": 1.8414441493027138 + }, + { + "step": 103, + "lr": 0.0001, + "sup_loss": 1.841932872396079 + }, + { + "step": 104, + "lr": 0.0001, + "sup_loss": 1.8250621869367878 + }, + { + "step": 105, + 
"lr": 0.0001, + "sup_loss": 1.8180236366243632 + }, + { + "step": 106, + "lr": 0.0001, + "sup_loss": 1.8148333969430548 + }, + { + "step": 107, + "lr": 0.0001, + "sup_loss": 1.8097191971601063 + }, + { + "step": 108, + "lr": 0.0001, + "sup_loss": 1.8062571266936345 + }, + { + "step": 109, + "lr": 0.0001, + "sup_loss": 1.783310944279242 + }, + { + "step": 110, + "lr": 0.0001, + "sup_loss": 1.7882709675762618 + }, + { + "step": 111, + "lr": 0.0001, + "sup_loss": 1.7837487501971951 + }, + { + "step": 112, + "lr": 0.0001, + "sup_loss": 1.8040981386613326 + }, + { + "step": 113, + "lr": 0.0001, + "sup_loss": 1.7839348955084138 + }, + { + "step": 114, + "lr": 0.0001, + "sup_loss": 1.7808710490288988 + }, + { + "step": 115, + "lr": 0.0001, + "sup_loss": 1.73568023182333 + }, + { + "step": 116, + "lr": 0.0001, + "sup_loss": 1.7546620469313183 + }, + { + "step": 117, + "lr": 0.0001, + "sup_loss": 1.7751414856763805 + }, + { + "step": 118, + "lr": 0.0001, + "sup_loss": 1.752759060218006 + }, + { + "step": 119, + "lr": 0.0001, + "sup_loss": 1.7358692128944557 + }, + { + "step": 120, + "lr": 0.0001, + "sup_loss": 1.739228786343244 + }, + { + "step": 121, + "lr": 0.0001, + "sup_loss": 1.7493239323825307 + }, + { + "step": 122, + "lr": 0.0001, + "sup_loss": 1.7536705840390145 + }, + { + "step": 123, + "lr": 0.0001, + "sup_loss": 1.7502938192568702 + }, + { + "step": 124, + "lr": 0.0001, + "sup_loss": 1.7474968410988128 + }, + { + "step": 125, + "lr": 0.0001, + "sup_loss": 1.741263138180546, + "lyap1_mean": -6.289633750915527, + "lyap1_max": -6.271181106567383, + "lyap_spec_mean": [ + -6.289633750915527, + -6.290492534637451 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 126, + "lr": 0.0001, + "sup_loss": 1.7211279276855354 + }, + { + "step": 127, + "lr": 0.0001, + "sup_loss": 1.7073139543990044 + }, + { + "step": 128, + "lr": 0.0001, + "sup_loss": 1.7141654935069126 + }, + { + "step": 129, + "lr": 0.0001, + "sup_loss": 1.7005028367708452 + }, + { + "step": 130, + 
"lr": 0.0001, + "sup_loss": 1.6860979041501705 + }, + { + "step": 131, + "lr": 0.0001, + "sup_loss": 1.7066609179775223 + }, + { + "step": 132, + "lr": 0.0001, + "sup_loss": 1.6979615420852867 + }, + { + "step": 133, + "lr": 0.0001, + "sup_loss": 1.7273194417800906 + }, + { + "step": 134, + "lr": 0.0001, + "sup_loss": 1.7047646668485257 + }, + { + "step": 135, + "lr": 0.0001, + "sup_loss": 1.716210852207255 + }, + { + "step": 136, + "lr": 0.0001, + "sup_loss": 1.7307658980947846 + }, + { + "step": 137, + "lr": 0.0001, + "sup_loss": 1.711602512894522 + }, + { + "step": 138, + "lr": 0.0001, + "sup_loss": 1.7057358493816908 + }, + { + "step": 139, + "lr": 0.0001, + "sup_loss": 1.688446466585652 + }, + { + "step": 140, + "lr": 0.0001, + "sup_loss": 1.704572577683926 + }, + { + "step": 141, + "lr": 0.0001, + "sup_loss": 1.7304753449546095 + }, + { + "step": 142, + "lr": 0.0001, + "sup_loss": 1.6883580864485974 + }, + { + "step": 143, + "lr": 0.0001, + "sup_loss": 1.6718549694522848 + }, + { + "step": 144, + "lr": 0.0001, + "sup_loss": 1.7193390482772282 + }, + { + "step": 145, + "lr": 0.0001, + "sup_loss": 1.6713528463158704 + }, + { + "step": 146, + "lr": 0.0001, + "sup_loss": 1.6959659224268964 + }, + { + "step": 147, + "lr": 0.0001, + "sup_loss": 1.6890260775715398 + }, + { + "step": 148, + "lr": 0.0001, + "sup_loss": 1.6248832250680991 + }, + { + "step": 149, + "lr": 0.0001, + "sup_loss": 1.654215549400142 + }, + { + "step": 150, + "lr": 0.0001, + "sup_loss": 1.6615707449121377, + "lyap1_mean": -6.306196689605713, + "lyap1_max": -6.2920122146606445, + "lyap_spec_mean": [ + -6.306196212768555, + -6.305814266204834 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 151, + "lr": 0.0001, + "sup_loss": 1.6810928502731424 + }, + { + "step": 152, + "lr": 0.0001, + "sup_loss": 1.6695774965144798 + }, + { + "step": 153, + "lr": 0.0001, + "sup_loss": 1.6444260938397506 + }, + { + "step": 154, + "lr": 0.0001, + "sup_loss": 1.6821734376746678 + }, + { + "step": 155, + 
"lr": 0.0001, + "sup_loss": 1.6728631290634948 + }, + { + "step": 156, + "lr": 0.0001, + "sup_loss": 1.6820655261417026 + }, + { + "step": 157, + "lr": 0.0001, + "sup_loss": 1.6516146373078011 + }, + { + "step": 158, + "lr": 0.0001, + "sup_loss": 1.6530359901341733 + }, + { + "step": 159, + "lr": 0.0001, + "sup_loss": 1.684539741706318 + }, + { + "step": 160, + "lr": 0.0001, + "sup_loss": 1.6740305810886589 + }, + { + "step": 161, + "lr": 0.0001, + "sup_loss": 1.60870367198615 + }, + { + "step": 162, + "lr": 0.0001, + "sup_loss": 1.6475936644805484 + }, + { + "step": 163, + "lr": 0.0001, + "sup_loss": 1.6515501185205237 + }, + { + "step": 164, + "lr": 0.0001, + "sup_loss": 1.6180064545910822 + }, + { + "step": 165, + "lr": 0.0001, + "sup_loss": 1.6406713466514131 + }, + { + "step": 166, + "lr": 0.0001, + "sup_loss": 1.6452356602937375 + }, + { + "step": 167, + "lr": 0.0001, + "sup_loss": 1.6201243770891205 + }, + { + "step": 168, + "lr": 0.0001, + "sup_loss": 1.665671342076336 + }, + { + "step": 169, + "lr": 0.0001, + "sup_loss": 1.6228934612102783 + }, + { + "step": 170, + "lr": 0.0001, + "sup_loss": 1.6389439525875207 + }, + { + "step": 171, + "lr": 0.0001, + "sup_loss": 1.6387175869530952 + }, + { + "step": 172, + "lr": 0.0001, + "sup_loss": 1.6356821068128287 + }, + { + "step": 173, + "lr": 0.0001, + "sup_loss": 1.6217340583613125 + }, + { + "step": 174, + "lr": 0.0001, + "sup_loss": 1.612851693540602 + }, + { + "step": 175, + "lr": 0.0001, + "sup_loss": 1.6110975775679166, + "lyap1_mean": -6.304257392883301, + "lyap1_max": -6.284839153289795, + "lyap_spec_mean": [ + -6.304257392883301, + -6.30750846862793 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 176, + "lr": 0.0001, + "sup_loss": 1.61914691618028 + }, + { + "step": 177, + "lr": 0.0001, + "sup_loss": 1.618533874469112 + }, + { + "step": 178, + "lr": 0.0001, + "sup_loss": 1.6225511296653723 + }, + { + "step": 179, + "lr": 0.0001, + "sup_loss": 1.6409765068011015 + }, + { + "step": 180, + 
"lr": 0.0001, + "sup_loss": 1.6209402621335396 + }, + { + "step": 181, + "lr": 0.0001, + "sup_loss": 1.5903578498662685 + }, + { + "step": 182, + "lr": 0.0001, + "sup_loss": 1.6243800011765677 + }, + { + "step": 183, + "lr": 0.0001, + "sup_loss": 1.6207408935168828 + }, + { + "step": 184, + "lr": 0.0001, + "sup_loss": 1.6102868840186222 + }, + { + "step": 185, + "lr": 0.0001, + "sup_loss": 1.6001848591227634 + }, + { + "step": 186, + "lr": 0.0001, + "sup_loss": 1.572493708893578 + }, + { + "step": 187, + "lr": 0.0001, + "sup_loss": 1.612159538574632 + }, + { + "step": 188, + "lr": 0.0001, + "sup_loss": 1.5911217530125168 + }, + { + "step": 189, + "lr": 0.0001, + "sup_loss": 1.6293490369187993 + }, + { + "step": 190, + "lr": 0.0001, + "sup_loss": 1.6040246983038626 + }, + { + "step": 191, + "lr": 0.0001, + "sup_loss": 1.6171197527958248 + }, + { + "step": 192, + "lr": 0.0001, + "sup_loss": 1.6471899354369277 + }, + { + "step": 193, + "lr": 0.0001, + "sup_loss": 1.6034272540262462 + }, + { + "step": 194, + "lr": 0.0001, + "sup_loss": 1.624265957837026 + }, + { + "step": 195, + "lr": 0.0001, + "sup_loss": 1.6315334681058467 + }, + { + "step": 196, + "lr": 0.0001, + "sup_loss": 1.6182992051901102 + }, + { + "step": 197, + "lr": 0.0001, + "sup_loss": 1.6026089533859447 + }, + { + "step": 198, + "lr": 0.0001, + "sup_loss": 1.6068981420107973 + }, + { + "step": 199, + "lr": 0.0001, + "sup_loss": 1.6226155394597461 + }, + { + "step": 200, + "lr": 0.0001, + "sup_loss": 1.5908226672539212, + "lyap1_mean": -6.314453125, + "lyap1_max": -6.295107841491699, + "lyap_spec_mean": [ + -6.314453601837158, + -6.311760902404785 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 201, + "lr": 0.0001, + "sup_loss": 1.614759653061085 + }, + { + "step": 202, + "lr": 0.0001, + "sup_loss": 1.607828428395806 + }, + { + "step": 203, + "lr": 0.0001, + "sup_loss": 1.6031055264743805 + }, + { + "step": 204, + "lr": 0.0001, + "sup_loss": 1.62692512809244 + }, + { + "step": 205, + "lr": 
0.0001, + "sup_loss": 1.6197515599744592 + }, + { + "step": 206, + "lr": 0.0001, + "sup_loss": 1.5945279526481753 + }, + { + "step": 207, + "lr": 0.0001, + "sup_loss": 1.600027352742842 + }, + { + "step": 208, + "lr": 0.0001, + "sup_loss": 1.6231346460135334 + }, + { + "step": 209, + "lr": 0.0001, + "sup_loss": 1.6045676349961688 + }, + { + "step": 210, + "lr": 0.0001, + "sup_loss": 1.612343899125989 + }, + { + "step": 211, + "lr": 0.0001, + "sup_loss": 1.572819151638782 + }, + { + "step": 212, + "lr": 0.0001, + "sup_loss": 1.5998946197093518 + }, + { + "step": 213, + "lr": 0.0001, + "sup_loss": 1.603872718653751 + }, + { + "step": 214, + "lr": 0.0001, + "sup_loss": 1.6064486221293404 + }, + { + "step": 215, + "lr": 0.0001, + "sup_loss": 1.5816140347137049 + }, + { + "step": 216, + "lr": 0.0001, + "sup_loss": 1.623563311041461 + }, + { + "step": 217, + "lr": 0.0001, + "sup_loss": 1.604757450088786 + }, + { + "step": 218, + "lr": 0.0001, + "sup_loss": 1.5856211225488168 + }, + { + "step": 219, + "lr": 0.0001, + "sup_loss": 1.5971583490656518 + }, + { + "step": 220, + "lr": 0.0001, + "sup_loss": 1.62120464502734 + }, + { + "step": 221, + "lr": 0.0001, + "sup_loss": 1.5521333505669912 + }, + { + "step": 222, + "lr": 0.0001, + "sup_loss": 1.5910480072140367 + }, + { + "step": 223, + "lr": 0.0001, + "sup_loss": 1.5878960555935198 + }, + { + "step": 224, + "lr": 0.0001, + "sup_loss": 1.52525868655689 + }, + { + "step": 225, + "lr": 0.0001, + "sup_loss": 1.6046161077785628, + "lyap1_mean": -6.307826995849609, + "lyap1_max": -6.286316394805908, + "lyap_spec_mean": [ + -6.307826995849609, + -6.311221122741699 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 226, + "lr": 0.0001, + "sup_loss": 1.5787563174285753 + }, + { + "step": 227, + "lr": 0.0001, + "sup_loss": 1.6046591313503475 + }, + { + "step": 228, + "lr": 0.0001, + "sup_loss": 1.6058233574528387 + }, + { + "step": 229, + "lr": 0.0001, + "sup_loss": 1.5708493836588395 + }, + { + "step": 230, + "lr": 
0.0001, + "sup_loss": 1.5522555239571714 + }, + { + "step": 231, + "lr": 0.0001, + "sup_loss": 1.5866450454598817 + }, + { + "step": 232, + "lr": 0.0001, + "sup_loss": 1.5833501739262028 + }, + { + "step": 233, + "lr": 0.0001, + "sup_loss": 1.5781072194587922 + }, + { + "step": 234, + "lr": 0.0001, + "sup_loss": 1.5801195205359906 + }, + { + "step": 235, + "lr": 0.0001, + "sup_loss": 1.6097866642525447 + }, + { + "step": 236, + "lr": 0.0001, + "sup_loss": 1.5776707740181835 + }, + { + "step": 237, + "lr": 0.0001, + "sup_loss": 1.575370573452357 + }, + { + "step": 238, + "lr": 0.0001, + "sup_loss": 1.62187735175436 + }, + { + "step": 239, + "lr": 0.0001, + "sup_loss": 1.5719111789223998 + }, + { + "step": 240, + "lr": 0.0001, + "sup_loss": 1.6043917104768717 + }, + { + "step": 241, + "lr": 0.0001, + "sup_loss": 1.6115212118102145 + }, + { + "step": 242, + "lr": 0.0001, + "sup_loss": 1.5726522914277772 + }, + { + "step": 243, + "lr": 0.0001, + "sup_loss": 1.5747379788820692 + }, + { + "step": 244, + "lr": 0.0001, + "sup_loss": 1.602347413668085 + }, + { + "step": 245, + "lr": 0.0001, + "sup_loss": 1.561972573460076 + }, + { + "step": 246, + "lr": 0.0001, + "sup_loss": 1.579944843245691 + }, + { + "step": 247, + "lr": 0.0001, + "sup_loss": 1.56607517962854 + }, + { + "step": 248, + "lr": 0.0001, + "sup_loss": 1.5602422723452187 + }, + { + "step": 249, + "lr": 0.0001, + "sup_loss": 1.5481532506755131 + }, + { + "step": 250, + "lr": 0.0001, + "sup_loss": 1.6282572215428823, + "lyap1_mean": -6.311943054199219, + "lyap1_max": -6.285268306732178, + "lyap_spec_mean": [ + -6.311943054199219, + -6.315288543701172 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 251, + "lr": 0.0001, + "sup_loss": 1.5718172405729163 + }, + { + "step": 252, + "lr": 0.0001, + "sup_loss": 1.568016116494461 + }, + { + "step": 253, + "lr": 0.0001, + "sup_loss": 1.5892380828003114 + }, + { + "step": 254, + "lr": 0.0001, + "sup_loss": 1.5607691734001938 + }, + { + "step": 255, + "lr": 
0.0001, + "sup_loss": 1.5788166224967786 + }, + { + "step": 256, + "lr": 0.0001, + "sup_loss": 1.5587572791400528 + }, + { + "step": 257, + "lr": 0.0001, + "sup_loss": 1.56509858619077 + }, + { + "step": 258, + "lr": 0.0001, + "sup_loss": 1.5588711212966218 + }, + { + "step": 259, + "lr": 0.0001, + "sup_loss": 1.579683401292984 + }, + { + "step": 260, + "lr": 0.0001, + "sup_loss": 1.5572354648170486 + }, + { + "step": 261, + "lr": 0.0001, + "sup_loss": 1.569087077453923 + }, + { + "step": 262, + "lr": 0.0001, + "sup_loss": 1.552346801093667 + }, + { + "step": 263, + "lr": 0.0001, + "sup_loss": 1.5730190896596785 + }, + { + "step": 264, + "lr": 0.0001, + "sup_loss": 1.5592950476448364 + }, + { + "step": 265, + "lr": 0.0001, + "sup_loss": 1.578283494419273 + }, + { + "step": 266, + "lr": 0.0001, + "sup_loss": 1.5317856999633797 + }, + { + "step": 267, + "lr": 0.0001, + "sup_loss": 1.5859820808572964 + }, + { + "step": 268, + "lr": 0.0001, + "sup_loss": 1.5849993822920676 + }, + { + "step": 269, + "lr": 0.0001, + "sup_loss": 1.5748490996922742 + }, + { + "step": 270, + "lr": 0.0001, + "sup_loss": 1.5941484134857486 + }, + { + "step": 271, + "lr": 0.0001, + "sup_loss": 1.5520565621157805 + }, + { + "step": 272, + "lr": 0.0001, + "sup_loss": 1.5736600032776027 + }, + { + "step": 273, + "lr": 0.0001, + "sup_loss": 1.5703344026318378 + }, + { + "step": 274, + "lr": 0.0001, + "sup_loss": 1.532767851458178 + }, + { + "step": 275, + "lr": 0.0001, + "sup_loss": 1.5764611628543121, + "lyap1_mean": -6.288212776184082, + "lyap1_max": -6.271989822387695, + "lyap_spec_mean": [ + -6.288212776184082, + -6.290098190307617 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 276, + "lr": 0.0001, + "sup_loss": 1.5627879826848854 + }, + { + "step": 277, + "lr": 0.0001, + "sup_loss": 1.5565442095845055 + }, + { + "step": 278, + "lr": 0.0001, + "sup_loss": 1.5479018965033426 + }, + { + "step": 279, + "lr": 0.0001, + "sup_loss": 1.542800690022451 + }, + { + "step": 280, + "lr": 
0.0001, + "sup_loss": 1.5708411044949124 + }, + { + "step": 281, + "lr": 0.0001, + "sup_loss": 1.5704922914677404 + }, + { + "step": 282, + "lr": 0.0001, + "sup_loss": 1.548503892204476 + }, + { + "step": 283, + "lr": 0.0001, + "sup_loss": 1.557741653174852 + }, + { + "step": 284, + "lr": 0.0001, + "sup_loss": 1.5737748118594674 + }, + { + "step": 285, + "lr": 0.0001, + "sup_loss": 1.5217131287518002 + }, + { + "step": 286, + "lr": 0.0001, + "sup_loss": 1.5508766477654388 + }, + { + "step": 287, + "lr": 0.0001, + "sup_loss": 1.5681149739226237 + }, + { + "step": 288, + "lr": 0.0001, + "sup_loss": 1.533574656235852 + }, + { + "step": 289, + "lr": 0.0001, + "sup_loss": 1.5711439676427719 + }, + { + "step": 290, + "lr": 0.0001, + "sup_loss": 1.5541693707191668 + }, + { + "step": 291, + "lr": 0.0001, + "sup_loss": 1.53935113280687 + }, + { + "step": 292, + "lr": 0.0001, + "sup_loss": 1.5477303077305633 + }, + { + "step": 293, + "lr": 0.0001, + "sup_loss": 1.5411001058204383 + }, + { + "step": 294, + "lr": 0.0001, + "sup_loss": 1.566108745714733 + }, + { + "step": 295, + "lr": 0.0001, + "sup_loss": 1.5431387629950557 + }, + { + "step": 296, + "lr": 0.0001, + "sup_loss": 1.5719911366769372 + }, + { + "step": 297, + "lr": 0.0001, + "sup_loss": 1.5459029921829308 + }, + { + "step": 298, + "lr": 0.0001, + "sup_loss": 1.5684083943193499 + }, + { + "step": 299, + "lr": 0.0001, + "sup_loss": 1.5552347272080564 + }, + { + "step": 300, + "lr": 0.0001, + "sup_loss": 1.5612322177126692, + "lyap1_mean": -6.289813995361328, + "lyap1_max": -6.263297080993652, + "lyap_spec_mean": [ + -6.289813995361328, + -6.282321453094482 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 301, + "lr": 0.0001, + "sup_loss": 1.5511914784971887 + }, + { + "step": 302, + "lr": 0.0001, + "sup_loss": 1.5731978045920958 + }, + { + "step": 303, + "lr": 0.0001, + "sup_loss": 1.5566588069651133 + }, + { + "step": 304, + "lr": 0.0001, + "sup_loss": 1.5435834816545064 + }, + { + "step": 305, + "lr": 
0.0001, + "sup_loss": 1.5338201112718275 + }, + { + "step": 306, + "lr": 0.0001, + "sup_loss": 1.5620164001408299 + }, + { + "step": 307, + "lr": 0.0001, + "sup_loss": 1.5461411309298845 + }, + { + "step": 308, + "lr": 0.0001, + "sup_loss": 1.556008165963498 + }, + { + "step": 309, + "lr": 0.0001, + "sup_loss": 1.5554833750200956 + }, + { + "step": 310, + "lr": 0.0001, + "sup_loss": 1.568397089848242 + }, + { + "step": 311, + "lr": 0.0001, + "sup_loss": 1.5708716932718876 + }, + { + "step": 312, + "lr": 0.0001, + "sup_loss": 1.5195926766923842 + }, + { + "step": 313, + "lr": 0.0001, + "sup_loss": 1.52811400474664 + }, + { + "step": 314, + "lr": 0.0001, + "sup_loss": 1.5539360130593718 + }, + { + "step": 315, + "lr": 0.0001, + "sup_loss": 1.5706093056068795 + }, + { + "step": 316, + "lr": 0.0001, + "sup_loss": 1.557120468837921 + }, + { + "step": 317, + "lr": 0.0001, + "sup_loss": 1.5822501082816158 + }, + { + "step": 318, + "lr": 0.0001, + "sup_loss": 1.542841458798004 + }, + { + "step": 319, + "lr": 0.0001, + "sup_loss": 1.5563868638207532 + }, + { + "step": 320, + "lr": 0.0001, + "sup_loss": 1.5360635360486008 + }, + { + "step": 321, + "lr": 0.0001, + "sup_loss": 1.5459013446151262 + }, + { + "step": 322, + "lr": 0.0001, + "sup_loss": 1.5867201314501376 + }, + { + "step": 323, + "lr": 0.0001, + "sup_loss": 1.5681245372992967 + }, + { + "step": 324, + "lr": 0.0001, + "sup_loss": 1.545037808734181 + }, + { + "step": 325, + "lr": 0.0001, + "sup_loss": 1.5807109835114312, + "lyap1_mean": -6.280990123748779, + "lyap1_max": -6.268655776977539, + "lyap_spec_mean": [ + -6.2809906005859375, + -6.283902168273926 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 326, + "lr": 0.0001, + "sup_loss": 1.5361487778088874 + }, + { + "step": 327, + "lr": 0.0001, + "sup_loss": 1.5569850983489886 + }, + { + "step": 328, + "lr": 0.0001, + "sup_loss": 1.5468934937491015 + }, + { + "step": 329, + "lr": 0.0001, + "sup_loss": 1.5269107579149266 + }, + { + "step": 330, + "lr": 
0.0001, + "sup_loss": 1.5308932888735198 + }, + { + "step": 331, + "lr": 0.0001, + "sup_loss": 1.546140456130772 + }, + { + "step": 332, + "lr": 0.0001, + "sup_loss": 1.556001616894801 + }, + { + "step": 333, + "lr": 0.0001, + "sup_loss": 1.5160081181707756 + }, + { + "step": 334, + "lr": 0.0001, + "sup_loss": 1.5370807622724176 + }, + { + "step": 335, + "lr": 0.0001, + "sup_loss": 1.5463874980357313 + }, + { + "step": 336, + "lr": 0.0001, + "sup_loss": 1.527108314657003 + }, + { + "step": 337, + "lr": 0.0001, + "sup_loss": 1.5391164403451145 + }, + { + "step": 338, + "lr": 0.0001, + "sup_loss": 1.5288020191539669 + }, + { + "step": 339, + "lr": 0.0001, + "sup_loss": 1.544628633865707 + }, + { + "step": 340, + "lr": 0.0001, + "sup_loss": 1.5508991977799556 + }, + { + "step": 341, + "lr": 0.0001, + "sup_loss": 1.55731107953372 + }, + { + "step": 342, + "lr": 0.0001, + "sup_loss": 1.5502437895717247 + }, + { + "step": 343, + "lr": 0.0001, + "sup_loss": 1.5113026766574436 + }, + { + "step": 344, + "lr": 0.0001, + "sup_loss": 1.534505224713083 + }, + { + "step": 345, + "lr": 0.0001, + "sup_loss": 1.5664212448478674 + }, + { + "step": 346, + "lr": 0.0001, + "sup_loss": 1.5343774635089513 + }, + { + "step": 347, + "lr": 0.0001, + "sup_loss": 1.523872870702963 + }, + { + "step": 348, + "lr": 0.0001, + "sup_loss": 1.520222813804984 + }, + { + "step": 349, + "lr": 0.0001, + "sup_loss": 1.5100668329858764 + }, + { + "step": 350, + "lr": 0.0001, + "sup_loss": 1.5462873210196821, + "lyap1_mean": -6.279208660125732, + "lyap1_max": -6.246695518493652, + "lyap_spec_mean": [ + -6.279209136962891, + -6.2811126708984375 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 351, + "lr": 0.0001, + "sup_loss": 1.5592996002285948 + }, + { + "step": 352, + "lr": 0.0001, + "sup_loss": 1.513165697344076 + }, + { + "step": 353, + "lr": 0.0001, + "sup_loss": 1.5556921340697518 + }, + { + "step": 354, + "lr": 0.0001, + "sup_loss": 1.558373381312493 + }, + { + "step": 355, + "lr": 
0.0001, + "sup_loss": 1.5547521752157896 + }, + { + "step": 356, + "lr": 0.0001, + "sup_loss": 1.5482859037251404 + }, + { + "step": 357, + "lr": 0.0001, + "sup_loss": 1.5541198311222053 + }, + { + "step": 358, + "lr": 0.0001, + "sup_loss": 1.513929981198374 + }, + { + "step": 359, + "lr": 0.0001, + "sup_loss": 1.5386774539004036 + }, + { + "step": 360, + "lr": 0.0001, + "sup_loss": 1.5418122629340056 + }, + { + "step": 361, + "lr": 0.0001, + "sup_loss": 1.5336020776983843 + }, + { + "step": 362, + "lr": 0.0001, + "sup_loss": 1.5442231828959216 + }, + { + "step": 363, + "lr": 0.0001, + "sup_loss": 1.5474763280659718 + }, + { + "step": 364, + "lr": 0.0001, + "sup_loss": 1.5401739408673039 + }, + { + "step": 365, + "lr": 0.0001, + "sup_loss": 1.5340803578606583 + }, + { + "step": 366, + "lr": 0.0001, + "sup_loss": 1.5137462377878363 + }, + { + "step": 367, + "lr": 0.0001, + "sup_loss": 1.5490127083318859 + }, + { + "step": 368, + "lr": 0.0001, + "sup_loss": 1.562103831097833 + }, + { + "step": 369, + "lr": 0.0001, + "sup_loss": 1.5230020136679903 + }, + { + "step": 370, + "lr": 0.0001, + "sup_loss": 1.519926874355663 + }, + { + "step": 371, + "lr": 0.0001, + "sup_loss": 1.525188796570022 + }, + { + "step": 372, + "lr": 0.0001, + "sup_loss": 1.5389628270751545 + }, + { + "step": 373, + "lr": 0.0001, + "sup_loss": 1.5206521555610368 + }, + { + "step": 374, + "lr": 0.0001, + "sup_loss": 1.5910327063155232 + }, + { + "step": 375, + "lr": 0.0001, + "sup_loss": 1.5517228598442367, + "lyap1_mean": -6.272121906280518, + "lyap1_max": -6.257327079772949, + "lyap_spec_mean": [ + -6.272121906280518, + -6.272157669067383 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 376, + "lr": 0.0001, + "sup_loss": 1.5083365432341427 + }, + { + "step": 377, + "lr": 0.0001, + "sup_loss": 1.5351844511019792 + }, + { + "step": 378, + "lr": 0.0001, + "sup_loss": 1.5185103477122763 + }, + { + "step": 379, + "lr": 0.0001, + "sup_loss": 1.5313015037319202 + }, + { + "step": 380, + "lr": 
0.0001, + "sup_loss": 1.6042587657443281 + }, + { + "step": 381, + "lr": 0.0001, + "sup_loss": 1.5148778440809103 + }, + { + "step": 382, + "lr": 0.0001, + "sup_loss": 1.547646792356559 + }, + { + "step": 383, + "lr": 0.0001, + "sup_loss": 1.5043766820879998 + }, + { + "step": 384, + "lr": 0.0001, + "sup_loss": 1.517374048269513 + }, + { + "step": 385, + "lr": 0.0001, + "sup_loss": 1.510813145580335 + }, + { + "step": 386, + "lr": 0.0001, + "sup_loss": 1.5343651897756998 + }, + { + "step": 387, + "lr": 0.0001, + "sup_loss": 1.5567518054948828 + }, + { + "step": 388, + "lr": 0.0001, + "sup_loss": 1.556583401528102 + }, + { + "step": 389, + "lr": 0.0001, + "sup_loss": 1.5469114160109558 + }, + { + "step": 390, + "lr": 0.0001, + "sup_loss": 1.5066436477329854 + }, + { + "step": 391, + "lr": 0.0001, + "sup_loss": 1.5363803613218583 + }, + { + "step": 392, + "lr": 0.0001, + "sup_loss": 1.5623538263511423 + }, + { + "step": 393, + "lr": 0.0001, + "sup_loss": 1.5509992112423387 + }, + { + "step": 394, + "lr": 0.0001, + "sup_loss": 1.5553260421123227 + }, + { + "step": 395, + "lr": 0.0001, + "sup_loss": 1.5360717974523455 + }, + { + "step": 396, + "lr": 0.0001, + "sup_loss": 1.5299607690174781 + }, + { + "step": 397, + "lr": 0.0001, + "sup_loss": 1.4931204366796695 + }, + { + "step": 398, + "lr": 0.0001, + "sup_loss": 1.5352384127840748 + }, + { + "step": 399, + "lr": 0.0001, + "sup_loss": 1.5222885300656976 + }, + { + "step": 400, + "lr": 0.0001, + "sup_loss": 1.508831153333313, + "lyap1_mean": -6.27598237991333, + "lyap1_max": -6.24452018737793, + "lyap_spec_mean": [ + -6.275981903076172, + -6.269106388092041 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 401, + "lr": 0.0001, + "sup_loss": 1.5349017639824347 + }, + { + "step": 402, + "lr": 0.0001, + "sup_loss": 1.548015367001545 + }, + { + "step": 403, + "lr": 0.0001, + "sup_loss": 1.5078454672949264 + }, + { + "step": 404, + "lr": 0.0001, + "sup_loss": 1.5185558897396196 + }, + { + "step": 405, + "lr": 
0.0001, + "sup_loss": 1.5512310382424748 + }, + { + "step": 406, + "lr": 0.0001, + "sup_loss": 1.5304067426341317 + }, + { + "step": 407, + "lr": 0.0001, + "sup_loss": 1.5183398062918037 + }, + { + "step": 408, + "lr": 0.0001, + "sup_loss": 1.5387815991781382 + }, + { + "step": 409, + "lr": 0.0001, + "sup_loss": 1.5672918045341844 + }, + { + "step": 410, + "lr": 0.0001, + "sup_loss": 1.5307374295242138 + }, + { + "step": 411, + "lr": 0.0001, + "sup_loss": 1.544239649750348 + }, + { + "step": 412, + "lr": 0.0001, + "sup_loss": 1.5372248394722121 + }, + { + "step": 413, + "lr": 0.0001, + "sup_loss": 1.546755674066759 + }, + { + "step": 414, + "lr": 0.0001, + "sup_loss": 1.5602227617649562 + }, + { + "step": 415, + "lr": 0.0001, + "sup_loss": 1.5235959308536382 + }, + { + "step": 416, + "lr": 0.0001, + "sup_loss": 1.54999620670243 + }, + { + "step": 417, + "lr": 0.0001, + "sup_loss": 1.5595363483742926 + }, + { + "step": 418, + "lr": 0.0001, + "sup_loss": 1.5429251013869647 + }, + { + "step": 419, + "lr": 0.0001, + "sup_loss": 1.5263398785761602 + }, + { + "step": 420, + "lr": 0.0001, + "sup_loss": 1.5403264813934174 + }, + { + "step": 421, + "lr": 0.0001, + "sup_loss": 1.51891197855286 + }, + { + "step": 422, + "lr": 0.0001, + "sup_loss": 1.5330320818314984 + }, + { + "step": 423, + "lr": 0.0001, + "sup_loss": 1.5024285323810576 + }, + { + "step": 424, + "lr": 0.0001, + "sup_loss": 1.5529554719113352 + }, + { + "step": 425, + "lr": 0.0001, + "sup_loss": 1.519168167373818, + "lyap1_mean": -6.279496192932129, + "lyap1_max": -6.26162052154541, + "lyap_spec_mean": [ + -6.279496192932129, + -6.2808918952941895 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 426, + "lr": 0.0001, + "sup_loss": 1.5365064575118708 + }, + { + "step": 427, + "lr": 0.0001, + "sup_loss": 1.5222562257747192 + }, + { + "step": 428, + "lr": 0.0001, + "sup_loss": 1.519125449620276 + }, + { + "step": 429, + "lr": 0.0001, + "sup_loss": 1.529261269354274 + }, + { + "step": 430, + "lr": 
0.0001, + "sup_loss": 1.52106984620442 + }, + { + "step": 431, + "lr": 0.0001, + "sup_loss": 1.542332523616496 + }, + { + "step": 432, + "lr": 0.0001, + "sup_loss": 1.5053887182166101 + }, + { + "step": 433, + "lr": 0.0001, + "sup_loss": 1.5287912390123242 + }, + { + "step": 434, + "lr": 0.0001, + "sup_loss": 1.4792699148804047 + }, + { + "step": 435, + "lr": 0.0001, + "sup_loss": 1.5175428625480079 + }, + { + "step": 436, + "lr": 0.0001, + "sup_loss": 1.5141915348633057 + }, + { + "step": 437, + "lr": 0.0001, + "sup_loss": 1.5021534012618962 + }, + { + "step": 438, + "lr": 0.0001, + "sup_loss": 1.515655711068608 + }, + { + "step": 439, + "lr": 0.0001, + "sup_loss": 1.5336998941946303 + }, + { + "step": 440, + "lr": 0.0001, + "sup_loss": 1.5709049462152431 + }, + { + "step": 441, + "lr": 0.0001, + "sup_loss": 1.5345619270812774 + }, + { + "step": 442, + "lr": 0.0001, + "sup_loss": 1.545632307238727 + }, + { + "step": 443, + "lr": 0.0001, + "sup_loss": 1.5348385538551335 + }, + { + "step": 444, + "lr": 0.0001, + "sup_loss": 1.5596831114341054 + }, + { + "step": 445, + "lr": 0.0001, + "sup_loss": 1.5213286739984129 + }, + { + "step": 446, + "lr": 0.0001, + "sup_loss": 1.507136513986345 + }, + { + "step": 447, + "lr": 0.0001, + "sup_loss": 1.5338240960099745 + }, + { + "step": 448, + "lr": 0.0001, + "sup_loss": 1.5335693962074035 + }, + { + "step": 449, + "lr": 0.0001, + "sup_loss": 1.5439888653044145 + }, + { + "step": 450, + "lr": 0.0001, + "sup_loss": 1.527999408648118, + "lyap1_mean": -6.28324031829834, + "lyap1_max": -6.266049385070801, + "lyap_spec_mean": [ + -6.28324031829834, + -6.283727169036865 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 451, + "lr": 0.0001, + "sup_loss": 1.5393156317969563 + }, + { + "step": 452, + "lr": 0.0001, + "sup_loss": 1.5240758554208984 + }, + { + "step": 453, + "lr": 0.0001, + "sup_loss": 1.5172760077652057 + }, + { + "step": 454, + "lr": 0.0001, + "sup_loss": 1.5435138079852262 + }, + { + "step": 455, + "lr": 
0.0001, + "sup_loss": 1.553782064658805 + }, + { + "step": 456, + "lr": 0.0001, + "sup_loss": 1.4990875570761242 + }, + { + "step": 457, + "lr": 0.0001, + "sup_loss": 1.526767795262043 + }, + { + "step": 458, + "lr": 0.0001, + "sup_loss": 1.5074100872415515 + }, + { + "step": 459, + "lr": 0.0001, + "sup_loss": 1.5299431484804877 + }, + { + "step": 460, + "lr": 0.0001, + "sup_loss": 1.5411429028548818 + }, + { + "step": 461, + "lr": 0.0001, + "sup_loss": 1.5258069753257875 + }, + { + "step": 462, + "lr": 0.0001, + "sup_loss": 1.5130222736269014 + }, + { + "step": 463, + "lr": 0.0001, + "sup_loss": 1.59688740678216 + }, + { + "step": 464, + "lr": 0.0001, + "sup_loss": 1.5169865077256424 + }, + { + "step": 465, + "lr": 0.0001, + "sup_loss": 1.522381168712973 + }, + { + "step": 466, + "lr": 0.0001, + "sup_loss": 1.522439002600057 + }, + { + "step": 467, + "lr": 0.0001, + "sup_loss": 1.5190416825884472 + }, + { + "step": 468, + "lr": 0.0001, + "sup_loss": 1.5425338998993232 + }, + { + "step": 469, + "lr": 0.0001, + "sup_loss": 1.5452529428590425 + }, + { + "step": 470, + "lr": 0.0001, + "sup_loss": 1.5260539108805669 + }, + { + "step": 471, + "lr": 0.0001, + "sup_loss": 1.5488179073420898 + }, + { + "step": 472, + "lr": 0.0001, + "sup_loss": 1.5192884004268512 + }, + { + "step": 473, + "lr": 0.0001, + "sup_loss": 1.5055062362761682 + }, + { + "step": 474, + "lr": 0.0001, + "sup_loss": 1.4984221563948537 + }, + { + "step": 475, + "lr": 0.0001, + "sup_loss": 1.5320479692645415, + "lyap1_mean": -6.27734375, + "lyap1_max": -6.261284828186035, + "lyap_spec_mean": [ + -6.277344226837158, + -6.2793049812316895 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 476, + "lr": 0.0001, + "sup_loss": 1.538385270074266 + }, + { + "step": 477, + "lr": 0.0001, + "sup_loss": 1.4984740868654256 + }, + { + "step": 478, + "lr": 0.0001, + "sup_loss": 1.5450349077570704 + }, + { + "step": 479, + "lr": 0.0001, + "sup_loss": 1.5320467747126512 + }, + { + "step": 480, + "lr": 0.0001, 
+ "sup_loss": 1.5451753778273984 + }, + { + "step": 481, + "lr": 0.0001, + "sup_loss": 1.5345277338401393 + }, + { + "step": 482, + "lr": 0.0001, + "sup_loss": 1.5576481611049358 + }, + { + "step": 483, + "lr": 0.0001, + "sup_loss": 1.525092799737688 + }, + { + "step": 484, + "lr": 0.0001, + "sup_loss": 1.5177981040203947 + }, + { + "step": 485, + "lr": 0.0001, + "sup_loss": 1.524682836990655 + }, + { + "step": 486, + "lr": 0.0001, + "sup_loss": 1.5577210090147655 + }, + { + "step": 487, + "lr": 0.0001, + "sup_loss": 1.5175527588503717 + }, + { + "step": 488, + "lr": 0.0001, + "sup_loss": 1.517884672177967 + }, + { + "step": 489, + "lr": 0.0001, + "sup_loss": 1.540989397132943 + }, + { + "step": 490, + "lr": 0.0001, + "sup_loss": 1.5207840823262242 + }, + { + "step": 491, + "lr": 0.0001, + "sup_loss": 1.5308205979089273 + }, + { + "step": 492, + "lr": 0.0001, + "sup_loss": 1.5343529235115525 + }, + { + "step": 493, + "lr": 0.0001, + "sup_loss": 1.4972792643274797 + }, + { + "step": 494, + "lr": 0.0001, + "sup_loss": 1.5436764958011016 + }, + { + "step": 495, + "lr": 0.0001, + "sup_loss": 1.5344808545549244 + }, + { + "step": 496, + "lr": 0.0001, + "sup_loss": 1.5101506555087487 + }, + { + "step": 497, + "lr": 0.0001, + "sup_loss": 1.5398357969125787 + }, + { + "step": 498, + "lr": 0.0001, + "sup_loss": 1.5341583392864167 + }, + { + "step": 499, + "lr": 0.0001, + "sup_loss": 1.5103523937991166 + } + ], + "evals": [ + { + "step": 0, + "acc": 0.0, + "tok_acc": 0.0 + }, + { + "step": 100, + "acc": 0.0, + "tok_acc": 0.38816550925925924 + }, + { + "step": 200, + "acc": 0.0, + "tok_acc": 0.3872974537037037 + }, + { + "step": 300, + "acc": 0.0, + "tok_acc": 0.3872974537037037 + }, + { + "step": 400, + "acc": 0.0, + "tok_acc": 0.39052854938271603 + }, + { + "step": 500, + "acc": 0.0, + "tok_acc": 0.3880208333333333 + } + ], + "final_acc": 0.0, + "final_tok_acc": 0.3880208333333333 +}
\ No newline at end of file diff --git a/scripts/train_hrm_orth.py b/scripts/train_hrm_orth.py new file mode 100644 index 0000000..4d9868b --- /dev/null +++ b/scripts/train_hrm_orth.py @@ -0,0 +1,203 @@ +"""Train HRM-Orth (orthogonal-patched HRM) from scratch on Sudoku. + +Per codex round 2 recommendation (Q6 pivot): patch HRM Block (attn+SwiGLU+rms_norm) +with Lipschitz-bounded versions (cosine attn + OrthLinear+MaxMin + weighted residual). +Keeps HRM's H_level/L_level/ACT framework intact. +""" +from __future__ import annotations +import sys, os, json, math, time, argparse +from pathlib import Path +import numpy as np +import torch + +ROOT = Path("/home/yurenh2/rrm/srm") +sys.path.insert(0, str(ROOT)) + +from models.srm.hrm_orth_v1 import HierarchicalReasoningModel_ACTV1 as HRMOrth +from models.losses import ACTLossHead +from models.sparse_embedding import CastedSparseEmbeddingSignSGD_Distributed +from adam_atan2 import AdamATan2 + + +def build_model(data_path: Path, batch_size: int, device: str, + hidden_size: int = 256, num_heads: int = 4, + H_cycles: int = 2, L_cycles: int = 2, H_layers: int = 4, L_layers: int = 4, + orth_s_min: float = 0.95, cosine_attn_tau: float = 8.0): + train_meta = json.loads((data_path / "train" / "dataset.json").read_text()) + arch_cfg = dict( + H_cycles=H_cycles, H_layers=H_layers, + L_cycles=L_cycles, L_layers=L_layers, + expansion=4, + halt_exploration_prob=0.1, + halt_max_steps=16, + hidden_size=hidden_size, + num_heads=num_heads, + pos_encodings="rope", + puzzle_emb_ndim=hidden_size, + batch_size=batch_size, + vocab_size=train_meta["vocab_size"], + seq_len=train_meta["seq_len"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], + forward_dtype="bfloat16", + orth_s_min=orth_s_min, + cosine_attn_tau=cosine_attn_tau, + ) + with torch.device(device): + base = HRMOrth(arch_cfg) + head = ACTLossHead(base, loss_type="stablemax_cross_entropy") + return head, base, train_meta + + +def load_train_batches(data_path: Path, 
batch_size: int, n_iters: int, seed: int = 0): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "train" / "all__inputs.npy") + labels = np.load(data_path / "train" / "all__labels.npy") + pid = np.load(data_path / "train" / "all__puzzle_identifiers.npy") + N = len(inputs) + for _ in range(n_iters): + idx = rng.choice(N, size=batch_size, replace=False) + yield { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)), + "labels": torch.from_numpy(labels[idx].astype(np.int32)), + "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)), + } + + +def evaluate(head, base, data_path, n_samples, batch_size, device, seed=42): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "test" / "all__inputs.npy") + labels = np.load(data_path / "test" / "all__labels.npy") + pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy") + idx_all = rng.choice(len(inputs), size=n_samples, replace=False) + head.eval() + correct = 0; token_correct = 0; token_total = 0 + for s in range(0, n_samples, batch_size): + e = min(s + batch_size, n_samples) + idx = idx_all[s:e] + batch = { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device), + "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device), + "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device), + } + with torch.no_grad(): + with torch.device(device): + carry = base.initial_carry(batch) + for _ in range(base.config.halt_max_steps): + carry, outputs = base(carry=carry, batch=batch) + preds = outputs["logits"].argmax(dim=-1) + mask = batch["labels"] > 0 + exact = ((preds == batch["labels"]) | ~mask).all(dim=-1).float() + correct += exact.sum().item() + token_correct += ((preds == batch["labels"]) & mask).sum().item() + token_total += mask.sum().item() + return correct / n_samples, token_correct / max(token_total, 1) + + +def warmup_constant_lr(step, base_lr, warmup): + return base_lr * step / max(1, warmup) if step < warmup else base_lr + 


def _parse_args():
    """Define the command-line interface and parse the process arguments."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--data-path", default="/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000")
    ap.add_argument("--n-steps", type=int, default=3000)
    ap.add_argument("--batch-size", type=int, default=8)
    ap.add_argument("--lr", type=float, default=1e-4)
    ap.add_argument("--puzzle-emb-lr", type=float, default=1e-4)
    ap.add_argument("--warmup-steps", type=int, default=200)
    ap.add_argument("--weight-decay", type=float, default=1.0)
    ap.add_argument("--hidden-size", type=int, default=256)
    ap.add_argument("--num-heads", type=int, default=4)
    ap.add_argument("--H-cycles", type=int, default=2)
    ap.add_argument("--L-cycles", type=int, default=2)
    ap.add_argument("--H-layers", type=int, default=4)
    ap.add_argument("--L-layers", type=int, default=4)
    ap.add_argument("--orth-s-min", type=float, default=0.95, help="min diag scale (weak orthogonality)")
    ap.add_argument("--cosine-attn-tau", type=float, default=8.0)
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument("--eval-every", type=int, default=300)
    ap.add_argument("--eval-n", type=int, default=512)
    ap.add_argument("--eval-batch-size", type=int, default=32)
    ap.add_argument("--out", required=True)
    ap.add_argument("--save-ckpt", default="")
    return ap.parse_args()


def _train_step(head, base, batch, main_opt, puzzle_emb_opt, batch_size, device):
    """Run one optimisation step on ``batch`` and return the scalar loss.

    A fresh carry is created for the batch, then ``head`` is called up to
    ``base.config.halt_max_steps`` times, stopping early once it reports
    ``all_finish``. The per-call losses are averaged, divided by
    ``batch_size``, and back-propagated in a single backward pass.
    """
    head.train()
    with torch.device(device):
        carry = base.initial_carry(batch)

    loss_sum = 0.0
    n_segments = 0
    for _ in range(base.config.halt_max_steps):
        carry, seg_loss, _metrics, _, all_finish = head(return_keys=[], carry=carry, batch=batch)
        loss_sum = loss_sum + seg_loss
        n_segments += 1
        if all_finish:
            break
    sup_loss = loss_sum / max(n_segments, 1) / batch_size

    puzzle_emb_opt.zero_grad(set_to_none=True)
    main_opt.zero_grad(set_to_none=True)
    sup_loss.backward()
    torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
    main_opt.step()
    puzzle_emb_opt.step()
    return float(sup_loss.item())


def main():
    """Train HRM-Orth, evaluate periodically, and write a JSON run log.

    The log (arguments, per-step loss, evaluation history) goes to ``--out``.
    It is written even when training is interrupted or raises, so a crashed
    run keeps the metrics gathered so far; in that case ``final_acc`` is the
    most recent evaluation and the exception is re-raised afterwards. The
    optional checkpoint (``--save-ckpt``) is saved only after a complete run.
    """
    args = _parse_args()

    device = "cuda"
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    data_path = Path(args.data_path)
    head, base, _ = build_model(
        data_path, args.batch_size, device,
        hidden_size=args.hidden_size, num_heads=args.num_heads,
        H_cycles=args.H_cycles, L_cycles=args.L_cycles,
        H_layers=args.H_layers, L_layers=args.L_layers,
        orth_s_min=args.orth_s_min, cosine_attn_tau=args.cosine_attn_tau,
    )
    n_params = sum(p.numel() for p in head.parameters())
    print(f"Built HRM-Orth | params={n_params:,} | hidden={args.hidden_size} "
          f"H_layers={args.H_layers} L_layers={args.L_layers} "
          f"s_min={args.orth_s_min} τ={args.cosine_attn_tau}")

    # Both optimisers start at lr=0; the schedule overwrites it every step.
    puzzle_emb_opt = CastedSparseEmbeddingSignSGD_Distributed(
        base.inner.puzzle_emb.buffers(), lr=0, weight_decay=args.weight_decay, world_size=1)
    main_opt = AdamATan2(head.parameters(), lr=0, betas=(0.9, 0.95), weight_decay=args.weight_decay)

    acc0, tacc0 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
    print(f"=== step 0 (random init): exact_acc = {acc0:.4f} token_acc = {tacc0:.4f} ===")

    log = {"args": vars(args), "n_params": n_params, "initial_acc": acc0, "initial_tok_acc": tacc0, "steps": [], "evals": []}
    log["evals"].append({"step": 0, "acc": acc0, "tok_acc": tacc0})
    t0 = time.time()
    train_iter = load_train_batches(data_path, args.batch_size, args.n_steps, seed=args.seed)
    last_step = args.n_steps - 1

    try:
        for step, batch in enumerate(train_iter):
            batch = {k: v.to(device) for k, v in batch.items()}
            cur_lr = warmup_constant_lr(step, args.lr, args.warmup_steps)
            cur_pe_lr = warmup_constant_lr(step, args.puzzle_emb_lr, args.warmup_steps)
            for pg in main_opt.param_groups:
                pg["lr"] = cur_lr
            for pg in puzzle_emb_opt.param_groups:
                pg["lr"] = cur_pe_lr

            loss_value = _train_step(head, base, batch, main_opt, puzzle_emb_opt,
                                     args.batch_size, device)

            rec = {"step": step, "lr": cur_lr, "sup_loss": loss_value}
            log["steps"].append(rec)
            if step % 25 == 0 or step == last_step:
                print(f" [{step:>4}/{args.n_steps}] dt={time.time()-t0:.0f}s lr={cur_lr:.1e} "
                      f"sup={rec['sup_loss']:.4f}", flush=True)

            if (step + 1) % args.eval_every == 0 or step == last_step:
                acc, tacc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
                print(f" >> EVAL @ {step+1}: exact_acc={acc:.4f} tok_acc={tacc:.4f} (Δ init: {acc-acc0:+.4f})", flush=True)
                log["evals"].append({"step": step + 1, "acc": acc, "tok_acc": tacc})
    finally:
        # Persist whatever was recorded, even on OOM / NaN / Ctrl-C, so a
        # failed run does not lose its metrics. "evals" always holds at least
        # the step-0 entry, so indexing [-1] is safe.
        log["final_acc"] = log["evals"][-1]["acc"]
        log["final_tok_acc"] = log["evals"][-1]["tok_acc"]
        out_path = Path(args.out)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(log, indent=2))

    print(f"\n=== DONE === init {acc0:.4f} → final {log['final_acc']:.4f} log → {args.out}")

    if args.save_ckpt:
        ckpt_path = Path(args.save_ckpt)
        ckpt_path.parent.mkdir(parents=True, exist_ok=True)
        torch.save({"state_dict": head.state_dict(), "args": vars(args),
                    "n_steps_trained": args.n_steps, "final_acc": log["final_acc"], "n_params": n_params},
                   args.save_ckpt)
        print(f"checkpoint → {args.save_ckpt}")


if __name__ == "__main__":
    main()
