| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.07184166097920183, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 577.5625, | |
| "epoch": 0.00014368332195840368, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 595.375, | |
| "epoch": 0.00028736664391680735, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 489.875, | |
| "epoch": 0.00043104996587521106, | |
| "grad_norm": 1.5707740783691406, | |
| "kl": 0.0, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.0478, | |
| "reward": 0.0062500000931322575, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0625, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 465.875, | |
| "epoch": 0.0005747332878336147, | |
| "grad_norm": 0.003557927906513214, | |
| "kl": 0.00043487548828125, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 671.5, | |
| "epoch": 0.0007184166097920184, | |
| "grad_norm": 0.0033763274550437927, | |
| "kl": 0.0005903244018554688, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 507.375, | |
| "epoch": 0.0008620999317504221, | |
| "grad_norm": 0.003343924880027771, | |
| "kl": 0.0004489421844482422, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 640.125, | |
| "epoch": 0.0010057832537088258, | |
| "grad_norm": 0.003249621717259288, | |
| "kl": 0.0005922317504882812, | |
| "learning_rate": 2.3333333333333336e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 574.875, | |
| "epoch": 0.0011494665756672294, | |
| "grad_norm": 0.004247524309903383, | |
| "kl": 0.000701904296875, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 571.1875, | |
| "epoch": 0.001293149897625633, | |
| "grad_norm": 0.004429314751178026, | |
| "kl": 0.0005669593811035156, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 624.5625, | |
| "epoch": 0.0014368332195840367, | |
| "grad_norm": 0.004792630672454834, | |
| "kl": 0.0008535385131835938, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 634.9375, | |
| "epoch": 0.0015805165415424404, | |
| "grad_norm": 0.004688590299338102, | |
| "kl": 0.00080108642578125, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 681.125, | |
| "epoch": 0.0017241998635008442, | |
| "grad_norm": 0.004042426124215126, | |
| "kl": 0.0007867813110351562, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 552.8125, | |
| "epoch": 0.0018678831854592479, | |
| "grad_norm": 0.007403955794870853, | |
| "kl": 0.0011396408081054688, | |
| "learning_rate": 4.333333333333334e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 774.5, | |
| "epoch": 0.0020115665074176515, | |
| "grad_norm": 0.007942945696413517, | |
| "kl": 0.00086212158203125, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 648.875, | |
| "epoch": 0.0021552498293760554, | |
| "grad_norm": 0.011233330704271793, | |
| "kl": 0.0015621185302734375, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 395.125, | |
| "epoch": 0.002298933151334459, | |
| "grad_norm": 2.4543673992156982, | |
| "kl": 0.007465362548828125, | |
| "learning_rate": 4.999952797253148e-06, | |
| "loss": -0.0963, | |
| "reward": 0.0062500000931322575, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0625, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 590.5625, | |
| "epoch": 0.0024426164732928627, | |
| "grad_norm": 0.03457748889923096, | |
| "kl": 0.0042629241943359375, | |
| "learning_rate": 4.9998111909931225e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 466.75, | |
| "epoch": 0.002586299795251266, | |
| "grad_norm": 1.883467674255371, | |
| "kl": 0.013484954833984375, | |
| "learning_rate": 4.999575187161439e-06, | |
| "loss": -0.0091, | |
| "reward": 0.0062500000931322575, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0625, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 644.25, | |
| "epoch": 0.00272998311720967, | |
| "grad_norm": 1.1460031270980835, | |
| "kl": 0.02417755126953125, | |
| "learning_rate": 4.9992447956603455e-06, | |
| "loss": 0.0375, | |
| "reward": 0.0062500000931322575, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0625, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 424.125, | |
| "epoch": 0.0028736664391680734, | |
| "grad_norm": 38.29722213745117, | |
| "kl": 5.15692138671875, | |
| "learning_rate": 4.998820030352409e-06, | |
| "loss": 0.0845, | |
| "reward": 0.012500000186264515, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.125, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 465.375, | |
| "epoch": 0.0030173497611264773, | |
| "grad_norm": 554.5137329101562, | |
| "kl": 12.4742431640625, | |
| "learning_rate": 4.998300909059929e-06, | |
| "loss": 0.1845, | |
| "reward": 0.02500000037252903, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.25, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 531.0625, | |
| "epoch": 0.0031610330830848807, | |
| "grad_norm": 5.935389041900635, | |
| "kl": 0.32598876953125, | |
| "learning_rate": 4.997687453564198e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 391.9375, | |
| "epoch": 0.0033047164050432846, | |
| "grad_norm": 2.4972121715545654, | |
| "kl": 0.0226593017578125, | |
| "learning_rate": 4.9969796896045775e-06, | |
| "loss": -0.0487, | |
| "reward": 0.0062500000931322575, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0625, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 595.8125, | |
| "epoch": 0.0034483997270016884, | |
| "grad_norm": 0.053129617124795914, | |
| "kl": 0.0148468017578125, | |
| "learning_rate": 4.996177646877426e-06, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 570.125, | |
| "epoch": 0.003592083048960092, | |
| "grad_norm": 0.026312552392482758, | |
| "kl": 0.0113067626953125, | |
| "learning_rate": 4.995281359034851e-06, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 474.0, | |
| "epoch": 0.0037357663709184957, | |
| "grad_norm": 0.03218882903456688, | |
| "kl": 0.0166015625, | |
| "learning_rate": 4.994290863683296e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 610.6875, | |
| "epoch": 0.003879449692876899, | |
| "grad_norm": 0.051387328654527664, | |
| "kl": 0.01434326171875, | |
| "learning_rate": 4.99320620238196e-06, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 594.0625, | |
| "epoch": 0.004023133014835303, | |
| "grad_norm": 0.26794055104255676, | |
| "kl": 0.034637451171875, | |
| "learning_rate": 4.99202742064106e-06, | |
| "loss": 0.0003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 523.8125, | |
| "epoch": 0.004166816336793707, | |
| "grad_norm": 0.02611120045185089, | |
| "kl": 0.01556396484375, | |
| "learning_rate": 4.990754567919917e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 606.4375, | |
| "epoch": 0.004310499658752111, | |
| "grad_norm": 0.028897596523165703, | |
| "kl": 0.019256591796875, | |
| "learning_rate": 4.989387697624881e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 569.1875, | |
| "epoch": 0.004454182980710514, | |
| "grad_norm": 0.024760432541370392, | |
| "kl": 0.01702880859375, | |
| "learning_rate": 4.987926867107095e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 694.3125, | |
| "epoch": 0.004597866302668918, | |
| "grad_norm": 0.027595188468694687, | |
| "kl": 0.018402099609375, | |
| "learning_rate": 4.986372137660078e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 536.375, | |
| "epoch": 0.0047415496246273215, | |
| "grad_norm": 0.02404957078397274, | |
| "kl": 0.01806640625, | |
| "learning_rate": 4.984723574517165e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 604.375, | |
| "epoch": 0.004885232946585725, | |
| "grad_norm": 0.025466497987508774, | |
| "kl": 0.01824951171875, | |
| "learning_rate": 4.9829812468487655e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 637.8125, | |
| "epoch": 0.005028916268544128, | |
| "grad_norm": 0.029370181262493134, | |
| "kl": 0.0229644775390625, | |
| "learning_rate": 4.981145227759457e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 597.1875, | |
| "epoch": 0.005172599590502532, | |
| "grad_norm": 0.03050071932375431, | |
| "kl": 0.022796630859375, | |
| "learning_rate": 4.979215594284924e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 650.5625, | |
| "epoch": 0.005316282912460936, | |
| "grad_norm": 0.022108623757958412, | |
| "kl": 0.019073486328125, | |
| "learning_rate": 4.977192427388722e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 711.1875, | |
| "epoch": 0.00545996623441934, | |
| "grad_norm": 0.034452371299266815, | |
| "kl": 0.022613525390625, | |
| "learning_rate": 4.9750758119588824e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 613.1875, | |
| "epoch": 0.005603649556377744, | |
| "grad_norm": 0.022646795958280563, | |
| "kl": 0.019561767578125, | |
| "learning_rate": 4.972865836804349e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 592.3125, | |
| "epoch": 0.005747332878336147, | |
| "grad_norm": 0.04067623242735863, | |
| "kl": 0.02325439453125, | |
| "learning_rate": 4.970562594651254e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 553.875, | |
| "epoch": 0.005891016200294551, | |
| "grad_norm": 0.0206755418330431, | |
| "kl": 0.0185546875, | |
| "learning_rate": 4.968166182139026e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 576.0, | |
| "epoch": 0.0060346995222529546, | |
| "grad_norm": 0.027665315195918083, | |
| "kl": 0.025482177734375, | |
| "learning_rate": 4.9656766998163306e-06, | |
| "loss": 0.0003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 673.0625, | |
| "epoch": 0.006178382844211358, | |
| "grad_norm": 0.028020743280649185, | |
| "kl": 0.0281982421875, | |
| "learning_rate": 4.963094252136865e-06, | |
| "loss": 0.0003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 758.5, | |
| "epoch": 0.006322066166169761, | |
| "grad_norm": 0.018797732889652252, | |
| "kl": 0.0179443359375, | |
| "learning_rate": 4.960418947454958e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 650.125, | |
| "epoch": 0.006465749488128165, | |
| "grad_norm": 0.02184910513460636, | |
| "kl": 0.021484375, | |
| "learning_rate": 4.957650898021038e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 659.1875, | |
| "epoch": 0.006609432810086569, | |
| "grad_norm": 0.020105620846152306, | |
| "kl": 0.021453857421875, | |
| "learning_rate": 4.954790219976915e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 564.5625, | |
| "epoch": 0.006753116132044973, | |
| "grad_norm": 0.021222786977887154, | |
| "kl": 0.02001953125, | |
| "learning_rate": 4.95183703335091e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 534.0625, | |
| "epoch": 0.006896799454003377, | |
| "grad_norm": 0.03832077980041504, | |
| "kl": 0.0220489501953125, | |
| "learning_rate": 4.948791462052819e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 689.8125, | |
| "epoch": 0.00704048277596178, | |
| "grad_norm": 7.35048770904541, | |
| "kl": 0.247314453125, | |
| "learning_rate": 4.945653633868716e-06, | |
| "loss": 0.0019, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 699.75, | |
| "epoch": 0.007184166097920184, | |
| "grad_norm": 0.02264636568725109, | |
| "kl": 0.01715087890625, | |
| "learning_rate": 4.942423680455584e-06, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 627.75, | |
| "epoch": 0.007327849419878588, | |
| "grad_norm": 0.06823224574327469, | |
| "kl": 0.2841796875, | |
| "learning_rate": 4.939101737335802e-06, | |
| "loss": 0.0028, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 716.875, | |
| "epoch": 0.0074715327418369915, | |
| "grad_norm": 0.12825427949428558, | |
| "kl": 0.3291015625, | |
| "learning_rate": 4.935687943891447e-06, | |
| "loss": 0.0034, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 584.125, | |
| "epoch": 0.007615216063795395, | |
| "grad_norm": 0.09328959882259369, | |
| "kl": 0.33056640625, | |
| "learning_rate": 4.932182443358458e-06, | |
| "loss": 0.0034, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 679.8125, | |
| "epoch": 0.007758899385753798, | |
| "grad_norm": 0.1614077389240265, | |
| "kl": 0.3330078125, | |
| "learning_rate": 4.928585382820616e-06, | |
| "loss": 0.0033, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 524.5, | |
| "epoch": 0.007902582707712203, | |
| "grad_norm": 0.08818939328193665, | |
| "kl": 0.32177734375, | |
| "learning_rate": 4.924896913203376e-06, | |
| "loss": 0.0031, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 613.125, | |
| "epoch": 0.008046266029670606, | |
| "grad_norm": 0.10239657759666443, | |
| "kl": 0.337890625, | |
| "learning_rate": 4.921117189267535e-06, | |
| "loss": 0.0033, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 684.375, | |
| "epoch": 0.008189949351629009, | |
| "grad_norm": 0.21877345442771912, | |
| "kl": 0.295166015625, | |
| "learning_rate": 4.917246369602742e-06, | |
| "loss": 0.0029, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 595.125, | |
| "epoch": 0.008333632673587414, | |
| "grad_norm": 0.17493651807308197, | |
| "kl": 0.32080078125, | |
| "learning_rate": 4.9132846166208355e-06, | |
| "loss": 0.0031, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 654.1875, | |
| "epoch": 0.008477315995545817, | |
| "grad_norm": 0.08061961829662323, | |
| "kl": 0.28564453125, | |
| "learning_rate": 4.9092320965490365e-06, | |
| "loss": 0.0029, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 704.5625, | |
| "epoch": 0.008620999317504222, | |
| "grad_norm": 0.05985090509057045, | |
| "kl": 0.2939453125, | |
| "learning_rate": 4.905088979422971e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 663.4375, | |
| "epoch": 0.008764682639462625, | |
| "grad_norm": 0.06948310881853104, | |
| "kl": 0.265625, | |
| "learning_rate": 4.900855439079536e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 608.5625, | |
| "epoch": 0.008908365961421028, | |
| "grad_norm": 0.053070612251758575, | |
| "kl": 0.240966796875, | |
| "learning_rate": 4.8965316531496055e-06, | |
| "loss": 0.0024, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 726.3125, | |
| "epoch": 0.009052049283379432, | |
| "grad_norm": 0.05458589643239975, | |
| "kl": 0.271484375, | |
| "learning_rate": 4.892117803050578e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 760.3125, | |
| "epoch": 0.009195732605337835, | |
| "grad_norm": 0.06874047219753265, | |
| "kl": 0.2734375, | |
| "learning_rate": 4.887614073978761e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 895.75, | |
| "epoch": 0.009339415927296238, | |
| "grad_norm": 0.055456191301345825, | |
| "kl": 0.260009765625, | |
| "learning_rate": 4.883020654901609e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 769.875, | |
| "epoch": 0.009483099249254643, | |
| "grad_norm": 0.04558374360203743, | |
| "kl": 0.234375, | |
| "learning_rate": 4.878337738549785e-06, | |
| "loss": 0.0023, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 825.875, | |
| "epoch": 0.009626782571213046, | |
| "grad_norm": 0.050083279609680176, | |
| "kl": 0.259765625, | |
| "learning_rate": 4.873565521409082e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 679.75, | |
| "epoch": 0.00977046589317145, | |
| "grad_norm": 0.07992015033960342, | |
| "kl": 0.26416015625, | |
| "learning_rate": 4.868704203712173e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 807.375, | |
| "epoch": 0.009914149215129854, | |
| "grad_norm": 0.07405384629964828, | |
| "kl": 0.27001953125, | |
| "learning_rate": 4.86375398943021e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 638.5, | |
| "epoch": 0.010057832537088257, | |
| "grad_norm": 0.5143739581108093, | |
| "kl": 0.352783203125, | |
| "learning_rate": 4.858715086264274e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 668.875, | |
| "epoch": 0.010201515859046661, | |
| "grad_norm": 0.053426701575517654, | |
| "kl": 0.2099609375, | |
| "learning_rate": 4.853587705636646e-06, | |
| "loss": 0.0021, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 605.375, | |
| "epoch": 0.010345199181005064, | |
| "grad_norm": 0.18540872633457184, | |
| "kl": 0.29345703125, | |
| "learning_rate": 4.84837206268195e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 663.4375, | |
| "epoch": 0.01048888250296347, | |
| "grad_norm": 0.11063985526561737, | |
| "kl": 0.263671875, | |
| "learning_rate": 4.8430683762381195e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 627.625, | |
| "epoch": 0.010632565824921872, | |
| "grad_norm": 0.09360551834106445, | |
| "kl": 0.267822265625, | |
| "learning_rate": 4.837676868837213e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 581.25, | |
| "epoch": 0.010776249146880275, | |
| "grad_norm": 0.4334953725337982, | |
| "kl": 0.3037109375, | |
| "learning_rate": 4.832197766696085e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 570.875, | |
| "epoch": 0.01091993246883868, | |
| "grad_norm": 0.1146390363574028, | |
| "kl": 0.36865234375, | |
| "learning_rate": 4.826631299706887e-06, | |
| "loss": 0.0028, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 431.25, | |
| "epoch": 0.011063615790797083, | |
| "grad_norm": 0.06237626075744629, | |
| "kl": 0.34423828125, | |
| "learning_rate": 4.820977701427424e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 304.25, | |
| "epoch": 0.011207299112755488, | |
| "grad_norm": 0.06463415175676346, | |
| "kl": 0.2822265625, | |
| "learning_rate": 4.81523720907136e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 407.0, | |
| "epoch": 0.01135098243471389, | |
| "grad_norm": 0.12471897900104523, | |
| "kl": 0.29931640625, | |
| "learning_rate": 4.809410063498254e-06, | |
| "loss": 0.0028, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 294.875, | |
| "epoch": 0.011494665756672294, | |
| "grad_norm": 0.14441123604774475, | |
| "kl": 0.40478515625, | |
| "learning_rate": 4.8034965092034656e-06, | |
| "loss": 0.0032, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 483.25, | |
| "epoch": 0.011638349078630698, | |
| "grad_norm": 0.07597321271896362, | |
| "kl": 0.30029296875, | |
| "learning_rate": 4.797496794307889e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 456.875, | |
| "epoch": 0.011782032400589101, | |
| "grad_norm": 0.09612507373094559, | |
| "kl": 0.3154296875, | |
| "learning_rate": 4.791411170547545e-06, | |
| "loss": 0.0031, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 346.75, | |
| "epoch": 0.011925715722547506, | |
| "grad_norm": 0.16882579028606415, | |
| "kl": 0.29443359375, | |
| "learning_rate": 4.785239893263017e-06, | |
| "loss": 0.0029, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 380.25, | |
| "epoch": 0.012069399044505909, | |
| "grad_norm": 0.07839754223823547, | |
| "kl": 0.29638671875, | |
| "learning_rate": 4.778983221388742e-06, | |
| "loss": 0.0028, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 339.0625, | |
| "epoch": 0.012213082366464312, | |
| "grad_norm": 0.11466533690690994, | |
| "kl": 0.298583984375, | |
| "learning_rate": 4.77264141744214e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 449.0, | |
| "epoch": 0.012356765688422717, | |
| "grad_norm": 0.09201041609048843, | |
| "kl": 0.29052734375, | |
| "learning_rate": 4.766214747512603e-06, | |
| "loss": 0.0028, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 397.875, | |
| "epoch": 0.01250044901038112, | |
| "grad_norm": 0.5096721053123474, | |
| "kl": 0.31689453125, | |
| "learning_rate": 4.759703481250331e-06, | |
| "loss": 0.0033, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 388.625, | |
| "epoch": 0.012644132332339523, | |
| "grad_norm": 0.07259467989206314, | |
| "kl": 0.352294921875, | |
| "learning_rate": 4.753107891855015e-06, | |
| "loss": 0.0025, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 374.125, | |
| "epoch": 0.012787815654297928, | |
| "grad_norm": 0.07488111406564713, | |
| "kl": 0.257080078125, | |
| "learning_rate": 4.746428256064375e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 514.6875, | |
| "epoch": 0.01293149897625633, | |
| "grad_norm": 0.04593910649418831, | |
| "kl": 0.2216796875, | |
| "learning_rate": 4.7396648541425534e-06, | |
| "loss": 0.0023, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 374.0, | |
| "epoch": 0.013075182298214735, | |
| "grad_norm": 0.28726693987846375, | |
| "kl": 0.4638671875, | |
| "learning_rate": 4.732817969868348e-06, | |
| "loss": 0.0044, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 430.0625, | |
| "epoch": 0.013218865620173138, | |
| "grad_norm": 0.0935889482498169, | |
| "kl": 0.26025390625, | |
| "learning_rate": 4.7258878905233095e-06, | |
| "loss": 0.0025, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 450.75, | |
| "epoch": 0.013362548942131541, | |
| "grad_norm": 0.13612554967403412, | |
| "kl": 0.31982421875, | |
| "learning_rate": 4.718874906879688e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 419.25, | |
| "epoch": 0.013506232264089946, | |
| "grad_norm": 0.5188797116279602, | |
| "kl": 0.435546875, | |
| "learning_rate": 4.711779313188231e-06, | |
| "loss": 0.004, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 428.75, | |
| "epoch": 0.013649915586048349, | |
| "grad_norm": 0.06785713881254196, | |
| "kl": 0.2880859375, | |
| "learning_rate": 4.70460140716584e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 417.1875, | |
| "epoch": 0.013793598908006754, | |
| "grad_norm": 0.10226578265428543, | |
| "kl": 0.3046875, | |
| "learning_rate": 4.697341489983076e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 529.8125, | |
| "epoch": 0.013937282229965157, | |
| "grad_norm": 0.09858646988868713, | |
| "kl": 0.30908203125, | |
| "learning_rate": 4.6899998662515215e-06, | |
| "loss": 0.0029, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 432.4375, | |
| "epoch": 0.01408096555192356, | |
| "grad_norm": 0.4978332817554474, | |
| "kl": 0.31787109375, | |
| "learning_rate": 4.682576844011007e-06, | |
| "loss": 0.0032, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 568.4375, | |
| "epoch": 0.014224648873881965, | |
| "grad_norm": 0.07428912818431854, | |
| "kl": 0.277587890625, | |
| "learning_rate": 4.675072734716678e-06, | |
| "loss": 0.0025, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 410.3125, | |
| "epoch": 0.014368332195840368, | |
| "grad_norm": 0.09108950942754745, | |
| "kl": 0.333251953125, | |
| "learning_rate": 4.667487853225931e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 583.8125, | |
| "epoch": 0.014512015517798772, | |
| "grad_norm": 0.07506731152534485, | |
| "kl": 0.4638671875, | |
| "learning_rate": 4.659822517785203e-06, | |
| "loss": 0.0029, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 468.25, | |
| "epoch": 0.014655698839757175, | |
| "grad_norm": 0.08625821769237518, | |
| "kl": 0.314453125, | |
| "learning_rate": 4.6520770500166165e-06, | |
| "loss": 0.0028, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 462.5, | |
| "epoch": 0.014799382161715578, | |
| "grad_norm": 0.06079576537013054, | |
| "kl": 0.28369140625, | |
| "learning_rate": 4.644251774904487e-06, | |
| "loss": 0.0028, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 440.6875, | |
| "epoch": 0.014943065483673983, | |
| "grad_norm": 0.0945528894662857, | |
| "kl": 0.33642578125, | |
| "learning_rate": 4.636347020781684e-06, | |
| "loss": 0.0036, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 607.875, | |
| "epoch": 0.015086748805632386, | |
| "grad_norm": 0.08069407939910889, | |
| "kl": 0.31298828125, | |
| "learning_rate": 4.6283631193158605e-06, | |
| "loss": 0.0031, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 564.375, | |
| "epoch": 0.01523043212759079, | |
| "grad_norm": 0.055641427636146545, | |
| "kl": 0.29150390625, | |
| "learning_rate": 4.620300405495532e-06, | |
| "loss": 0.003, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 531.4375, | |
| "epoch": 0.015374115449549194, | |
| "grad_norm": 0.061065420508384705, | |
| "kl": 0.302734375, | |
| "learning_rate": 4.612159217616022e-06, | |
| "loss": 0.0029, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 493.1875, | |
| "epoch": 0.015517798771507597, | |
| "grad_norm": 0.06338890641927719, | |
| "kl": 0.32373046875, | |
| "learning_rate": 4.603939897265268e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 508.625, | |
| "epoch": 0.015661482093466, | |
| "grad_norm": 0.4134579300880432, | |
| "kl": 0.328125, | |
| "learning_rate": 4.595642789309492e-06, | |
| "loss": 0.0033, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 460.9375, | |
| "epoch": 0.015805165415424406, | |
| "grad_norm": 0.04516274109482765, | |
| "kl": 0.276611328125, | |
| "learning_rate": 4.587268241878724e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 561.3125, | |
| "epoch": 0.01594884873738281, | |
| "grad_norm": 0.0691392570734024, | |
| "kl": 0.27783203125, | |
| "learning_rate": 4.578816606352205e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 548.25, | |
| "epoch": 0.016092532059341212, | |
| "grad_norm": 0.05729848891496658, | |
| "kl": 0.342529296875, | |
| "learning_rate": 4.570288237343632e-06, | |
| "loss": 0.0027, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 516.1875, | |
| "epoch": 0.016236215381299615, | |
| "grad_norm": 0.073029063642025, | |
| "kl": 0.267333984375, | |
| "learning_rate": 4.561683492686289e-06, | |
| "loss": 0.0024, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 502.375, | |
| "epoch": 0.016379898703258018, | |
| "grad_norm": 1.27533757686615, | |
| "kl": 0.22216796875, | |
| "learning_rate": 4.5530027334180285e-06, | |
| "loss": 0.0409, | |
| "reward": 0.0062500000931322575, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0625, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 522.25, | |
| "epoch": 0.016523582025216425, | |
| "grad_norm": 0.0462871678173542, | |
| "kl": 0.23681640625, | |
| "learning_rate": 4.544246323766122e-06, | |
| "loss": 0.0023, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 582.8125, | |
| "epoch": 0.016667265347174828, | |
| "grad_norm": 0.042445018887519836, | |
| "kl": 0.22998046875, | |
| "learning_rate": 4.535414631131983e-06, | |
| "loss": 0.0022, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 514.3125, | |
| "epoch": 0.01681094866913323, | |
| "grad_norm": 1.90855872631073, | |
| "kl": 0.34375, | |
| "learning_rate": 4.526508026075746e-06, | |
| "loss": -0.0011, | |
| "reward": 0.012500000186264515, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.125, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 440.4375, | |
| "epoch": 0.016954631991091634, | |
| "grad_norm": 0.08046559244394302, | |
| "kl": 0.358154296875, | |
| "learning_rate": 4.517526882300721e-06, | |
| "loss": 0.0024, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 449.5, | |
| "epoch": 0.017098315313050037, | |
| "grad_norm": 0.14999990165233612, | |
| "kl": 0.302978515625, | |
| "learning_rate": 4.508471576637713e-06, | |
| "loss": 0.0024, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 472.5, | |
| "epoch": 0.017241998635008443, | |
| "grad_norm": 1.8022429943084717, | |
| "kl": 0.251953125, | |
| "learning_rate": 4.499342489029211e-06, | |
| "loss": 0.0915, | |
| "reward": 0.012500000186264515, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.125, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 475.9375, | |
| "epoch": 0.017385681956966846, | |
| "grad_norm": 0.07380665838718414, | |
| "kl": 0.29052734375, | |
| "learning_rate": 4.490140002513449e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 466.0625, | |
| "epoch": 0.01752936527892525, | |
| "grad_norm": 0.0822119191288948, | |
| "kl": 0.294189453125, | |
| "learning_rate": 4.48086450320833e-06, | |
| "loss": 0.0026, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 406.75, | |
| "epoch": 0.017673048600883652, | |
| "grad_norm": 1.9022564888000488, | |
| "kl": 0.2939453125, | |
| "learning_rate": 4.4715163802952266e-06, | |
| "loss": -0.1072, | |
| "reward": 0.0062500000931322575, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0625, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 523.875, | |
| "epoch": 0.017816731922842055, | |
| "grad_norm": 0.10652535408735275, | |
| "kl": 0.25537109375, | |
| "learning_rate": 4.462096026002655e-06, | |
| "loss": 0.0025, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 519.125, | |
| "epoch": 0.017960415244800458, | |
| "grad_norm": 1.6003073453903198, | |
| "kl": 0.260986328125, | |
| "learning_rate": 4.4526038355898144e-06, | |
| "loss": -0.074, | |
| "reward": 0.012500000186264515, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.125, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 609.3125, | |
| "epoch": 0.018104098566758865, | |
| "grad_norm": 1.3760863542556763, | |
| "kl": 0.2841796875, | |
| "learning_rate": 4.4430402073300035e-06, | |
| "loss": 0.0366, | |
| "reward": 0.012500000186264515, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.125, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 467.5, | |
| "epoch": 0.018247781888717268, | |
| "grad_norm": 1.8627442121505737, | |
| "kl": 0.284423828125, | |
| "learning_rate": 4.433405542493909e-06, | |
| "loss": 0.0936, | |
| "reward": 0.018750000279396772, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.1875, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 618.1875, | |
| "epoch": 0.01839146521067567, | |
| "grad_norm": 1.4385063648223877, | |
| "kl": 0.298828125, | |
| "learning_rate": 4.4237002453327734e-06, | |
| "loss": 0.0351, | |
| "reward": 0.012500000186264515, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.125, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 506.875, | |
| "epoch": 0.018535148532634074, | |
| "grad_norm": 2.3687305450439453, | |
| "kl": 0.316162109375, | |
| "learning_rate": 4.4139247230614245e-06, | |
| "loss": -0.0343, | |
| "reward": 0.018750000279396772, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.1875, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 444.375, | |
| "epoch": 0.018678831854592477, | |
| "grad_norm": 2.7708656787872314, | |
| "kl": 0.35009765625, | |
| "learning_rate": 4.404079385841201e-06, | |
| "loss": -0.0774, | |
| "reward": 0.056250001303851604, | |
| "reward_std": 0.05580127239227295, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.5625, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 494.0625, | |
| "epoch": 0.018822515176550883, | |
| "grad_norm": 1.9888551235198975, | |
| "kl": 0.37109375, | |
| "learning_rate": 4.394164646762734e-06, | |
| "loss": -0.1476, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 505.5, | |
| "epoch": 0.018966198498509286, | |
| "grad_norm": 2.9615418910980225, | |
| "kl": 0.39111328125, | |
| "learning_rate": 4.384180921828618e-06, | |
| "loss": 0.1607, | |
| "reward": 0.06250000186264515, | |
| "reward_std": 0.05386751517653465, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.625, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 374.0625, | |
| "epoch": 0.01910988182046769, | |
| "grad_norm": 2.8335933685302734, | |
| "kl": 0.720703125, | |
| "learning_rate": 4.374128629935955e-06, | |
| "loss": 0.0248, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 491.25, | |
| "epoch": 0.019253565142426092, | |
| "grad_norm": 2.8478641510009766, | |
| "kl": 0.4345703125, | |
| "learning_rate": 4.364008192858781e-06, | |
| "loss": -0.0556, | |
| "reward": 0.06250000186264515, | |
| "reward_std": 0.05386751517653465, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.625, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 363.625, | |
| "epoch": 0.019397248464384495, | |
| "grad_norm": 2.654599905014038, | |
| "kl": 0.5478515625, | |
| "learning_rate": 4.353820035230366e-06, | |
| "loss": -0.0377, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 558.4375, | |
| "epoch": 0.0195409317863429, | |
| "grad_norm": 1.9685494899749756, | |
| "kl": 0.3134765625, | |
| "learning_rate": 4.3435645845254e-06, | |
| "loss": 0.0397, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 407.1875, | |
| "epoch": 0.019684615108301304, | |
| "grad_norm": 3.2566757202148438, | |
| "kl": 0.39111328125, | |
| "learning_rate": 4.333242271042054e-06, | |
| "loss": 0.1668, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 372.9375, | |
| "epoch": 0.019828298430259707, | |
| "grad_norm": 2.8907630443573, | |
| "kl": 0.41943359375, | |
| "learning_rate": 4.32285352788393e-06, | |
| "loss": 0.1106, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 385.625, | |
| "epoch": 0.01997198175221811, | |
| "grad_norm": 3.1960320472717285, | |
| "kl": 0.44140625, | |
| "learning_rate": 4.312398790941882e-06, | |
| "loss": 0.0251, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 222.3125, | |
| "epoch": 0.020115665074176513, | |
| "grad_norm": 3.963812828063965, | |
| "kl": 0.42822265625, | |
| "learning_rate": 4.301878498875735e-06, | |
| "loss": -0.0553, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 208.6875, | |
| "epoch": 0.02025934839613492, | |
| "grad_norm": 24269.09375, | |
| "kl": 984.3603515625, | |
| "learning_rate": 4.291293093095873e-06, | |
| "loss": 13.9158, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 272.0625, | |
| "epoch": 0.020403031718093323, | |
| "grad_norm": 41.54574966430664, | |
| "kl": 1.75048828125, | |
| "learning_rate": 4.280643017744723e-06, | |
| "loss": 0.0095, | |
| "reward": 0.08125000121071935, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 301.5, | |
| "epoch": 0.020546715040051726, | |
| "grad_norm": 1.2713871002197266, | |
| "kl": 0.52978515625, | |
| "learning_rate": 4.269928719678117e-06, | |
| "loss": 0.005, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 239.375, | |
| "epoch": 0.02069039836201013, | |
| "grad_norm": 3.0878281593322754, | |
| "kl": 0.755859375, | |
| "learning_rate": 4.2591506484465426e-06, | |
| "loss": -0.0796, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 193.3125, | |
| "epoch": 0.020834081683968532, | |
| "grad_norm": 3.4972658157348633, | |
| "kl": 0.58154296875, | |
| "learning_rate": 4.248309256276283e-06, | |
| "loss": -0.0246, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 222.8125, | |
| "epoch": 0.02097776500592694, | |
| "grad_norm": 4.0359272956848145, | |
| "kl": 0.568359375, | |
| "learning_rate": 4.23740499805044e-06, | |
| "loss": -0.1349, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 153.375, | |
| "epoch": 0.02112144832788534, | |
| "grad_norm": 4.588460922241211, | |
| "kl": 0.5205078125, | |
| "learning_rate": 4.22643833128985e-06, | |
| "loss": 0.1225, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 144.9375, | |
| "epoch": 0.021265131649843744, | |
| "grad_norm": 3.1051583290100098, | |
| "kl": 0.5810546875, | |
| "learning_rate": 4.215409716133885e-06, | |
| "loss": 0.0097, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 135.3125, | |
| "epoch": 0.021408814971802147, | |
| "grad_norm": 0.4494142532348633, | |
| "kl": 0.62890625, | |
| "learning_rate": 4.204319615321151e-06, | |
| "loss": 0.0061, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.125, | |
| "epoch": 0.02155249829376055, | |
| "grad_norm": 3.1240758895874023, | |
| "kl": 0.4951171875, | |
| "learning_rate": 4.193168494170065e-06, | |
| "loss": 0.0225, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 161.25, | |
| "epoch": 0.021696181615718957, | |
| "grad_norm": 0.13801798224449158, | |
| "kl": 0.5107421875, | |
| "learning_rate": 4.181956820559339e-06, | |
| "loss": 0.005, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 204.75, | |
| "epoch": 0.02183986493767736, | |
| "grad_norm": 2.869321823120117, | |
| "kl": 0.52685546875, | |
| "learning_rate": 4.170685064908342e-06, | |
| "loss": 0.0007, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 217.625, | |
| "epoch": 0.021983548259635763, | |
| "grad_norm": 2.987393379211426, | |
| "kl": 0.4775390625, | |
| "learning_rate": 4.159353700157365e-06, | |
| "loss": 0.0874, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 301.1875, | |
| "epoch": 0.022127231581594166, | |
| "grad_norm": 4.155872344970703, | |
| "kl": 0.513671875, | |
| "learning_rate": 4.14796320174778e-06, | |
| "loss": 0.1495, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 203.375, | |
| "epoch": 0.02227091490355257, | |
| "grad_norm": 3.3726768493652344, | |
| "kl": 0.5556640625, | |
| "learning_rate": 4.136514047602087e-06, | |
| "loss": -0.1008, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 239.5625, | |
| "epoch": 0.022414598225510975, | |
| "grad_norm": 1.6632219552993774, | |
| "kl": 0.43505859375, | |
| "learning_rate": 4.1250067181038635e-06, | |
| "loss": -0.0251, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 279.3125, | |
| "epoch": 0.02255828154746938, | |
| "grad_norm": 1.4800124168395996, | |
| "kl": 0.5205078125, | |
| "learning_rate": 4.113441696077608e-06, | |
| "loss": 0.0334, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 228.5, | |
| "epoch": 0.02270196486942778, | |
| "grad_norm": 3.3807125091552734, | |
| "kl": 0.5283203125, | |
| "learning_rate": 4.101819466768484e-06, | |
| "loss": 0.0686, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 265.375, | |
| "epoch": 0.022845648191386184, | |
| "grad_norm": 0.11021065711975098, | |
| "kl": 0.4150390625, | |
| "learning_rate": 4.0901405178219535e-06, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 236.5, | |
| "epoch": 0.022989331513344587, | |
| "grad_norm": 0.12716983258724213, | |
| "kl": 0.48828125, | |
| "learning_rate": 4.078405339263326e-06, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 394.75, | |
| "epoch": 0.023133014835302994, | |
| "grad_norm": 2.502532720565796, | |
| "kl": 0.43359375, | |
| "learning_rate": 4.06661442347719e-06, | |
| "loss": 0.1277, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 307.75, | |
| "epoch": 0.023276698157261397, | |
| "grad_norm": 2.625666618347168, | |
| "kl": 0.603515625, | |
| "learning_rate": 4.054768265186758e-06, | |
| "loss": -0.0039, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 320.875, | |
| "epoch": 0.0234203814792198, | |
| "grad_norm": 10.025872230529785, | |
| "kl": 3.30126953125, | |
| "learning_rate": 4.0428673614331036e-06, | |
| "loss": -0.1205, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 340.0625, | |
| "epoch": 0.023564064801178203, | |
| "grad_norm": 1.7133045196533203, | |
| "kl": 0.4716796875, | |
| "learning_rate": 4.030912211554316e-06, | |
| "loss": -0.0296, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 323.9375, | |
| "epoch": 0.023707748123136606, | |
| "grad_norm": 2.04508638381958, | |
| "kl": 0.47265625, | |
| "learning_rate": 4.018903317164539e-06, | |
| "loss": 0.1483, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 407.75, | |
| "epoch": 0.023851431445095012, | |
| "grad_norm": 1.8505865335464478, | |
| "kl": 0.435546875, | |
| "learning_rate": 4.006841182132932e-06, | |
| "loss": 0.1073, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 340.6875, | |
| "epoch": 0.023995114767053415, | |
| "grad_norm": 1.863888144493103, | |
| "kl": 0.44677734375, | |
| "learning_rate": 3.9947263125625195e-06, | |
| "loss": -0.0231, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 359.8125, | |
| "epoch": 0.024138798089011818, | |
| "grad_norm": 3.2325119972229004, | |
| "kl": 0.4931640625, | |
| "learning_rate": 3.982559216768967e-06, | |
| "loss": -0.0656, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 228.0, | |
| "epoch": 0.02428248141097022, | |
| "grad_norm": 3.302624464035034, | |
| "kl": 0.5654296875, | |
| "learning_rate": 3.970340405259245e-06, | |
| "loss": 0.0314, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 353.3125, | |
| "epoch": 0.024426164732928624, | |
| "grad_norm": 4.623762607574463, | |
| "kl": 0.5009765625, | |
| "learning_rate": 3.958070390710214e-06, | |
| "loss": 0.3016, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 378.5, | |
| "epoch": 0.02456984805488703, | |
| "grad_norm": 1.5588092803955078, | |
| "kl": 0.43212890625, | |
| "learning_rate": 3.945749687947109e-06, | |
| "loss": 0.0641, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 283.125, | |
| "epoch": 0.024713531376845434, | |
| "grad_norm": 2.220501184463501, | |
| "kl": 0.771484375, | |
| "learning_rate": 3.933378813921942e-06, | |
| "loss": 0.0059, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 373.9375, | |
| "epoch": 0.024857214698803837, | |
| "grad_norm": 1084.8433837890625, | |
| "kl": 40.76611328125, | |
| "learning_rate": 3.920958287691811e-06, | |
| "loss": 0.8411, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 373.25, | |
| "epoch": 0.02500089802076224, | |
| "grad_norm": 117.17958068847656, | |
| "kl": 4.12109375, | |
| "learning_rate": 3.908488630397121e-06, | |
| "loss": 0.0798, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 331.5625, | |
| "epoch": 0.025144581342720643, | |
| "grad_norm": 9.010733604431152, | |
| "kl": 1.80224609375, | |
| "learning_rate": 3.8959703652397175e-06, | |
| "loss": -0.0073, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 389.875, | |
| "epoch": 0.025288264664679046, | |
| "grad_norm": 5.297504901885986, | |
| "kl": 0.67919921875, | |
| "learning_rate": 3.883404017460935e-06, | |
| "loss": 0.1025, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 394.375, | |
| "epoch": 0.025431947986637452, | |
| "grad_norm": 2.6283674240112305, | |
| "kl": 0.5390625, | |
| "learning_rate": 3.870790114319559e-06, | |
| "loss": 0.0492, | |
| "reward": 0.0437500006519258, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.4375, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 321.5, | |
| "epoch": 0.025575631308595855, | |
| "grad_norm": 3.5162456035614014, | |
| "kl": 0.7216796875, | |
| "learning_rate": 3.858129185069701e-06, | |
| "loss": 0.1675, | |
| "reward": 0.03125000046566129, | |
| "reward_std": 0.04136751499027014, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.3125, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 333.3125, | |
| "epoch": 0.025719314630554258, | |
| "grad_norm": 4.047501564025879, | |
| "kl": 0.751953125, | |
| "learning_rate": 3.845421760938597e-06, | |
| "loss": 0.1119, | |
| "reward": 0.050000001676380634, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.5, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 305.6875, | |
| "epoch": 0.02586299795251266, | |
| "grad_norm": 4.252400875091553, | |
| "kl": 0.6796875, | |
| "learning_rate": 3.832668375104312e-06, | |
| "loss": 0.0201, | |
| "reward": 0.050000001676380634, | |
| "reward_std": 0.05000000074505806, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.5, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 225.9375, | |
| "epoch": 0.026006681274471064, | |
| "grad_norm": 7.807644844055176, | |
| "kl": 1.560546875, | |
| "learning_rate": 3.8198695626733725e-06, | |
| "loss": 0.0647, | |
| "reward": 0.04375000111758709, | |
| "reward_std": 0.051933757960796356, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.4375, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 212.625, | |
| "epoch": 0.02615036459642947, | |
| "grad_norm": 5.3708038330078125, | |
| "kl": 0.736328125, | |
| "learning_rate": 3.8070258606583156e-06, | |
| "loss": -0.0523, | |
| "reward": 0.03125000046566129, | |
| "reward_std": 0.051933757960796356, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.3125, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 263.9375, | |
| "epoch": 0.026294047918387874, | |
| "grad_norm": 3.724597454071045, | |
| "kl": 0.6953125, | |
| "learning_rate": 3.7941378079551544e-06, | |
| "loss": -0.0584, | |
| "reward": 0.06875000149011612, | |
| "reward_std": 0.04136751499027014, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 231.25, | |
| "epoch": 0.026437731240346277, | |
| "grad_norm": 4.854169845581055, | |
| "kl": 0.8095703125, | |
| "learning_rate": 3.7812059453207677e-06, | |
| "loss": -0.0794, | |
| "reward": 0.05625000176951289, | |
| "reward_std": 0.051933757960796356, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.5625, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 283.125, | |
| "epoch": 0.02658141456230468, | |
| "grad_norm": 4.72191858291626, | |
| "kl": 0.6787109375, | |
| "learning_rate": 3.768230815350213e-06, | |
| "loss": 0.0998, | |
| "reward": 0.06250000186264515, | |
| "reward_std": 0.05386751517653465, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.625, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 315.5625, | |
| "epoch": 0.026725097884263083, | |
| "grad_norm": 4.964809417724609, | |
| "kl": 0.6796875, | |
| "learning_rate": 3.7552129624539557e-06, | |
| "loss": 0.34, | |
| "reward": 0.0687500024214387, | |
| "reward_std": 0.051933757960796356, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 258.375, | |
| "epoch": 0.02686878120622149, | |
| "grad_norm": 3.47402286529541, | |
| "kl": 0.7421875, | |
| "learning_rate": 3.7421529328350316e-06, | |
| "loss": 0.0961, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 359.0, | |
| "epoch": 0.027012464528179892, | |
| "grad_norm": 480.0755615234375, | |
| "kl": 37.884765625, | |
| "learning_rate": 3.7290512744661274e-06, | |
| "loss": 0.679, | |
| "reward": 0.05000000121071935, | |
| "reward_std": 0.05386751517653465, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.5, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 213.1875, | |
| "epoch": 0.027156147850138295, | |
| "grad_norm": 5.7086052894592285, | |
| "kl": 1.61328125, | |
| "learning_rate": 3.715908537066589e-06, | |
| "loss": 0.1013, | |
| "reward": 0.06250000186264515, | |
| "reward_std": 0.05386751517653465, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.625, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 369.875, | |
| "epoch": 0.027299831172096698, | |
| "grad_norm": 3.870173215866089, | |
| "kl": 0.7568359375, | |
| "learning_rate": 3.7027252720793538e-06, | |
| "loss": 0.2494, | |
| "reward": 0.0687500019557774, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 158.25, | |
| "epoch": 0.0274435144940551, | |
| "grad_norm": 491.740478515625, | |
| "kl": 18.1435546875, | |
| "learning_rate": 3.689502032647817e-06, | |
| "loss": 0.3011, | |
| "reward": 0.07500000111758709, | |
| "reward_std": 0.028867514804005623, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 280.75, | |
| "epoch": 0.027587197816013508, | |
| "grad_norm": 4.54683256149292, | |
| "kl": 0.853515625, | |
| "learning_rate": 3.6762393735926245e-06, | |
| "loss": 0.2421, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.4375, | |
| "epoch": 0.02773088113797191, | |
| "grad_norm": 3.0036094188690186, | |
| "kl": 0.7412109375, | |
| "learning_rate": 3.6629378513883852e-06, | |
| "loss": 0.0226, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 159.5625, | |
| "epoch": 0.027874564459930314, | |
| "grad_norm": 5.060415744781494, | |
| "kl": 0.8330078125, | |
| "learning_rate": 3.6495980241403307e-06, | |
| "loss": 0.1031, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 203.4375, | |
| "epoch": 0.028018247781888717, | |
| "grad_norm": 3.40126633644104, | |
| "kl": 0.8203125, | |
| "learning_rate": 3.636220451560896e-06, | |
| "loss": 0.2255, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 227.8125, | |
| "epoch": 0.02816193110384712, | |
| "grad_norm": 5.539950847625732, | |
| "kl": 0.814453125, | |
| "learning_rate": 3.622805694946235e-06, | |
| "loss": 0.2887, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 196.625, | |
| "epoch": 0.028305614425805526, | |
| "grad_norm": 1.9450119733810425, | |
| "kl": 0.75390625, | |
| "learning_rate": 3.609354317152667e-06, | |
| "loss": -0.0384, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 159.125, | |
| "epoch": 0.02844929774776393, | |
| "grad_norm": 5.7262701988220215, | |
| "kl": 0.79296875, | |
| "learning_rate": 3.595866882573063e-06, | |
| "loss": 0.2286, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 156.875, | |
| "epoch": 0.028592981069722332, | |
| "grad_norm": 4.302977561950684, | |
| "kl": 0.65625, | |
| "learning_rate": 3.5823439571131675e-06, | |
| "loss": 0.0149, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 153.5625, | |
| "epoch": 0.028736664391680735, | |
| "grad_norm": 4.005610466003418, | |
| "kl": 0.9853515625, | |
| "learning_rate": 3.5687861081678477e-06, | |
| "loss": 0.0623, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 148.625, | |
| "epoch": 0.028880347713639138, | |
| "grad_norm": 3.1937036514282227, | |
| "kl": 0.83203125, | |
| "learning_rate": 3.555193904597291e-06, | |
| "loss": 0.0186, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 193.1875, | |
| "epoch": 0.029024031035597544, | |
| "grad_norm": 3.0985937118530273, | |
| "kl": 0.7939453125, | |
| "learning_rate": 3.541567916703138e-06, | |
| "loss": 0.2288, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 210.6875, | |
| "epoch": 0.029167714357555947, | |
| "grad_norm": 0.2118406444787979, | |
| "kl": 0.8671875, | |
| "learning_rate": 3.5279087162045517e-06, | |
| "loss": 0.0088, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 150.25, | |
| "epoch": 0.02931139767951435, | |
| "grad_norm": 3.7620272636413574, | |
| "kl": 1.2724609375, | |
| "learning_rate": 3.5142168762142265e-06, | |
| "loss": -0.0115, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 203.6875, | |
| "epoch": 0.029455081001472753, | |
| "grad_norm": 3.8695547580718994, | |
| "kl": 0.7763671875, | |
| "learning_rate": 3.500492971214347e-06, | |
| "loss": 0.0121, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 244.125, | |
| "epoch": 0.029598764323431156, | |
| "grad_norm": 5.059806823730469, | |
| "kl": 0.87890625, | |
| "learning_rate": 3.48673757703248e-06, | |
| "loss": 0.3029, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 136.125, | |
| "epoch": 0.029742447645389563, | |
| "grad_norm": 2.909438371658325, | |
| "kl": 0.771484375, | |
| "learning_rate": 3.472951270817418e-06, | |
| "loss": 0.0315, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 207.375, | |
| "epoch": 0.029886130967347966, | |
| "grad_norm": 3.3862502574920654, | |
| "kl": 0.8125, | |
| "learning_rate": 3.4591346310149578e-06, | |
| "loss": 0.0018, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 162.3125, | |
| "epoch": 0.03002981428930637, | |
| "grad_norm": 5.295530796051025, | |
| "kl": 0.7373046875, | |
| "learning_rate": 3.445288237343632e-06, | |
| "loss": 0.1625, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 213.75, | |
| "epoch": 0.030173497611264772, | |
| "grad_norm": 1.1489182710647583, | |
| "kl": 0.791015625, | |
| "learning_rate": 3.4314126707703895e-06, | |
| "loss": 0.2328, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.375, | |
| "epoch": 0.030317180933223175, | |
| "grad_norm": 0.16106684505939484, | |
| "kl": 0.685546875, | |
| "learning_rate": 3.4175085134862128e-06, | |
| "loss": 0.0069, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 171.6875, | |
| "epoch": 0.03046086425518158, | |
| "grad_norm": 5.108158111572266, | |
| "kl": 0.7587890625, | |
| "learning_rate": 3.4035763488816953e-06, | |
| "loss": 0.1661, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 165.8125, | |
| "epoch": 0.030604547577139984, | |
| "grad_norm": 0.24283349514007568, | |
| "kl": 0.7724609375, | |
| "learning_rate": 3.3896167615225594e-06, | |
| "loss": 0.0074, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 169.375, | |
| "epoch": 0.030748230899098387, | |
| "grad_norm": 5.494866371154785, | |
| "kl": 1.5595703125, | |
| "learning_rate": 3.375630337125133e-06, | |
| "loss": -0.0222, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 198.625, | |
| "epoch": 0.03089191422105679, | |
| "grad_norm": 0.21820229291915894, | |
| "kl": 0.6953125, | |
| "learning_rate": 3.361617662531772e-06, | |
| "loss": 0.0069, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 209.8125, | |
| "epoch": 0.031035597543015193, | |
| "grad_norm": 1.8697510957717896, | |
| "kl": 0.912109375, | |
| "learning_rate": 3.347579325686237e-06, | |
| "loss": 0.0101, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.375, | |
| "epoch": 0.0311792808649736, | |
| "grad_norm": 2.8091392517089844, | |
| "kl": 0.724609375, | |
| "learning_rate": 3.333515915609027e-06, | |
| "loss": 0.0367, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 201.6875, | |
| "epoch": 0.031322964186932, | |
| "grad_norm": 3.437286853790283, | |
| "kl": 0.6689453125, | |
| "learning_rate": 3.3194280223726616e-06, | |
| "loss": -0.0491, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.1875, | |
| "epoch": 0.0314666475088904, | |
| "grad_norm": 416.79510498046875, | |
| "kl": 33.26171875, | |
| "learning_rate": 3.305316237076927e-06, | |
| "loss": 0.5898, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 167.5, | |
| "epoch": 0.03161033083084881, | |
| "grad_norm": 4.085023403167725, | |
| "kl": 0.8330078125, | |
| "learning_rate": 3.291181151824071e-06, | |
| "loss": 0.0318, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 247.1875, | |
| "epoch": 0.031754014152807215, | |
| "grad_norm": 5.383124351501465, | |
| "kl": 0.810546875, | |
| "learning_rate": 3.27702335969396e-06, | |
| "loss": 0.151, | |
| "reward": 0.0687500024214387, | |
| "reward_std": 0.051933757960796356, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 244.9375, | |
| "epoch": 0.03189769747476562, | |
| "grad_norm": 121.67268371582031, | |
| "kl": 9.408203125, | |
| "learning_rate": 3.2628434547191985e-06, | |
| "loss": 0.0321, | |
| "reward": 0.0687500024214387, | |
| "reward_std": 0.051933757960796356, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.0, | |
| "epoch": 0.03204138079672402, | |
| "grad_norm": 17.158824920654297, | |
| "kl": 3.072265625, | |
| "learning_rate": 3.2486420318601973e-06, | |
| "loss": -0.0192, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.0, | |
| "epoch": 0.032185064118682424, | |
| "grad_norm": 4.799882411956787, | |
| "kl": 0.859375, | |
| "learning_rate": 3.2344196869802187e-06, | |
| "loss": 0.0672, | |
| "reward": 0.06250000093132257, | |
| "reward_std": 0.043301272206008434, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.625, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 239.5625, | |
| "epoch": 0.03232874744064083, | |
| "grad_norm": 3.988457202911377, | |
| "kl": 0.75390625, | |
| "learning_rate": 3.2201770168203694e-06, | |
| "loss": -0.0968, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 222.875, | |
| "epoch": 0.03247243076259923, | |
| "grad_norm": 1.923133134841919, | |
| "kl": 0.615234375, | |
| "learning_rate": 3.205914618974563e-06, | |
| "loss": -0.0411, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 202.125, | |
| "epoch": 0.03261611408455763, | |
| "grad_norm": 2.7511911392211914, | |
| "kl": 0.6474609375, | |
| "learning_rate": 3.1916330918644496e-06, | |
| "loss": 0.009, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 246.5, | |
| "epoch": 0.032759797406516036, | |
| "grad_norm": 1.7069810628890991, | |
| "kl": 0.5673828125, | |
| "learning_rate": 3.177333034714303e-06, | |
| "loss": -0.014, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 233.875, | |
| "epoch": 0.03290348072847444, | |
| "grad_norm": 2.153999090194702, | |
| "kl": 0.611328125, | |
| "learning_rate": 3.1630150475258813e-06, | |
| "loss": 0.0533, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 271.9375, | |
| "epoch": 0.03304716405043285, | |
| "grad_norm": 3.5415985584259033, | |
| "kl": 0.5654296875, | |
| "learning_rate": 3.148679731053252e-06, | |
| "loss": -0.0361, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 269.3125, | |
| "epoch": 0.03319084737239125, | |
| "grad_norm": 8.814692497253418, | |
| "kl": 0.642578125, | |
| "learning_rate": 3.1343276867775805e-06, | |
| "loss": -0.1332, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 254.125, | |
| "epoch": 0.033334530694349655, | |
| "grad_norm": 2.5323190689086914, | |
| "kl": 0.5712890625, | |
| "learning_rate": 3.1199595168819043e-06, | |
| "loss": 0.096, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 295.3125, | |
| "epoch": 0.03347821401630806, | |
| "grad_norm": 3.8002569675445557, | |
| "kl": 0.82421875, | |
| "learning_rate": 3.105575824225852e-06, | |
| "loss": -0.0877, | |
| "reward": 0.06875000149011612, | |
| "reward_std": 0.04136751499027014, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 288.1875, | |
| "epoch": 0.03362189733826646, | |
| "grad_norm": 3.2891669273376465, | |
| "kl": 0.55810546875, | |
| "learning_rate": 3.091177212320363e-06, | |
| "loss": 0.2101, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 299.6875, | |
| "epoch": 0.033765580660224864, | |
| "grad_norm": 2.444749355316162, | |
| "kl": 0.54150390625, | |
| "learning_rate": 3.0767642853023538e-06, | |
| "loss": -0.0229, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 351.6875, | |
| "epoch": 0.03390926398218327, | |
| "grad_norm": 0.35167747735977173, | |
| "kl": 0.49560546875, | |
| "learning_rate": 3.062337647909376e-06, | |
| "loss": 0.0049, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 325.0, | |
| "epoch": 0.03405294730414167, | |
| "grad_norm": 2.867281436920166, | |
| "kl": 0.45166015625, | |
| "learning_rate": 3.04789790545424e-06, | |
| "loss": 0.1182, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 310.25, | |
| "epoch": 0.03419663062610007, | |
| "grad_norm": 2.982635498046875, | |
| "kl": 0.51708984375, | |
| "learning_rate": 3.033445663799621e-06, | |
| "loss": 0.0588, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 311.125, | |
| "epoch": 0.034340313948058476, | |
| "grad_norm": 2.8937182426452637, | |
| "kl": 0.521484375, | |
| "learning_rate": 3.018981529332633e-06, | |
| "loss": 0.1857, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 262.9375, | |
| "epoch": 0.034483997270016886, | |
| "grad_norm": 3.6246678829193115, | |
| "kl": 0.6162109375, | |
| "learning_rate": 3.00450610893939e-06, | |
| "loss": 0.1892, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 403.5625, | |
| "epoch": 0.03462768059197529, | |
| "grad_norm": 2.3801181316375732, | |
| "kl": 0.52099609375, | |
| "learning_rate": 2.9900200099795396e-06, | |
| "loss": 0.0722, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 287.3125, | |
| "epoch": 0.03477136391393369, | |
| "grad_norm": 0.24018186330795288, | |
| "kl": 0.5986328125, | |
| "learning_rate": 2.9755238402607826e-06, | |
| "loss": 0.0058, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 393.625, | |
| "epoch": 0.034915047235892095, | |
| "grad_norm": 0.6572515964508057, | |
| "kl": 0.53466796875, | |
| "learning_rate": 2.961018208013367e-06, | |
| "loss": 0.0049, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 560.4375, | |
| "epoch": 0.0350587305578505, | |
| "grad_norm": 2.2393953800201416, | |
| "kl": 0.4404296875, | |
| "learning_rate": 2.9465037218645694e-06, | |
| "loss": 0.3692, | |
| "reward": 0.06250000186264515, | |
| "reward_std": 0.05386751517653465, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.625, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 446.1875, | |
| "epoch": 0.0352024138798089, | |
| "grad_norm": 2.0634946823120117, | |
| "kl": 0.57177734375, | |
| "learning_rate": 2.9319809908131604e-06, | |
| "loss": -0.0071, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 528.1875, | |
| "epoch": 0.035346097201767304, | |
| "grad_norm": 2.6367909908294678, | |
| "kl": 0.5859375, | |
| "learning_rate": 2.917450624203847e-06, | |
| "loss": 0.0284, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 640.25, | |
| "epoch": 0.03548978052372571, | |
| "grad_norm": 0.06677532196044922, | |
| "kl": 0.365234375, | |
| "learning_rate": 2.9029132317017118e-06, | |
| "loss": 0.0034, | |
| "reward": 0.07500000111758709, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 625.75, | |
| "epoch": 0.03563346384568411, | |
| "grad_norm": 1.9811322689056396, | |
| "kl": 0.39404296875, | |
| "learning_rate": 2.888369423266629e-06, | |
| "loss": 0.1424, | |
| "reward": 0.0687500019557774, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 710.0, | |
| "epoch": 0.03577714716764251, | |
| "grad_norm": 1.6662659645080566, | |
| "kl": 0.4345703125, | |
| "learning_rate": 2.8738198091276712e-06, | |
| "loss": 0.1986, | |
| "reward": 0.06250000139698386, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.625, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 680.0, | |
| "epoch": 0.035920830489600916, | |
| "grad_norm": 1.886515736579895, | |
| "kl": 0.39013671875, | |
| "learning_rate": 2.859264999757509e-06, | |
| "loss": 0.2372, | |
| "reward": 0.04375000111758709, | |
| "reward_std": 0.04136751499027014, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.4375, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 764.6875, | |
| "epoch": 0.036064513811559326, | |
| "grad_norm": 5.7232513427734375, | |
| "kl": 0.4990234375, | |
| "learning_rate": 2.8447056058467928e-06, | |
| "loss": 0.1301, | |
| "reward": 0.05000000074505806, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.5, | |
| "step": 251 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 524.6875, | |
| "epoch": 0.03620819713351773, | |
| "grad_norm": 1.8524736166000366, | |
| "kl": 0.5546875, | |
| "learning_rate": 2.830142238278531e-06, | |
| "loss": 0.0687, | |
| "reward": 0.056250001303851604, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.5625, | |
| "step": 252 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 476.375, | |
| "epoch": 0.03635188045547613, | |
| "grad_norm": 2.285640239715576, | |
| "kl": 0.56640625, | |
| "learning_rate": 2.81557550810246e-06, | |
| "loss": 0.2638, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 253 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 374.3125, | |
| "epoch": 0.036495563777434535, | |
| "grad_norm": 3.965292453765869, | |
| "kl": 0.626953125, | |
| "learning_rate": 2.8010060265094026e-06, | |
| "loss": -0.0549, | |
| "reward": 0.06250000186264515, | |
| "reward_std": 0.05386751517653465, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.625, | |
| "step": 254 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 299.0625, | |
| "epoch": 0.03663924709939294, | |
| "grad_norm": 2.7410449981689453, | |
| "kl": 0.54248046875, | |
| "learning_rate": 2.786434404805629e-06, | |
| "loss": -0.0428, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 242.25, | |
| "epoch": 0.03678293042135134, | |
| "grad_norm": 7.071990489959717, | |
| "kl": 0.76611328125, | |
| "learning_rate": 2.771861254387199e-06, | |
| "loss": 0.1733, | |
| "reward": 0.06875000149011612, | |
| "reward_std": 0.04136751499027014, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 256 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.6875, | |
| "epoch": 0.036926613743309744, | |
| "grad_norm": 126.14482116699219, | |
| "kl": 5.0859375, | |
| "learning_rate": 2.7572871867143204e-06, | |
| "loss": 0.2472, | |
| "reward": 0.06875000149011612, | |
| "reward_std": 0.04136751499027014, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 257 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 245.5625, | |
| "epoch": 0.03707029706526815, | |
| "grad_norm": 4.183584213256836, | |
| "kl": 0.6787109375, | |
| "learning_rate": 2.742712813285681e-06, | |
| "loss": 0.0637, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 258 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 318.4375, | |
| "epoch": 0.03721398038722655, | |
| "grad_norm": 3.0855674743652344, | |
| "kl": 0.5908203125, | |
| "learning_rate": 2.7281387456128017e-06, | |
| "loss": 0.1647, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 259 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 215.6875, | |
| "epoch": 0.03735766370918495, | |
| "grad_norm": 4.139929294586182, | |
| "kl": 0.673828125, | |
| "learning_rate": 2.7135655951943716e-06, | |
| "loss": -0.0256, | |
| "reward": 0.06875000102445483, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.6875, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 219.0625, | |
| "epoch": 0.03750134703114336, | |
| "grad_norm": 0.20174548029899597, | |
| "kl": 0.6328125, | |
| "learning_rate": 2.698993973490598e-06, | |
| "loss": 0.0059, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 261 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 158.75, | |
| "epoch": 0.037645030353101766, | |
| "grad_norm": 5.350340366363525, | |
| "kl": 0.6455078125, | |
| "learning_rate": 2.6844244918975416e-06, | |
| "loss": 0.0784, | |
| "reward": 0.07500000204890966, | |
| "reward_std": 0.03943375777453184, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.75, | |
| "step": 262 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 135.625, | |
| "epoch": 0.03778871367506017, | |
| "grad_norm": 3.1076321601867676, | |
| "kl": 0.9599609375, | |
| "learning_rate": 2.66985776172147e-06, | |
| "loss": 0.0643, | |
| "reward": 0.08125000121071935, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 263 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 170.0625, | |
| "epoch": 0.03793239699701857, | |
| "grad_norm": 2.9959518909454346, | |
| "kl": 0.8447265625, | |
| "learning_rate": 2.6552943941532088e-06, | |
| "loss": 0.0162, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 264 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 214.125, | |
| "epoch": 0.038076080318976975, | |
| "grad_norm": 0.19384361803531647, | |
| "kl": 0.6650390625, | |
| "learning_rate": 2.6407350002424927e-06, | |
| "loss": 0.0066, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 250.5, | |
| "epoch": 0.03821976364093538, | |
| "grad_norm": 0.26606935262680054, | |
| "kl": 0.71875, | |
| "learning_rate": 2.626180190872329e-06, | |
| "loss": 0.0068, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 266 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 240.375, | |
| "epoch": 0.03836344696289378, | |
| "grad_norm": 0.9030230641365051, | |
| "kl": 0.8369140625, | |
| "learning_rate": 2.611630576733372e-06, | |
| "loss": 0.0062, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 267 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 166.1875, | |
| "epoch": 0.038507130284852184, | |
| "grad_norm": 3.1588239669799805, | |
| "kl": 1.0390625, | |
| "learning_rate": 2.5970867682982885e-06, | |
| "loss": 0.0089, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 268 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 165.5, | |
| "epoch": 0.03865081360681059, | |
| "grad_norm": 2.534489154815674, | |
| "kl": 0.740234375, | |
| "learning_rate": 2.582549375796154e-06, | |
| "loss": -0.0202, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 269 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 152.6875, | |
| "epoch": 0.03879449692876899, | |
| "grad_norm": 3.67244029045105, | |
| "kl": 0.673828125, | |
| "learning_rate": 2.568019009186841e-06, | |
| "loss": 0.0254, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 207.4375, | |
| "epoch": 0.0389381802507274, | |
| "grad_norm": 2.7831411361694336, | |
| "kl": 0.72265625, | |
| "learning_rate": 2.5534962781354317e-06, | |
| "loss": -0.0581, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 271 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 156.0, | |
| "epoch": 0.0390818635726858, | |
| "grad_norm": 5.289279937744141, | |
| "kl": 0.6708984375, | |
| "learning_rate": 2.538981791986634e-06, | |
| "loss": 0.0231, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 145.6875, | |
| "epoch": 0.039225546894644206, | |
| "grad_norm": 1.0131648778915405, | |
| "kl": 0.8173828125, | |
| "learning_rate": 2.524476159739218e-06, | |
| "loss": 0.0082, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 273 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 176.375, | |
| "epoch": 0.03936923021660261, | |
| "grad_norm": 2.1524786949157715, | |
| "kl": 0.69921875, | |
| "learning_rate": 2.5099799900204607e-06, | |
| "loss": -0.031, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 274 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 169.625, | |
| "epoch": 0.03951291353856101, | |
| "grad_norm": 0.3758815824985504, | |
| "kl": 0.60546875, | |
| "learning_rate": 2.4954938910606108e-06, | |
| "loss": 0.0056, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 120.0625, | |
| "epoch": 0.039656596860519415, | |
| "grad_norm": 2.6264796257019043, | |
| "kl": 0.646484375, | |
| "learning_rate": 2.481018470667368e-06, | |
| "loss": 0.0003, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 276 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 194.75, | |
| "epoch": 0.03980028018247782, | |
| "grad_norm": 9.496621131896973, | |
| "kl": 2.5390625, | |
| "learning_rate": 2.4665543362003802e-06, | |
| "loss": 0.0279, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 277 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 129.4375, | |
| "epoch": 0.03994396350443622, | |
| "grad_norm": 0.19138182699680328, | |
| "kl": 0.5478515625, | |
| "learning_rate": 2.4521020945457615e-06, | |
| "loss": 0.0053, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 278 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 128.125, | |
| "epoch": 0.040087646826394624, | |
| "grad_norm": 3.2331597805023193, | |
| "kl": 0.599609375, | |
| "learning_rate": 2.4376623520906255e-06, | |
| "loss": 0.0059, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 279 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 177.5, | |
| "epoch": 0.04023133014835303, | |
| "grad_norm": 2.2726430892944336, | |
| "kl": 0.5234375, | |
| "learning_rate": 2.4232357146976478e-06, | |
| "loss": 0.0504, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 163.1875, | |
| "epoch": 0.04037501347031144, | |
| "grad_norm": 2.353566884994507, | |
| "kl": 0.5107421875, | |
| "learning_rate": 2.408822787679637e-06, | |
| "loss": -0.0288, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 281 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 199.25, | |
| "epoch": 0.04051869679226984, | |
| "grad_norm": 0.12594649195671082, | |
| "kl": 0.49951171875, | |
| "learning_rate": 2.3944241757741475e-06, | |
| "loss": 0.0049, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 282 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 205.9375, | |
| "epoch": 0.04066238011422824, | |
| "grad_norm": 1.8537880182266235, | |
| "kl": 0.544921875, | |
| "learning_rate": 2.380040483118097e-06, | |
| "loss": -0.0586, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 283 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 189.3125, | |
| "epoch": 0.040806063436186646, | |
| "grad_norm": 3.44875168800354, | |
| "kl": 0.900390625, | |
| "learning_rate": 2.365672313222419e-06, | |
| "loss": 0.1564, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 284 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 193.125, | |
| "epoch": 0.04094974675814505, | |
| "grad_norm": 1.9976215362548828, | |
| "kl": 0.48974609375, | |
| "learning_rate": 2.351320268946749e-06, | |
| "loss": 0.0606, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 219.0, | |
| "epoch": 0.04109343008010345, | |
| "grad_norm": 3.5146334171295166, | |
| "kl": 0.7607421875, | |
| "learning_rate": 2.336984952474119e-06, | |
| "loss": -0.06, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 286 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 177.3125, | |
| "epoch": 0.041237113402061855, | |
| "grad_norm": 3.8956093788146973, | |
| "kl": 0.6533203125, | |
| "learning_rate": 2.322666965285697e-06, | |
| "loss": 0.0237, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 287 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 165.375, | |
| "epoch": 0.04138079672402026, | |
| "grad_norm": 3.3750154972076416, | |
| "kl": 0.607421875, | |
| "learning_rate": 2.3083669081355507e-06, | |
| "loss": 0.0068, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 288 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 267.0625, | |
| "epoch": 0.04152448004597866, | |
| "grad_norm": 2.528259754180908, | |
| "kl": 0.5810546875, | |
| "learning_rate": 2.2940853810254377e-06, | |
| "loss": 0.1652, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 289 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 248.0, | |
| "epoch": 0.041668163367937064, | |
| "grad_norm": 0.24038171768188477, | |
| "kl": 0.5234375, | |
| "learning_rate": 2.2798229831796313e-06, | |
| "loss": 0.0049, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 218.0, | |
| "epoch": 0.041811846689895474, | |
| "grad_norm": 3.456895589828491, | |
| "kl": 0.49658203125, | |
| "learning_rate": 2.2655803130197816e-06, | |
| "loss": 0.2115, | |
| "reward": 0.08125000167638063, | |
| "reward_std": 0.026933757588267326, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 146.625, | |
| "epoch": 0.04195553001185388, | |
| "grad_norm": 0.15112897753715515, | |
| "kl": 0.5068359375, | |
| "learning_rate": 2.2513579681398034e-06, | |
| "loss": 0.0049, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 292 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 200.6875, | |
| "epoch": 0.04209921333381228, | |
| "grad_norm": 0.16546125710010529, | |
| "kl": 0.50732421875, | |
| "learning_rate": 2.237156545280803e-06, | |
| "loss": 0.005, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 293 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 241.375, | |
| "epoch": 0.04224289665577068, | |
| "grad_norm": 0.17799150943756104, | |
| "kl": 0.49072265625, | |
| "learning_rate": 2.2229766403060403e-06, | |
| "loss": 0.0049, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 294 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 212.4375, | |
| "epoch": 0.042386579977729086, | |
| "grad_norm": 3.0936059951782227, | |
| "kl": 0.5068359375, | |
| "learning_rate": 2.2088188481759305e-06, | |
| "loss": 0.0534, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 197.5625, | |
| "epoch": 0.04253026329968749, | |
| "grad_norm": 1.9481518268585205, | |
| "kl": 0.478515625, | |
| "learning_rate": 2.194683762923073e-06, | |
| "loss": -0.0562, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 296 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 226.0, | |
| "epoch": 0.04267394662164589, | |
| "grad_norm": 2.9069528579711914, | |
| "kl": 0.64111328125, | |
| "learning_rate": 2.1805719776273387e-06, | |
| "loss": 0.0548, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 297 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 199.1875, | |
| "epoch": 0.042817629943604295, | |
| "grad_norm": 0.11291232705116272, | |
| "kl": 0.49072265625, | |
| "learning_rate": 2.166484084390974e-06, | |
| "loss": 0.0046, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 298 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 195.875, | |
| "epoch": 0.0429613132655627, | |
| "grad_norm": 0.10604951530694962, | |
| "kl": 0.4345703125, | |
| "learning_rate": 2.1524206743137636e-06, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 299 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 205.5625, | |
| "epoch": 0.0431049965875211, | |
| "grad_norm": 0.18865613639354706, | |
| "kl": 0.45849609375, | |
| "learning_rate": 2.1383823374682287e-06, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 200.25, | |
| "epoch": 0.043248679909479504, | |
| "grad_norm": 0.26521536707878113, | |
| "kl": 0.43798828125, | |
| "learning_rate": 2.124369662874868e-06, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 301 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 200.0625, | |
| "epoch": 0.043392363231437914, | |
| "grad_norm": 2.301034927368164, | |
| "kl": 0.58837890625, | |
| "learning_rate": 2.110383238477441e-06, | |
| "loss": 0.0223, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 302 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 154.1875, | |
| "epoch": 0.04353604655339632, | |
| "grad_norm": 0.2673363983631134, | |
| "kl": 0.54736328125, | |
| "learning_rate": 2.096423651118305e-06, | |
| "loss": 0.0054, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 303 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 263.875, | |
| "epoch": 0.04367972987535472, | |
| "grad_norm": 0.11367050558328629, | |
| "kl": 0.3955078125, | |
| "learning_rate": 2.082491486513788e-06, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 304 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 210.9375, | |
| "epoch": 0.04382341319731312, | |
| "grad_norm": 0.1046769991517067, | |
| "kl": 0.47021484375, | |
| "learning_rate": 2.0685873292296116e-06, | |
| "loss": 0.0047, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 215.125, | |
| "epoch": 0.043967096519271526, | |
| "grad_norm": 0.15870000422000885, | |
| "kl": 0.5517578125, | |
| "learning_rate": 2.054711762656369e-06, | |
| "loss": 0.0052, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 306 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 162.4375, | |
| "epoch": 0.04411077984122993, | |
| "grad_norm": 0.12202607095241547, | |
| "kl": 0.4443359375, | |
| "learning_rate": 2.040865368985044e-06, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 307 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 282.8125, | |
| "epoch": 0.04425446316318833, | |
| "grad_norm": 0.5133360624313354, | |
| "kl": 0.4326171875, | |
| "learning_rate": 2.027048729182583e-06, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 308 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 225.75, | |
| "epoch": 0.044398146485146735, | |
| "grad_norm": 0.08721781522035599, | |
| "kl": 0.3935546875, | |
| "learning_rate": 2.0132624229675205e-06, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 309 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.3125, | |
| "epoch": 0.04454182980710514, | |
| "grad_norm": 0.13983947038650513, | |
| "kl": 0.52587890625, | |
| "learning_rate": 1.9995070287856546e-06, | |
| "loss": 0.0055, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 213.625, | |
| "epoch": 0.04468551312906354, | |
| "grad_norm": 0.7404407858848572, | |
| "kl": 0.45166015625, | |
| "learning_rate": 1.985783123785774e-06, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 311 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 241.125, | |
| "epoch": 0.04482919645102195, | |
| "grad_norm": 0.11150796711444855, | |
| "kl": 0.466796875, | |
| "learning_rate": 1.9720912837954486e-06, | |
| "loss": 0.0047, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 312 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 247.25, | |
| "epoch": 0.044972879772980354, | |
| "grad_norm": 0.09887633472681046, | |
| "kl": 0.44921875, | |
| "learning_rate": 1.958432083296862e-06, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 313 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 228.1875, | |
| "epoch": 0.04511656309493876, | |
| "grad_norm": 0.08166426420211792, | |
| "kl": 0.41015625, | |
| "learning_rate": 1.9448060954027093e-06, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 314 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 295.5, | |
| "epoch": 0.04526024641689716, | |
| "grad_norm": 0.08792513608932495, | |
| "kl": 0.40380859375, | |
| "learning_rate": 1.931213891832153e-06, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 315 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 286.5625, | |
| "epoch": 0.04540392973885556, | |
| "grad_norm": 3.032651662826538, | |
| "kl": 0.4560546875, | |
| "learning_rate": 1.9176560428868336e-06, | |
| "loss": -0.0239, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 296.4375, | |
| "epoch": 0.045547613060813966, | |
| "grad_norm": 0.11587885767221451, | |
| "kl": 0.41650390625, | |
| "learning_rate": 1.9041331174269373e-06, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 387.3125, | |
| "epoch": 0.04569129638277237, | |
| "grad_norm": 0.8805405497550964, | |
| "kl": 0.43701171875, | |
| "learning_rate": 1.8906456828473341e-06, | |
| "loss": 0.0609, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 318 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 283.75, | |
| "epoch": 0.04583497970473077, | |
| "grad_norm": 0.08596353977918625, | |
| "kl": 0.43212890625, | |
| "learning_rate": 1.8771943050537656e-06, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 299.375, | |
| "epoch": 0.045978663026689175, | |
| "grad_norm": 1.9583159685134888, | |
| "kl": 0.4990234375, | |
| "learning_rate": 1.8637795484391046e-06, | |
| "loss": 0.06, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 271.3125, | |
| "epoch": 0.04612234634864758, | |
| "grad_norm": 0.15285852551460266, | |
| "kl": 0.4501953125, | |
| "learning_rate": 1.8504019758596698e-06, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 321 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 313.6875, | |
| "epoch": 0.04626602967060599, | |
| "grad_norm": 0.10856521874666214, | |
| "kl": 0.44287109375, | |
| "learning_rate": 1.8370621486116163e-06, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 322 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 291.8125, | |
| "epoch": 0.04640971299256439, | |
| "grad_norm": 2.9064958095550537, | |
| "kl": 0.51025390625, | |
| "learning_rate": 1.823760626407377e-06, | |
| "loss": -0.0198, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 323 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 304.0, | |
| "epoch": 0.046553396314522794, | |
| "grad_norm": 0.0989953801035881, | |
| "kl": 0.41796875, | |
| "learning_rate": 1.8104979673521838e-06, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 324 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 330.75, | |
| "epoch": 0.0466970796364812, | |
| "grad_norm": 1.5450448989868164, | |
| "kl": 0.5390625, | |
| "learning_rate": 1.7972747279206482e-06, | |
| "loss": 0.0276, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 320.3125, | |
| "epoch": 0.0468407629584396, | |
| "grad_norm": 0.07848266512155533, | |
| "kl": 0.359375, | |
| "learning_rate": 1.7840914629334122e-06, | |
| "loss": 0.0035, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 326 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 350.75, | |
| "epoch": 0.046984446280398, | |
| "grad_norm": 0.11079081147909164, | |
| "kl": 0.4228515625, | |
| "learning_rate": 1.7709487255338731e-06, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 327 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 268.5625, | |
| "epoch": 0.047128129602356406, | |
| "grad_norm": 0.08232247084379196, | |
| "kl": 0.43603515625, | |
| "learning_rate": 1.7578470671649684e-06, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 328 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 350.3125, | |
| "epoch": 0.04727181292431481, | |
| "grad_norm": 0.09194315969944, | |
| "kl": 0.40869140625, | |
| "learning_rate": 1.744787037546045e-06, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 329 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 319.125, | |
| "epoch": 0.04741549624627321, | |
| "grad_norm": 0.08984750509262085, | |
| "kl": 0.49169921875, | |
| "learning_rate": 1.731769184649788e-06, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 330 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 254.875, | |
| "epoch": 0.047559179568231615, | |
| "grad_norm": 0.09713928401470184, | |
| "kl": 0.484375, | |
| "learning_rate": 1.7187940546792325e-06, | |
| "loss": 0.0046, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 331 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 305.0625, | |
| "epoch": 0.047702862890190025, | |
| "grad_norm": 0.11803556233644485, | |
| "kl": 0.435546875, | |
| "learning_rate": 1.7058621920448465e-06, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 332 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 279.6875, | |
| "epoch": 0.04784654621214843, | |
| "grad_norm": 2.062950849533081, | |
| "kl": 0.501953125, | |
| "learning_rate": 1.6929741393416855e-06, | |
| "loss": 0.0285, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 333 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 327.8125, | |
| "epoch": 0.04799022953410683, | |
| "grad_norm": 3.389031410217285, | |
| "kl": 0.4765625, | |
| "learning_rate": 1.6801304373266286e-06, | |
| "loss": -0.0063, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 334 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 343.5, | |
| "epoch": 0.048133912856065234, | |
| "grad_norm": 0.17832499742507935, | |
| "kl": 0.54443359375, | |
| "learning_rate": 1.667331624895689e-06, | |
| "loss": 0.0053, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 335 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 414.0, | |
| "epoch": 0.048277596178023637, | |
| "grad_norm": 1.3559800386428833, | |
| "kl": 0.4990234375, | |
| "learning_rate": 1.6545782390614037e-06, | |
| "loss": 0.0622, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 336 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 345.375, | |
| "epoch": 0.04842127949998204, | |
| "grad_norm": 0.11337132751941681, | |
| "kl": 0.45654296875, | |
| "learning_rate": 1.6418708149302992e-06, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 337 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 347.9375, | |
| "epoch": 0.04856496282194044, | |
| "grad_norm": 1.1885749101638794, | |
| "kl": 0.3779296875, | |
| "learning_rate": 1.6292098856804423e-06, | |
| "loss": 0.1019, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 338 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 421.4375, | |
| "epoch": 0.048708646143898846, | |
| "grad_norm": 0.9923747181892395, | |
| "kl": 0.53564453125, | |
| "learning_rate": 1.6165959825390661e-06, | |
| "loss": -0.0729, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 339 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 298.3125, | |
| "epoch": 0.04885232946585725, | |
| "grad_norm": 0.11337187141180038, | |
| "kl": 0.50927734375, | |
| "learning_rate": 1.604029634760284e-06, | |
| "loss": 0.0055, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 340 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 306.3125, | |
| "epoch": 0.04899601278781565, | |
| "grad_norm": 0.1039213165640831, | |
| "kl": 0.46630859375, | |
| "learning_rate": 1.59151136960288e-06, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 341 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 281.1875, | |
| "epoch": 0.04913969610977406, | |
| "grad_norm": 0.08466682583093643, | |
| "kl": 0.4306640625, | |
| "learning_rate": 1.5790417123081903e-06, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 342 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 386.6875, | |
| "epoch": 0.049283379431732464, | |
| "grad_norm": 0.10602067410945892, | |
| "kl": 0.58203125, | |
| "learning_rate": 1.5666211860780583e-06, | |
| "loss": 0.006, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 343 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 296.5, | |
| "epoch": 0.04942706275369087, | |
| "grad_norm": 0.06954965740442276, | |
| "kl": 0.435546875, | |
| "learning_rate": 1.5542503120528918e-06, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 344 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 333.6875, | |
| "epoch": 0.04957074607564927, | |
| "grad_norm": 0.08143350481987, | |
| "kl": 0.53125, | |
| "learning_rate": 1.5419296092897866e-06, | |
| "loss": 0.0053, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 280.6875, | |
| "epoch": 0.04971442939760767, | |
| "grad_norm": 0.11366530507802963, | |
| "kl": 0.42529296875, | |
| "learning_rate": 1.529659594740755e-06, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 346 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 308.5, | |
| "epoch": 0.049858112719566076, | |
| "grad_norm": 0.08413314819335938, | |
| "kl": 0.43701171875, | |
| "learning_rate": 1.5174407832310338e-06, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 347 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 361.1875, | |
| "epoch": 0.05000179604152448, | |
| "grad_norm": 1.0981367826461792, | |
| "kl": 0.40869140625, | |
| "learning_rate": 1.5052736874374815e-06, | |
| "loss": 0.0653, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 348 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 333.9375, | |
| "epoch": 0.05014547936348288, | |
| "grad_norm": 0.09738067537546158, | |
| "kl": 0.38671875, | |
| "learning_rate": 1.4931588178670695e-06, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 349 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 289.3125, | |
| "epoch": 0.050289162685441285, | |
| "grad_norm": 0.06618193536996841, | |
| "kl": 0.36474609375, | |
| "learning_rate": 1.4810966828354605e-06, | |
| "loss": 0.0037, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 462.375, | |
| "epoch": 0.05043284600739969, | |
| "grad_norm": 2.784261703491211, | |
| "kl": 0.47021484375, | |
| "learning_rate": 1.469087788445684e-06, | |
| "loss": 0.0589, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 351 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 352.25, | |
| "epoch": 0.05057652932935809, | |
| "grad_norm": 0.08649200946092606, | |
| "kl": 0.46240234375, | |
| "learning_rate": 1.4571326385668965e-06, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 352 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 279.0625, | |
| "epoch": 0.0507202126513165, | |
| "grad_norm": 0.09761747717857361, | |
| "kl": 0.431640625, | |
| "learning_rate": 1.4452317348132434e-06, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 353 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 292.5625, | |
| "epoch": 0.050863895973274904, | |
| "grad_norm": 0.12047071009874344, | |
| "kl": 0.47900390625, | |
| "learning_rate": 1.4333855765228104e-06, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 354 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 385.75, | |
| "epoch": 0.05100757929523331, | |
| "grad_norm": 1.935134768486023, | |
| "kl": 0.666015625, | |
| "learning_rate": 1.421594660736675e-06, | |
| "loss": 0.1214, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 388.5625, | |
| "epoch": 0.05115126261719171, | |
| "grad_norm": 0.0700010433793068, | |
| "kl": 0.39013671875, | |
| "learning_rate": 1.4098594821780476e-06, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 356 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 338.125, | |
| "epoch": 0.05129494593915011, | |
| "grad_norm": 0.07236258685588837, | |
| "kl": 0.35693359375, | |
| "learning_rate": 1.3981805332315174e-06, | |
| "loss": 0.0035, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 357 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 375.8125, | |
| "epoch": 0.051438629261108516, | |
| "grad_norm": 0.10169550776481628, | |
| "kl": 0.48779296875, | |
| "learning_rate": 1.3865583039223929e-06, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 358 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 341.4375, | |
| "epoch": 0.05158231258306692, | |
| "grad_norm": 4.062565803527832, | |
| "kl": 0.7236328125, | |
| "learning_rate": 1.374993281896137e-06, | |
| "loss": 0.0212, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 359 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 464.3125, | |
| "epoch": 0.05172599590502532, | |
| "grad_norm": 0.11676806211471558, | |
| "kl": 0.5576171875, | |
| "learning_rate": 1.3634859523979134e-06, | |
| "loss": 0.0054, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 360 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 390.6875, | |
| "epoch": 0.051869679226983725, | |
| "grad_norm": 1.4354273080825806, | |
| "kl": 0.486328125, | |
| "learning_rate": 1.3520367982522208e-06, | |
| "loss": 0.0869, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 361 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 388.8125, | |
| "epoch": 0.05201336254894213, | |
| "grad_norm": 0.11781425029039383, | |
| "kl": 0.41259765625, | |
| "learning_rate": 1.3406462998426358e-06, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 362 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 352.375, | |
| "epoch": 0.05215704587090054, | |
| "grad_norm": 0.1065189316868782, | |
| "kl": 0.4375, | |
| "learning_rate": 1.3293149350916595e-06, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 363 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 358.3125, | |
| "epoch": 0.05230072919285894, | |
| "grad_norm": 0.08605165779590607, | |
| "kl": 0.40380859375, | |
| "learning_rate": 1.3180431794406623e-06, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 364 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 344.75, | |
| "epoch": 0.052444412514817344, | |
| "grad_norm": 5.735403537750244, | |
| "kl": 0.64013671875, | |
| "learning_rate": 1.3068315058299358e-06, | |
| "loss": 0.1397, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 365 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 350.25, | |
| "epoch": 0.05258809583677575, | |
| "grad_norm": 1.2408664226531982, | |
| "kl": 0.40576171875, | |
| "learning_rate": 1.2956803846788503e-06, | |
| "loss": -0.0358, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 366 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 372.125, | |
| "epoch": 0.05273177915873415, | |
| "grad_norm": 0.08045981079339981, | |
| "kl": 0.451171875, | |
| "learning_rate": 1.284590283866116e-06, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 367 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 319.5625, | |
| "epoch": 0.05287546248069255, | |
| "grad_norm": 0.08284196257591248, | |
| "kl": 0.416015625, | |
| "learning_rate": 1.2735616687101518e-06, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 368 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 361.0, | |
| "epoch": 0.053019145802650956, | |
| "grad_norm": 0.08355327695608139, | |
| "kl": 0.4482421875, | |
| "learning_rate": 1.2625950019495614e-06, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 369 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 403.875, | |
| "epoch": 0.05316282912460936, | |
| "grad_norm": 0.08809319883584976, | |
| "kl": 0.44873046875, | |
| "learning_rate": 1.251690743723718e-06, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 370 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 318.75, | |
| "epoch": 0.05330651244656776, | |
| "grad_norm": 1.7776198387145996, | |
| "kl": 0.384765625, | |
| "learning_rate": 1.2408493515534581e-06, | |
| "loss": 0.0241, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 371 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 375.4375, | |
| "epoch": 0.053450195768526165, | |
| "grad_norm": 0.09417347609996796, | |
| "kl": 0.51171875, | |
| "learning_rate": 1.2300712803218834e-06, | |
| "loss": 0.0051, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 372 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 534.25, | |
| "epoch": 0.053593879090484575, | |
| "grad_norm": 1.3220620155334473, | |
| "kl": 0.447265625, | |
| "learning_rate": 1.2193569822552772e-06, | |
| "loss": 0.0913, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 373 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 392.0625, | |
| "epoch": 0.05373756241244298, | |
| "grad_norm": 1.8771501779556274, | |
| "kl": 0.47216796875, | |
| "learning_rate": 1.2087069069041268e-06, | |
| "loss": 0.0733, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 374 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 370.0, | |
| "epoch": 0.05388124573440138, | |
| "grad_norm": 0.08563094586133957, | |
| "kl": 0.3857421875, | |
| "learning_rate": 1.1981215011242654e-06, | |
| "loss": 0.0038, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 375 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 433.3125, | |
| "epoch": 0.054024929056359784, | |
| "grad_norm": 1.363516926765442, | |
| "kl": 0.43994140625, | |
| "learning_rate": 1.1876012090581184e-06, | |
| "loss": 0.064, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 376 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 417.9375, | |
| "epoch": 0.05416861237831819, | |
| "grad_norm": 1.752987027168274, | |
| "kl": 0.404296875, | |
| "learning_rate": 1.177146472116071e-06, | |
| "loss": 0.1186, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 377 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 360.0625, | |
| "epoch": 0.05431229570027659, | |
| "grad_norm": 0.08021112531423569, | |
| "kl": 0.4033203125, | |
| "learning_rate": 1.1667577289579462e-06, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 378 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 312.1875, | |
| "epoch": 0.05445597902223499, | |
| "grad_norm": 0.12521375715732574, | |
| "kl": 0.4931640625, | |
| "learning_rate": 1.1564354154746007e-06, | |
| "loss": 0.0051, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 379 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 321.8125, | |
| "epoch": 0.054599662344193396, | |
| "grad_norm": 0.09579505026340485, | |
| "kl": 0.4921875, | |
| "learning_rate": 1.146179964769635e-06, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 380 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 474.5, | |
| "epoch": 0.0547433456661518, | |
| "grad_norm": 0.0810822919011116, | |
| "kl": 0.44970703125, | |
| "learning_rate": 1.1359918071412195e-06, | |
| "loss": 0.0046, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 381 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 397.8125, | |
| "epoch": 0.0548870289881102, | |
| "grad_norm": 1.5699117183685303, | |
| "kl": 0.4716796875, | |
| "learning_rate": 1.1258713700640456e-06, | |
| "loss": 0.0992, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 382 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 421.875, | |
| "epoch": 0.05503071231006861, | |
| "grad_norm": 0.06795567274093628, | |
| "kl": 0.47998046875, | |
| "learning_rate": 1.115819078171383e-06, | |
| "loss": 0.0049, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 383 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 302.0, | |
| "epoch": 0.055174395632027015, | |
| "grad_norm": 2.255272150039673, | |
| "kl": 0.5107421875, | |
| "learning_rate": 1.1058353532372667e-06, | |
| "loss": 0.0632, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 384 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 323.25, | |
| "epoch": 0.05531807895398542, | |
| "grad_norm": 0.1344795525074005, | |
| "kl": 0.4765625, | |
| "learning_rate": 1.0959206141587998e-06, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 385 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 293.0, | |
| "epoch": 0.05546176227594382, | |
| "grad_norm": 2.3380484580993652, | |
| "kl": 0.49462890625, | |
| "learning_rate": 1.0860752769385766e-06, | |
| "loss": -0.0554, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 386 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 289.25, | |
| "epoch": 0.055605445597902224, | |
| "grad_norm": 2.3010873794555664, | |
| "kl": 0.41650390625, | |
| "learning_rate": 1.0762997546672279e-06, | |
| "loss": -0.0603, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 387 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 352.75, | |
| "epoch": 0.05574912891986063, | |
| "grad_norm": 0.09730658680200577, | |
| "kl": 0.46533203125, | |
| "learning_rate": 1.0665944575060914e-06, | |
| "loss": 0.0047, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 388 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 317.75, | |
| "epoch": 0.05589281224181903, | |
| "grad_norm": 0.08599717915058136, | |
| "kl": 0.40087890625, | |
| "learning_rate": 1.056959792669997e-06, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 389 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 301.9375, | |
| "epoch": 0.05603649556377743, | |
| "grad_norm": 0.08673281967639923, | |
| "kl": 0.36865234375, | |
| "learning_rate": 1.0473961644101856e-06, | |
| "loss": 0.0037, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 390 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 275.75, | |
| "epoch": 0.056180178885735836, | |
| "grad_norm": 0.08435692638158798, | |
| "kl": 0.4267578125, | |
| "learning_rate": 1.037903973997345e-06, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 391 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 416.0, | |
| "epoch": 0.05632386220769424, | |
| "grad_norm": 0.10442197322845459, | |
| "kl": 0.447265625, | |
| "learning_rate": 1.0284836197047737e-06, | |
| "loss": 0.0046, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 392 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 270.0625, | |
| "epoch": 0.05646754552965264, | |
| "grad_norm": 0.1144208163022995, | |
| "kl": 0.4169921875, | |
| "learning_rate": 1.0191354967916712e-06, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 393 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 413.375, | |
| "epoch": 0.05661122885161105, | |
| "grad_norm": 1.8321337699890137, | |
| "kl": 0.46142578125, | |
| "learning_rate": 1.0098599974865515e-06, | |
| "loss": 0.1531, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 394 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 283.375, | |
| "epoch": 0.056754912173569455, | |
| "grad_norm": 0.07575566321611404, | |
| "kl": 0.44384765625, | |
| "learning_rate": 1.0006575109707898e-06, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 395 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 300.25, | |
| "epoch": 0.05689859549552786, | |
| "grad_norm": 0.06213975325226784, | |
| "kl": 0.4384765625, | |
| "learning_rate": 9.915284233622877e-07, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 396 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 300.25, | |
| "epoch": 0.05704227881748626, | |
| "grad_norm": 0.09239588677883148, | |
| "kl": 0.42138671875, | |
| "learning_rate": 9.824731176992796e-07, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 397 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 300.6875, | |
| "epoch": 0.057185962139444664, | |
| "grad_norm": 0.21190443634986877, | |
| "kl": 0.396484375, | |
| "learning_rate": 9.734919739242543e-07, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 398 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 248.0, | |
| "epoch": 0.05732964546140307, | |
| "grad_norm": 0.1755290925502777, | |
| "kl": 0.43212890625, | |
| "learning_rate": 9.645853688680177e-07, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 399 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 331.6875, | |
| "epoch": 0.05747332878336147, | |
| "grad_norm": 1.920776128768921, | |
| "kl": 0.4873046875, | |
| "learning_rate": 9.557536762338786e-07, | |
| "loss": 0.1366, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 341.125, | |
| "epoch": 0.05761701210531987, | |
| "grad_norm": 0.12242202460765839, | |
| "kl": 0.44580078125, | |
| "learning_rate": 9.46997266581973e-07, | |
| "loss": 0.0046, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 401 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 294.75, | |
| "epoch": 0.057760695427278276, | |
| "grad_norm": 0.07106681913137436, | |
| "kl": 0.3984375, | |
| "learning_rate": 9.383165073137115e-07, | |
| "loss": 0.0037, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 402 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 328.9375, | |
| "epoch": 0.05790437874923668, | |
| "grad_norm": 0.10068362206220627, | |
| "kl": 0.431640625, | |
| "learning_rate": 9.297117626563687e-07, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 403 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 304.0625, | |
| "epoch": 0.05804806207119509, | |
| "grad_norm": 0.20983238518238068, | |
| "kl": 0.4404296875, | |
| "learning_rate": 9.211833936477957e-07, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 404 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 309.0625, | |
| "epoch": 0.05819174539315349, | |
| "grad_norm": 0.14584408700466156, | |
| "kl": 0.4462890625, | |
| "learning_rate": 9.127317581212753e-07, | |
| "loss": 0.0046, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 405 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 359.4375, | |
| "epoch": 0.058335428715111895, | |
| "grad_norm": 0.0764172375202179, | |
| "kl": 0.41796875, | |
| "learning_rate": 9.043572106905084e-07, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 406 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 380.6875, | |
| "epoch": 0.0584791120370703, | |
| "grad_norm": 0.0757213830947876, | |
| "kl": 0.4384765625, | |
| "learning_rate": 8.960601027347321e-07, | |
| "loss": 0.0046, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 407 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 306.8125, | |
| "epoch": 0.0586227953590287, | |
| "grad_norm": 0.0817841961979866, | |
| "kl": 0.42529296875, | |
| "learning_rate": 8.878407823839788e-07, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 408 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 248.0, | |
| "epoch": 0.058766478680987104, | |
| "grad_norm": 2.3475921154022217, | |
| "kl": 0.4404296875, | |
| "learning_rate": 8.796995945044689e-07, | |
| "loss": 0.038, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 409 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 327.25, | |
| "epoch": 0.05891016200294551, | |
| "grad_norm": 0.11481396108865738, | |
| "kl": 0.4560546875, | |
| "learning_rate": 8.716368806841405e-07, | |
| "loss": 0.0047, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 410 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 327.0625, | |
| "epoch": 0.05905384532490391, | |
| "grad_norm": 0.11284112930297852, | |
| "kl": 0.36279296875, | |
| "learning_rate": 8.636529792183171e-07, | |
| "loss": 0.0036, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 411 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 387.0625, | |
| "epoch": 0.05919752864686231, | |
| "grad_norm": 1.5390390157699585, | |
| "kl": 0.48486328125, | |
| "learning_rate": 8.557482250955144e-07, | |
| "loss": 0.0038, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 412 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 320.4375, | |
| "epoch": 0.059341211968820716, | |
| "grad_norm": 0.12584422528743744, | |
| "kl": 0.45556640625, | |
| "learning_rate": 8.479229499833844e-07, | |
| "loss": 0.0047, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 413 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 310.3125, | |
| "epoch": 0.059484895290779126, | |
| "grad_norm": 0.06749071925878525, | |
| "kl": 0.4423828125, | |
| "learning_rate": 8.401774822147976e-07, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 414 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 418.6875, | |
| "epoch": 0.05962857861273753, | |
| "grad_norm": 0.0747298002243042, | |
| "kl": 0.501953125, | |
| "learning_rate": 8.325121467740695e-07, | |
| "loss": 0.005, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 415 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 310.6875, | |
| "epoch": 0.05977226193469593, | |
| "grad_norm": 1.954253911972046, | |
| "kl": 0.3984375, | |
| "learning_rate": 8.249272652833226e-07, | |
| "loss": -0.0782, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 416 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 442.5625, | |
| "epoch": 0.059915945256654335, | |
| "grad_norm": 0.09026701003313065, | |
| "kl": 0.49267578125, | |
| "learning_rate": 8.174231559889931e-07, | |
| "loss": 0.0052, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 417 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 264.4375, | |
| "epoch": 0.06005962857861274, | |
| "grad_norm": 0.10345678776502609, | |
| "kl": 0.45751953125, | |
| "learning_rate": 8.100001337484787e-07, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 418 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 311.375, | |
| "epoch": 0.06020331190057114, | |
| "grad_norm": 0.07524294406175613, | |
| "kl": 0.38232421875, | |
| "learning_rate": 8.026585100169251e-07, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 419 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 299.375, | |
| "epoch": 0.060346995222529544, | |
| "grad_norm": 0.08929236233234406, | |
| "kl": 0.37255859375, | |
| "learning_rate": 7.953985928341601e-07, | |
| "loss": 0.0035, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 420 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 291.8125, | |
| "epoch": 0.06049067854448795, | |
| "grad_norm": 0.08844827115535736, | |
| "kl": 0.38427734375, | |
| "learning_rate": 7.882206868117693e-07, | |
| "loss": 0.0038, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 421 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 388.0625, | |
| "epoch": 0.06063436186644635, | |
| "grad_norm": 0.08420619368553162, | |
| "kl": 0.3828125, | |
| "learning_rate": 7.81125093120313e-07, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 422 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 364.3125, | |
| "epoch": 0.06077804518840475, | |
| "grad_norm": 0.05994531139731407, | |
| "kl": 0.39013671875, | |
| "learning_rate": 7.741121094766916e-07, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 423 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 346.9375, | |
| "epoch": 0.06092172851036316, | |
| "grad_norm": 169.9329376220703, | |
| "kl": 15.16455078125, | |
| "learning_rate": 7.671820301316532e-07, | |
| "loss": 0.0798, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 424 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 301.875, | |
| "epoch": 0.061065411832321566, | |
| "grad_norm": 0.08597877621650696, | |
| "kl": 0.4296875, | |
| "learning_rate": 7.603351458574474e-07, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 360.625, | |
| "epoch": 0.06120909515427997, | |
| "grad_norm": 0.08660821616649628, | |
| "kl": 0.4404296875, | |
| "learning_rate": 7.535717439356255e-07, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 426 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 347.375, | |
| "epoch": 0.06135277847623837, | |
| "grad_norm": 0.09224473685026169, | |
| "kl": 0.45654296875, | |
| "learning_rate": 7.46892108144986e-07, | |
| "loss": 0.0047, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 427 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 342.3125, | |
| "epoch": 0.061496461798196775, | |
| "grad_norm": 0.11141007393598557, | |
| "kl": 0.42578125, | |
| "learning_rate": 7.402965187496697e-07, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 428 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 340.5, | |
| "epoch": 0.06164014512015518, | |
| "grad_norm": 2.9540467262268066, | |
| "kl": 0.42919921875, | |
| "learning_rate": 7.337852524873974e-07, | |
| "loss": 0.2308, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 429 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 370.4375, | |
| "epoch": 0.06178382844211358, | |
| "grad_norm": 0.10491559654474258, | |
| "kl": 0.3974609375, | |
| "learning_rate": 7.273585825578608e-07, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 430 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 334.8125, | |
| "epoch": 0.061927511764071984, | |
| "grad_norm": 2.0611469745635986, | |
| "kl": 0.48681640625, | |
| "learning_rate": 7.21016778611259e-07, | |
| "loss": 0.0687, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 431 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 346.1875, | |
| "epoch": 0.06207119508603039, | |
| "grad_norm": 1.7300693988800049, | |
| "kl": 0.40234375, | |
| "learning_rate": 7.147601067369835e-07, | |
| "loss": 0.1007, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 432 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 326.25, | |
| "epoch": 0.06221487840798879, | |
| "grad_norm": 1.881553053855896, | |
| "kl": 0.3720703125, | |
| "learning_rate": 7.085888294524561e-07, | |
| "loss": 0.0915, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 433 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 331.625, | |
| "epoch": 0.0623585617299472, | |
| "grad_norm": 0.08463376760482788, | |
| "kl": 0.431640625, | |
| "learning_rate": 7.025032056921117e-07, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 434 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 280.25, | |
| "epoch": 0.0625022450519056, | |
| "grad_norm": 0.08293016254901886, | |
| "kl": 0.43505859375, | |
| "learning_rate": 6.965034907965349e-07, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 435 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 362.3125, | |
| "epoch": 0.062645928373864, | |
| "grad_norm": 0.13934151828289032, | |
| "kl": 0.45068359375, | |
| "learning_rate": 6.905899365017462e-07, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 436 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 235.0, | |
| "epoch": 0.06278961169582241, | |
| "grad_norm": 2.300443649291992, | |
| "kl": 0.53857421875, | |
| "learning_rate": 6.847627909286409e-07, | |
| "loss": -0.0633, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 437 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 321.6875, | |
| "epoch": 0.0629332950177808, | |
| "grad_norm": 2.531771183013916, | |
| "kl": 0.43359375, | |
| "learning_rate": 6.790222985725761e-07, | |
| "loss": 0.1378, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 438 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 317.125, | |
| "epoch": 0.06307697833973921, | |
| "grad_norm": 0.17811354994773865, | |
| "kl": 0.45751953125, | |
| "learning_rate": 6.733687002931141e-07, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 439 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 386.5, | |
| "epoch": 0.06322066166169762, | |
| "grad_norm": 0.0891660824418068, | |
| "kl": 0.38916015625, | |
| "learning_rate": 6.678022333039158e-07, | |
| "loss": 0.0038, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 440 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 354.5625, | |
| "epoch": 0.06336434498365602, | |
| "grad_norm": 1.3903534412384033, | |
| "kl": 0.38818359375, | |
| "learning_rate": 6.623231311627876e-07, | |
| "loss": 0.0717, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 441 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 333.625, | |
| "epoch": 0.06350802830561443, | |
| "grad_norm": 0.08387104421854019, | |
| "kl": 0.47412109375, | |
| "learning_rate": 6.569316237618811e-07, | |
| "loss": 0.005, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 442 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 329.75, | |
| "epoch": 0.06365171162757283, | |
| "grad_norm": 0.10497360676527023, | |
| "kl": 0.46826171875, | |
| "learning_rate": 6.516279373180499e-07, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 443 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 349.125, | |
| "epoch": 0.06379539494953124, | |
| "grad_norm": 0.1853691041469574, | |
| "kl": 0.48046875, | |
| "learning_rate": 6.464122943633543e-07, | |
| "loss": 0.0047, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 444 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 348.5, | |
| "epoch": 0.06393907827148963, | |
| "grad_norm": 1.2641898393630981, | |
| "kl": 0.4384765625, | |
| "learning_rate": 6.412849137357271e-07, | |
| "loss": -0.041, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 445 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 376.9375, | |
| "epoch": 0.06408276159344804, | |
| "grad_norm": 2.004131317138672, | |
| "kl": 0.48583984375, | |
| "learning_rate": 6.3624601056979e-07, | |
| "loss": -0.0629, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 446 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 392.8125, | |
| "epoch": 0.06422644491540644, | |
| "grad_norm": 3.0901613235473633, | |
| "kl": 0.44677734375, | |
| "learning_rate": 6.312957962878278e-07, | |
| "loss": 0.1556, | |
| "reward": 0.08125000260770321, | |
| "reward_std": 0.037500000558793545, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.8125, | |
| "step": 447 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 356.1875, | |
| "epoch": 0.06437012823736485, | |
| "grad_norm": 0.08546911180019379, | |
| "kl": 0.40185546875, | |
| "learning_rate": 6.264344785909181e-07, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 448 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 262.875, | |
| "epoch": 0.06451381155932324, | |
| "grad_norm": 0.12542271614074707, | |
| "kl": 0.40625, | |
| "learning_rate": 6.216622614502149e-07, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 449 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 446.1875, | |
| "epoch": 0.06465749488128165, | |
| "grad_norm": 0.882746696472168, | |
| "kl": 0.4072265625, | |
| "learning_rate": 6.169793450983916e-07, | |
| "loss": 0.0326, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 243.6875, | |
| "epoch": 0.06480117820324006, | |
| "grad_norm": 0.08595617115497589, | |
| "kl": 0.38916015625, | |
| "learning_rate": 6.123859260212393e-07, | |
| "loss": 0.0038, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 451 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 320.625, | |
| "epoch": 0.06494486152519846, | |
| "grad_norm": 0.0691758245229721, | |
| "kl": 0.40380859375, | |
| "learning_rate": 6.07882196949423e-07, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 452 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 460.0625, | |
| "epoch": 0.06508854484715687, | |
| "grad_norm": 0.06517741084098816, | |
| "kl": 0.37109375, | |
| "learning_rate": 6.034683468503948e-07, | |
| "loss": 0.0037, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 453 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 338.625, | |
| "epoch": 0.06523222816911527, | |
| "grad_norm": 0.1255577653646469, | |
| "kl": 0.52587890625, | |
| "learning_rate": 5.991445609204641e-07, | |
| "loss": 0.0053, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 454 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 294.1875, | |
| "epoch": 0.06537591149107368, | |
| "grad_norm": 0.2892981767654419, | |
| "kl": 0.421875, | |
| "learning_rate": 5.949110205770292e-07, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 455 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 290.625, | |
| "epoch": 0.06551959481303207, | |
| "grad_norm": 0.08248059451580048, | |
| "kl": 0.4111328125, | |
| "learning_rate": 5.90767903450964e-07, | |
| "loss": 0.0043, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 456 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 316.8125, | |
| "epoch": 0.06566327813499048, | |
| "grad_norm": 0.06285588443279266, | |
| "kl": 0.40771484375, | |
| "learning_rate": 5.867153833791652e-07, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 457 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 407.375, | |
| "epoch": 0.06580696145694888, | |
| "grad_norm": 1.4286978244781494, | |
| "kl": 0.41552734375, | |
| "learning_rate": 5.827536303972587e-07, | |
| "loss": 0.099, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 458 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 293.9375, | |
| "epoch": 0.06595064477890729, | |
| "grad_norm": 0.07036440074443817, | |
| "kl": 0.39599609375, | |
| "learning_rate": 5.78882810732465e-07, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 459 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 346.875, | |
| "epoch": 0.0660943281008657, | |
| "grad_norm": 0.15339231491088867, | |
| "kl": 0.416015625, | |
| "learning_rate": 5.75103086796625e-07, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 460 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 375.5625, | |
| "epoch": 0.0662380114228241, | |
| "grad_norm": 0.07655762881040573, | |
| "kl": 0.38818359375, | |
| "learning_rate": 5.714146171793846e-07, | |
| "loss": 0.0037, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 461 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 326.375, | |
| "epoch": 0.0663816947447825, | |
| "grad_norm": 0.12055181711912155, | |
| "kl": 0.53515625, | |
| "learning_rate": 5.678175566415422e-07, | |
| "loss": 0.0056, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 462 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 314.1875, | |
| "epoch": 0.0665253780667409, | |
| "grad_norm": 0.0709441527724266, | |
| "kl": 0.4140625, | |
| "learning_rate": 5.643120561085528e-07, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 463 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 273.25, | |
| "epoch": 0.06666906138869931, | |
| "grad_norm": 0.10049540549516678, | |
| "kl": 0.48583984375, | |
| "learning_rate": 5.608982626641991e-07, | |
| "loss": 0.0049, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 464 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 336.9375, | |
| "epoch": 0.0668127447106577, | |
| "grad_norm": 0.18985526263713837, | |
| "kl": 0.4140625, | |
| "learning_rate": 5.575763195444166e-07, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 465 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 312.375, | |
| "epoch": 0.06695642803261612, | |
| "grad_norm": 1.2384103536605835, | |
| "kl": 0.4248046875, | |
| "learning_rate": 5.543463661312847e-07, | |
| "loss": -0.0351, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 466 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 367.9375, | |
| "epoch": 0.06710011135457451, | |
| "grad_norm": 0.08845322579145432, | |
| "kl": 0.4287109375, | |
| "learning_rate": 5.512085379471808e-07, | |
| "loss": 0.0046, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 467 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 403.25, | |
| "epoch": 0.06724379467653292, | |
| "grad_norm": 1.3062446117401123, | |
| "kl": 0.412109375, | |
| "learning_rate": 5.481629666490903e-07, | |
| "loss": 0.0713, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 468 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 365.1875, | |
| "epoch": 0.06738747799849132, | |
| "grad_norm": 0.07498609274625778, | |
| "kl": 0.3583984375, | |
| "learning_rate": 5.452097800230853e-07, | |
| "loss": 0.0035, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 469 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 347.8125, | |
| "epoch": 0.06753116132044973, | |
| "grad_norm": 1.3313813209533691, | |
| "kl": 0.462890625, | |
| "learning_rate": 5.423491019789623e-07, | |
| "loss": -0.0492, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 470 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 315.9375, | |
| "epoch": 0.06767484464240814, | |
| "grad_norm": 0.08418026566505432, | |
| "kl": 0.44775390625, | |
| "learning_rate": 5.395810525450425e-07, | |
| "loss": 0.0045, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 471 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 328.375, | |
| "epoch": 0.06781852796436653, | |
| "grad_norm": 0.065086729824543, | |
| "kl": 0.34716796875, | |
| "learning_rate": 5.369057478631359e-07, | |
| "loss": 0.0034, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 472 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 375.5625, | |
| "epoch": 0.06796221128632494, | |
| "grad_norm": 1.40998113155365, | |
| "kl": 0.39208984375, | |
| "learning_rate": 5.343233001836694e-07, | |
| "loss": 0.1839, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 473 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 283.4375, | |
| "epoch": 0.06810589460828334, | |
| "grad_norm": 0.18871350586414337, | |
| "kl": 0.42236328125, | |
| "learning_rate": 5.318338178609754e-07, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 331.0625, | |
| "epoch": 0.06824957793024175, | |
| "grad_norm": 0.07475198060274124, | |
| "kl": 0.44384765625, | |
| "learning_rate": 5.294374053487459e-07, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 475 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 350.0625, | |
| "epoch": 0.06839326125220015, | |
| "grad_norm": 0.08038158714771271, | |
| "kl": 0.361328125, | |
| "learning_rate": 5.271341631956511e-07, | |
| "loss": 0.0035, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 476 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 332.75, | |
| "epoch": 0.06853694457415856, | |
| "grad_norm": 1.5739789009094238, | |
| "kl": 0.46044921875, | |
| "learning_rate": 5.249241880411181e-07, | |
| "loss": -0.0099, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 477 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 401.1875, | |
| "epoch": 0.06868062789611695, | |
| "grad_norm": 0.0891866534948349, | |
| "kl": 0.38525390625, | |
| "learning_rate": 5.228075726112785e-07, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 478 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 443.9375, | |
| "epoch": 0.06882431121807536, | |
| "grad_norm": 0.11574111878871918, | |
| "kl": 0.37548828125, | |
| "learning_rate": 5.207844057150768e-07, | |
| "loss": 0.0037, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 479 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 369.9375, | |
| "epoch": 0.06896799454003377, | |
| "grad_norm": 0.07223603874444962, | |
| "kl": 0.34716796875, | |
| "learning_rate": 5.188547722405437e-07, | |
| "loss": 0.0034, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 363.375, | |
| "epoch": 0.06911167786199217, | |
| "grad_norm": 0.07493572682142258, | |
| "kl": 0.4140625, | |
| "learning_rate": 5.170187531512351e-07, | |
| "loss": 0.0042, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 481 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 386.1875, | |
| "epoch": 0.06925536118395058, | |
| "grad_norm": 0.20151501893997192, | |
| "kl": 0.453125, | |
| "learning_rate": 5.152764254828348e-07, | |
| "loss": 0.005, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 482 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 262.3125, | |
| "epoch": 0.06939904450590897, | |
| "grad_norm": 0.200210303068161, | |
| "kl": 0.46435546875, | |
| "learning_rate": 5.136278623399225e-07, | |
| "loss": 0.0048, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 483 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 320.75, | |
| "epoch": 0.06954272782786738, | |
| "grad_norm": 0.08599031716585159, | |
| "kl": 0.369140625, | |
| "learning_rate": 5.120731328929058e-07, | |
| "loss": 0.0036, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 484 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 469.25, | |
| "epoch": 0.06968641114982578, | |
| "grad_norm": 1.3228610754013062, | |
| "kl": 0.390625, | |
| "learning_rate": 5.106123023751187e-07, | |
| "loss": 0.0797, | |
| "reward": 0.0875000013038516, | |
| "reward_std": 0.014433757402002811, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 485 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 443.5, | |
| "epoch": 0.06983009447178419, | |
| "grad_norm": 0.06405351310968399, | |
| "kl": 0.4345703125, | |
| "learning_rate": 5.092454320800833e-07, | |
| "loss": 0.0044, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 486 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 313.625, | |
| "epoch": 0.06997377779374259, | |
| "grad_norm": 2.284759998321533, | |
| "kl": 0.3701171875, | |
| "learning_rate": 5.079725793589405e-07, | |
| "loss": 0.0138, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 487 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 406.375, | |
| "epoch": 0.070117461115701, | |
| "grad_norm": 0.06436394900083542, | |
| "kl": 0.39208984375, | |
| "learning_rate": 5.067937976180407e-07, | |
| "loss": 0.004, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 488 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 413.125, | |
| "epoch": 0.07026114443765939, | |
| "grad_norm": 2.4305238723754883, | |
| "kl": 0.4248046875, | |
| "learning_rate": 5.057091363167046e-07, | |
| "loss": 0.0754, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 489 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 342.25, | |
| "epoch": 0.0704048277596178, | |
| "grad_norm": 0.06973189115524292, | |
| "kl": 0.3818359375, | |
| "learning_rate": 5.047186409651489e-07, | |
| "loss": 0.0036, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 490 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 367.8125, | |
| "epoch": 0.07054851108157621, | |
| "grad_norm": 1.2751497030258179, | |
| "kl": 0.38330078125, | |
| "learning_rate": 5.038223531225742e-07, | |
| "loss": 0.0632, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 491 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 378.5625, | |
| "epoch": 0.07069219440353461, | |
| "grad_norm": 2.2283360958099365, | |
| "kl": 0.40185546875, | |
| "learning_rate": 5.030203103954232e-07, | |
| "loss": 0.157, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 492 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 359.3125, | |
| "epoch": 0.07083587772549302, | |
| "grad_norm": 0.06642284989356995, | |
| "kl": 0.36181640625, | |
| "learning_rate": 5.023125464358026e-07, | |
| "loss": 0.0036, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 493 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 359.125, | |
| "epoch": 0.07097956104745141, | |
| "grad_norm": 1.7237136363983154, | |
| "kl": 0.419921875, | |
| "learning_rate": 5.016990909400709e-07, | |
| "loss": 0.1536, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 494 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 412.5625, | |
| "epoch": 0.07112324436940982, | |
| "grad_norm": 0.07103113830089569, | |
| "kl": 0.37353515625, | |
| "learning_rate": 5.011799696475915e-07, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 495 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 427.0, | |
| "epoch": 0.07126692769136822, | |
| "grad_norm": 0.07387176156044006, | |
| "kl": 0.40576171875, | |
| "learning_rate": 5.007552043396547e-07, | |
| "loss": 0.0041, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 496 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 442.5, | |
| "epoch": 0.07141061101332663, | |
| "grad_norm": 2.5220470428466797, | |
| "kl": 0.3955078125, | |
| "learning_rate": 5.004248128385618e-07, | |
| "loss": 0.1158, | |
| "reward": 0.08750000223517418, | |
| "reward_std": 0.02500000037252903, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.875, | |
| "step": 497 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 316.625, | |
| "epoch": 0.07155429433528503, | |
| "grad_norm": 0.09226960688829422, | |
| "kl": 0.3818359375, | |
| "learning_rate": 5.001888090068784e-07, | |
| "loss": 0.0037, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 498 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 321.375, | |
| "epoch": 0.07169797765724344, | |
| "grad_norm": 1.362955093383789, | |
| "kl": 0.40478515625, | |
| "learning_rate": 5.000472027468528e-07, | |
| "loss": -0.0529, | |
| "reward": 0.09375000186264515, | |
| "reward_std": 0.012500000186264515, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.9375, | |
| "step": 499 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 339.375, | |
| "epoch": 0.07184166097920183, | |
| "grad_norm": 0.08727846294641495, | |
| "kl": 0.38037109375, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 0.0039, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07184166097920183, | |
| "step": 500, | |
| "total_flos": 0.0, | |
| "train_loss": 0.05661330611514859, | |
| "train_runtime": 3774.5693, | |
| "train_samples_per_second": 2.119, | |
| "train_steps_per_second": 0.132 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |