diff --git "a/checkpoint-41775/trainer_state.json" "b/checkpoint-41775/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-41775/trainer_state.json" @@ -0,0 +1,12009 @@ +{ + "best_metric": 6.482921600341797, + "best_model_checkpoint": "base-multilingual-gnd-bert-uncased/checkpoint-41775", + "epoch": 15.0, + "eval_steps": 500, + "global_step": 41775, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008976660682226212, + "grad_norm": 2.178659200668335, + "learning_rate": 1.1751264893096131e-07, + "loss": 9.9955, + "step": 25 + }, + { + "epoch": 0.017953321364452424, + "grad_norm": 2.0735507011413574, + "learning_rate": 2.3992165823404603e-07, + "loss": 10.0016, + "step": 50 + }, + { + "epoch": 0.026929982046678635, + "grad_norm": 2.1424293518066406, + "learning_rate": 3.623306675371308e-07, + "loss": 9.9915, + "step": 75 + }, + { + "epoch": 0.03590664272890485, + "grad_norm": 2.267226219177246, + "learning_rate": 4.798433164680921e-07, + "loss": 9.9942, + "step": 100 + }, + { + "epoch": 0.04488330341113106, + "grad_norm": 2.8778066635131836, + "learning_rate": 6.022523257711768e-07, + "loss": 9.9916, + "step": 125 + }, + { + "epoch": 0.05385996409335727, + "grad_norm": 1.935097336769104, + "learning_rate": 7.246613350742616e-07, + "loss": 9.9895, + "step": 150 + }, + { + "epoch": 0.06283662477558348, + "grad_norm": 2.0227699279785156, + "learning_rate": 8.470703443773462e-07, + "loss": 9.9922, + "step": 175 + }, + { + "epoch": 0.0718132854578097, + "grad_norm": 2.357494354248047, + "learning_rate": 9.694793536804308e-07, + "loss": 9.9938, + "step": 200 + }, + { + "epoch": 0.0807899461400359, + "grad_norm": 2.232741594314575, + "learning_rate": 1.0918883629835156e-06, + "loss": 9.9943, + "step": 225 + }, + { + "epoch": 0.08976660682226212, + "grad_norm": 2.091804265975952, + "learning_rate": 1.2142973722866003e-06, + "loss": 9.9835, + "step": 250 + }, + { + "epoch": 0.09874326750448834, + "grad_norm": 1.9601001739501953, + "learning_rate": 1.336706381589685e-06, + "loss": 9.9906, + "step": 275 + }, + { + "epoch": 0.10771992818671454, + "grad_norm": 1.8632302284240723, + "learning_rate": 1.4591153908927696e-06, + "loss": 9.9876, + "step": 300 + }, + { + "epoch": 0.11669658886894076, + "grad_norm": 1.6834025382995605, + "learning_rate": 1.5815244001958546e-06, + "loss": 9.9798, + "step": 325 + }, + { + "epoch": 0.12567324955116696, + "grad_norm": 1.8173505067825317, + "learning_rate": 1.7039334094989391e-06, + "loss": 9.9718, + "step": 350 + }, + { + "epoch": 0.13464991023339318, + "grad_norm": 1.7641466856002808, + "learning_rate": 1.826342418802024e-06, + "loss": 9.9802, + "step": 375 + }, + { + "epoch": 0.1436265709156194, + "grad_norm": 1.8006683588027954, + "learning_rate": 1.9487514281051086e-06, + "loss": 9.9751, + "step": 400 + }, + { + "epoch": 0.1526032315978456, + "grad_norm": 1.9189248085021973, + "learning_rate": 2.0711604374081934e-06, + "loss": 9.9562, + "step": 425 + }, + { + "epoch": 0.1615798922800718, + "grad_norm": 2.786694288253784, + "learning_rate": 2.1935694467112777e-06, + "loss": 9.9439, + "step": 450 + }, + { + "epoch": 0.17055655296229802, + "grad_norm": 2.511087656021118, + "learning_rate": 2.315978456014363e-06, + "loss": 9.9219, + "step": 475 + }, + { + "epoch": 0.17953321364452424, + "grad_norm": 2.477684259414673, + "learning_rate": 2.433491104945324e-06, + "loss": 9.9123, + "step": 500 + }, + { + "epoch": 0.18850987432675045, + "grad_norm": 2.771333932876587, + "learning_rate": 2.5559001142484087e-06, + "loss": 9.9035, + "step": 525 + }, + { + "epoch": 0.19748653500897667, + "grad_norm": 2.995973825454712, + "learning_rate": 2.678309123551494e-06, + "loss": 9.8671, + "step": 550 + }, + { + "epoch": 0.20646319569120286, + "grad_norm": 3.164210319519043, + "learning_rate": 2.800718132854578e-06, + "loss": 9.8354, + "step": 575 + }, + { + "epoch": 0.21543985637342908, + "grad_norm": 3.1597137451171875, + "learning_rate": 2.923127142157663e-06, + "loss": 9.8755, + "step": 600 + }, + { + "epoch": 0.2244165170556553, + "grad_norm": 3.275249719619751, + "learning_rate": 3.0455361514607477e-06, + "loss": 9.8017, + "step": 625 + }, + { + "epoch": 0.2333931777378815, + "grad_norm": 3.032151699066162, + "learning_rate": 3.167945160763832e-06, + "loss": 9.7906, + "step": 650 + }, + { + "epoch": 0.24236983842010773, + "grad_norm": 3.3924691677093506, + "learning_rate": 3.290354170066917e-06, + "loss": 9.7557, + "step": 675 + }, + { + "epoch": 0.2513464991023339, + "grad_norm": 3.69755482673645, + "learning_rate": 3.412763179370002e-06, + "loss": 9.7763, + "step": 700 + }, + { + "epoch": 0.26032315978456017, + "grad_norm": 3.307591199874878, + "learning_rate": 3.5351721886730866e-06, + "loss": 9.7048, + "step": 725 + }, + { + "epoch": 0.26929982046678635, + "grad_norm": 3.7737700939178467, + "learning_rate": 3.657581197976171e-06, + "loss": 9.6937, + "step": 750 + }, + { + "epoch": 0.27827648114901254, + "grad_norm": 3.389641046524048, + "learning_rate": 3.7799902072792557e-06, + "loss": 9.6912, + "step": 775 + }, + { + "epoch": 0.2872531418312388, + "grad_norm": 3.4336354732513428, + "learning_rate": 3.90239921658234e-06, + "loss": 9.6288, + "step": 800 + }, + { + "epoch": 0.296229802513465, + "grad_norm": 3.661147117614746, + "learning_rate": 4.024808225885425e-06, + "loss": 9.6372, + "step": 825 + }, + { + "epoch": 0.3052064631956912, + "grad_norm": 3.455627202987671, + "learning_rate": 4.1472172351885095e-06, + "loss": 9.5926, + "step": 850 + }, + { + "epoch": 0.3141831238779174, + "grad_norm": 3.8364195823669434, + "learning_rate": 4.269626244491594e-06, + "loss": 9.5483, + "step": 875 + }, + { + "epoch": 0.3231597845601436, + "grad_norm": 3.7730331420898438, + "learning_rate": 4.39203525379468e-06, + "loss": 9.5825, + "step": 900 + }, + { + "epoch": 0.33213644524236985, + "grad_norm": 3.700287342071533, + "learning_rate": 4.514444263097764e-06, + "loss": 9.5432, + "step": 925 + }, + { + "epoch": 0.34111310592459604, + "grad_norm": 3.826079845428467, + "learning_rate": 4.636853272400849e-06, + "loss": 9.5418, + "step": 950 + }, + { + "epoch": 0.3500897666068223, + "grad_norm": 3.6339972019195557, + "learning_rate": 4.759262281703934e-06, + "loss": 9.5365, + "step": 975 + }, + { + "epoch": 0.3590664272890485, + "grad_norm": 3.8660428524017334, + "learning_rate": 4.881671291007018e-06, + "loss": 9.5211, + "step": 1000 + }, + { + "epoch": 0.36804308797127466, + "grad_norm": 3.728740692138672, + "learning_rate": 5.004080300310103e-06, + "loss": 9.5022, + "step": 1025 + }, + { + "epoch": 0.3770197486535009, + "grad_norm": 3.3869097232818604, + "learning_rate": 5.1264893096131875e-06, + "loss": 9.4851, + "step": 1050 + }, + { + "epoch": 0.3859964093357271, + "grad_norm": 3.584878444671631, + "learning_rate": 5.248898318916272e-06, + "loss": 9.4627, + "step": 1075 + }, + { + "epoch": 0.39497307001795334, + "grad_norm": 3.4840195178985596, + "learning_rate": 5.371307328219357e-06, + "loss": 9.4107, + "step": 1100 + }, + { + "epoch": 0.40394973070017953, + "grad_norm": 3.8751413822174072, + "learning_rate": 5.493716337522441e-06, + "loss": 9.4361, + "step": 1125 + }, + { + "epoch": 0.4129263913824057, + "grad_norm": 3.7450449466705322, + "learning_rate": 5.6161253468255265e-06, + "loss": 9.4551, + "step": 1150 + }, + { + "epoch": 0.42190305206463197, + "grad_norm": 3.6604928970336914, + "learning_rate": 5.738534356128612e-06, + "loss": 9.4539, + "step": 1175 + }, + { + "epoch": 0.43087971274685816, + "grad_norm": 3.5738086700439453, + "learning_rate": 5.860943365431696e-06, + "loss": 9.4052, + "step": 1200 + }, + { + "epoch": 0.4398563734290844, + "grad_norm": 3.543718099594116, + "learning_rate": 5.983352374734781e-06, + "loss": 9.3754, + "step": 1225 + }, + { + "epoch": 0.4488330341113106, + "grad_norm": 3.6965208053588867, + "learning_rate": 6.1057613840378655e-06, + "loss": 9.3714, + "step": 1250 + }, + { + "epoch": 0.4578096947935368, + "grad_norm": 3.3985488414764404, + "learning_rate": 6.22817039334095e-06, + "loss": 9.337, + "step": 1275 + }, + { + "epoch": 0.466786355475763, + "grad_norm": 3.7057738304138184, + "learning_rate": 6.350579402644035e-06, + "loss": 9.3503, + "step": 1300 + }, + { + "epoch": 0.4757630161579892, + "grad_norm": 3.5311150550842285, + "learning_rate": 6.472988411947119e-06, + "loss": 9.2895, + "step": 1325 + }, + { + "epoch": 0.48473967684021546, + "grad_norm": 3.8652713298797607, + "learning_rate": 6.595397421250204e-06, + "loss": 9.2929, + "step": 1350 + }, + { + "epoch": 0.49371633752244165, + "grad_norm": 4.024794578552246, + "learning_rate": 6.717806430553289e-06, + "loss": 9.3452, + "step": 1375 + }, + { + "epoch": 0.5026929982046678, + "grad_norm": 3.746289014816284, + "learning_rate": 6.840215439856373e-06, + "loss": 9.3084, + "step": 1400 + }, + { + "epoch": 0.5116696588868941, + "grad_norm": 3.613616466522217, + "learning_rate": 6.962624449159458e-06, + "loss": 9.2875, + "step": 1425 + }, + { + "epoch": 0.5206463195691203, + "grad_norm": 3.6074392795562744, + "learning_rate": 7.0850334584625435e-06, + "loss": 9.2628, + "step": 1450 + }, + { + "epoch": 0.5296229802513465, + "grad_norm": 3.741739511489868, + "learning_rate": 7.207442467765628e-06, + "loss": 9.2599, + "step": 1475 + }, + { + "epoch": 0.5385996409335727, + "grad_norm": 3.5845210552215576, + "learning_rate": 7.329851477068713e-06, + "loss": 9.2169, + "step": 1500 + }, + { + "epoch": 0.547576301615799, + "grad_norm": 4.354323387145996, + "learning_rate": 7.452260486371797e-06, + "loss": 9.2109, + "step": 1525 + }, + { + "epoch": 0.5565529622980251, + "grad_norm": 3.952972888946533, + "learning_rate": 7.5746694956748825e-06, + "loss": 9.2905, + "step": 1550 + }, + { + "epoch": 0.5655296229802513, + "grad_norm": 4.377786159515381, + "learning_rate": 7.697078504977968e-06, + "loss": 9.2147, + "step": 1575 + }, + { + "epoch": 0.5745062836624776, + "grad_norm": 3.4569475650787354, + "learning_rate": 7.819487514281051e-06, + "loss": 9.223, + "step": 1600 + }, + { + "epoch": 0.5834829443447038, + "grad_norm": 3.766814708709717, + "learning_rate": 7.941896523584136e-06, + "loss": 9.188, + "step": 1625 + }, + { + "epoch": 0.59245960502693, + "grad_norm": 3.660252332687378, + "learning_rate": 8.06430553288722e-06, + "loss": 9.1451, + "step": 1650 + }, + { + "epoch": 0.6014362657091562, + "grad_norm": 3.711934804916382, + "learning_rate": 8.186714542190307e-06, + "loss": 9.1778, + "step": 1675 + }, + { + "epoch": 0.6104129263913824, + "grad_norm": 3.448937177658081, + "learning_rate": 8.30912355149339e-06, + "loss": 9.1997, + "step": 1700 + }, + { + "epoch": 0.6193895870736086, + "grad_norm": 3.748152017593384, + "learning_rate": 8.431532560796475e-06, + "loss": 9.19, + "step": 1725 + }, + { + "epoch": 0.6283662477558348, + "grad_norm": 3.5555007457733154, + "learning_rate": 8.553941570099559e-06, + "loss": 9.1537, + "step": 1750 + }, + { + "epoch": 0.6373429084380611, + "grad_norm": 3.5215094089508057, + "learning_rate": 8.676350579402644e-06, + "loss": 9.1892, + "step": 1775 + }, + { + "epoch": 0.6463195691202872, + "grad_norm": 3.671006679534912, + "learning_rate": 8.798759588705727e-06, + "loss": 9.1068, + "step": 1800 + }, + { + "epoch": 0.6552962298025135, + "grad_norm": 4.4277567863464355, + "learning_rate": 8.921168598008814e-06, + "loss": 9.0982, + "step": 1825 + }, + { + "epoch": 0.6642728904847397, + "grad_norm": 4.583724021911621, + "learning_rate": 9.0435776073119e-06, + "loss": 9.0986, + "step": 1850 + }, + { + "epoch": 0.6732495511669659, + "grad_norm": 3.769141674041748, + "learning_rate": 9.165986616614983e-06, + "loss": 9.118, + "step": 1875 + }, + { + "epoch": 0.6822262118491921, + "grad_norm": 3.800373077392578, + "learning_rate": 9.288395625918068e-06, + "loss": 9.0868, + "step": 1900 + }, + { + "epoch": 0.6912028725314183, + "grad_norm": 3.838160514831543, + "learning_rate": 9.410804635221152e-06, + "loss": 9.0122, + "step": 1925 + }, + { + "epoch": 0.7001795332136446, + "grad_norm": 4.043140411376953, + "learning_rate": 9.533213644524238e-06, + "loss": 9.0322, + "step": 1950 + }, + { + "epoch": 0.7091561938958707, + "grad_norm": 5.179311275482178, + "learning_rate": 9.655622653827322e-06, + "loss": 9.1068, + "step": 1975 + }, + { + "epoch": 0.718132854578097, + "grad_norm": 3.5329511165618896, + "learning_rate": 9.778031663130407e-06, + "loss": 9.0682, + "step": 2000 + }, + { + "epoch": 0.7271095152603232, + "grad_norm": 4.334126949310303, + "learning_rate": 9.90044067243349e-06, + "loss": 9.0724, + "step": 2025 + }, + { + "epoch": 0.7360861759425493, + "grad_norm": 4.666273593902588, + "learning_rate": 1.0022849681736576e-05, + "loss": 8.9987, + "step": 2050 + }, + { + "epoch": 0.7450628366247756, + "grad_norm": 4.142033100128174, + "learning_rate": 1.014525869103966e-05, + "loss": 9.0619, + "step": 2075 + }, + { + "epoch": 0.7540394973070018, + "grad_norm": 3.9526703357696533, + "learning_rate": 1.0267667700342746e-05, + "loss": 9.0537, + "step": 2100 + }, + { + "epoch": 0.7630161579892281, + "grad_norm": 3.6465206146240234, + "learning_rate": 1.0390076709645831e-05, + "loss": 8.9707, + "step": 2125 + }, + { + "epoch": 0.7719928186714542, + "grad_norm": 3.824362277984619, + "learning_rate": 1.0512485718948915e-05, + "loss": 9.06, + "step": 2150 + }, + { + "epoch": 0.7809694793536804, + "grad_norm": 4.353694915771484, + "learning_rate": 1.0634894728252e-05, + "loss": 9.0163, + "step": 2175 + }, + { + "epoch": 0.7899461400359067, + "grad_norm": 3.6305439472198486, + "learning_rate": 1.0757303737555083e-05, + "loss": 9.0576, + "step": 2200 + }, + { + "epoch": 0.7989228007181328, + "grad_norm": 4.027074813842773, + "learning_rate": 1.087971274685817e-05, + "loss": 9.0419, + "step": 2225 + }, + { + "epoch": 0.8078994614003591, + "grad_norm": 3.7664365768432617, + "learning_rate": 1.1002121756161254e-05, + "loss": 8.9664, + "step": 2250 + }, + { + "epoch": 0.8168761220825853, + "grad_norm": 3.61863374710083, + "learning_rate": 1.1124530765464339e-05, + "loss": 8.9999, + "step": 2275 + }, + { + "epoch": 0.8258527827648114, + "grad_norm": 3.445437431335449, + "learning_rate": 1.1246939774767422e-05, + "loss": 8.9634, + "step": 2300 + }, + { + "epoch": 0.8348294434470377, + "grad_norm": 3.6437652111053467, + "learning_rate": 1.1369348784070508e-05, + "loss": 8.9947, + "step": 2325 + }, + { + "epoch": 0.8438061041292639, + "grad_norm": 4.014418125152588, + "learning_rate": 1.1491757793373591e-05, + "loss": 8.9723, + "step": 2350 + }, + { + "epoch": 0.8527827648114902, + "grad_norm": 3.297578811645508, + "learning_rate": 1.1614166802676678e-05, + "loss": 8.9942, + "step": 2375 + }, + { + "epoch": 0.8617594254937163, + "grad_norm": 3.5939810276031494, + "learning_rate": 1.1736575811979763e-05, + "loss": 9.0305, + "step": 2400 + }, + { + "epoch": 0.8707360861759426, + "grad_norm": 3.5020339488983154, + "learning_rate": 1.1858984821282847e-05, + "loss": 8.9972, + "step": 2425 + }, + { + "epoch": 0.8797127468581688, + "grad_norm": 4.216231346130371, + "learning_rate": 1.1981393830585932e-05, + "loss": 9.0035, + "step": 2450 + }, + { + "epoch": 0.8886894075403949, + "grad_norm": 3.605320453643799, + "learning_rate": 1.2103802839889015e-05, + "loss": 8.977, + "step": 2475 + }, + { + "epoch": 0.8976660682226212, + "grad_norm": 3.4958698749542236, + "learning_rate": 1.2226211849192102e-05, + "loss": 9.0033, + "step": 2500 + }, + { + "epoch": 0.9066427289048474, + "grad_norm": 3.6349751949310303, + "learning_rate": 1.2348620858495186e-05, + "loss": 9.0033, + "step": 2525 + }, + { + "epoch": 0.9156193895870736, + "grad_norm": 4.710958003997803, + "learning_rate": 1.247102986779827e-05, + "loss": 8.9403, + "step": 2550 + }, + { + "epoch": 0.9245960502692998, + "grad_norm": 3.4458677768707275, + "learning_rate": 1.2593438877101354e-05, + "loss": 8.9524, + "step": 2575 + }, + { + "epoch": 0.933572710951526, + "grad_norm": 3.6183245182037354, + "learning_rate": 1.271584788640444e-05, + "loss": 8.9459, + "step": 2600 + }, + { + "epoch": 0.9425493716337523, + "grad_norm": 3.551482915878296, + "learning_rate": 1.2838256895707523e-05, + "loss": 8.9674, + "step": 2625 + }, + { + "epoch": 0.9515260323159784, + "grad_norm": 3.2625911235809326, + "learning_rate": 1.296066590501061e-05, + "loss": 8.9399, + "step": 2650 + }, + { + "epoch": 0.9605026929982047, + "grad_norm": 3.4796018600463867, + "learning_rate": 1.3083074914313695e-05, + "loss": 8.9473, + "step": 2675 + }, + { + "epoch": 0.9694793536804309, + "grad_norm": 3.3425681591033936, + "learning_rate": 1.3205483923616778e-05, + "loss": 8.9006, + "step": 2700 + }, + { + "epoch": 0.9784560143626571, + "grad_norm": 3.8689053058624268, + "learning_rate": 1.3327892932919863e-05, + "loss": 8.993, + "step": 2725 + }, + { + "epoch": 0.9874326750448833, + "grad_norm": 3.6820685863494873, + "learning_rate": 1.3450301942222947e-05, + "loss": 8.8965, + "step": 2750 + }, + { + "epoch": 0.9964093357271095, + "grad_norm": 3.585042715072632, + "learning_rate": 1.3572710951526034e-05, + "loss": 8.8934, + "step": 2775 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.003799667767742921, + "eval_f1_macro": 7.757665097038169e-07, + "eval_f1_micro": 0.003799667767742921, + "eval_f1_weighted": 5.3968479310057175e-05, + "eval_loss": 9.10261344909668, + "eval_precision_macro": 3.9110937443978037e-07, + "eval_precision_micro": 0.003799667767742921, + "eval_precision_weighted": 2.718933060563953e-05, + "eval_recall_macro": 5.1607987876084194e-05, + "eval_recall_micro": 0.003799667767742921, + "eval_recall_weighted": 0.003799667767742921, + "eval_runtime": 97.1557, + "eval_samples_per_second": 539.062, + "eval_steps_per_second": 8.43, + "step": 2785 + }, + { + "epoch": 1.0053859964093357, + "grad_norm": 3.954648971557617, + "learning_rate": 1.3695119960829117e-05, + "loss": 8.8224, + "step": 2800 + }, + { + "epoch": 1.014362657091562, + "grad_norm": 3.5299644470214844, + "learning_rate": 1.3817528970132202e-05, + "loss": 8.809, + "step": 2825 + }, + { + "epoch": 1.0233393177737882, + "grad_norm": 3.476799726486206, + "learning_rate": 1.3939937979435286e-05, + "loss": 8.8508, + "step": 2850 + }, + { + "epoch": 1.0323159784560143, + "grad_norm": 3.4868431091308594, + "learning_rate": 1.4062346988738371e-05, + "loss": 8.7979, + "step": 2875 + }, + { + "epoch": 1.0412926391382407, + "grad_norm": 3.615778684616089, + "learning_rate": 1.4184755998041455e-05, + "loss": 8.9139, + "step": 2900 + }, + { + "epoch": 1.0502692998204668, + "grad_norm": 3.7465271949768066, + "learning_rate": 1.4307165007344541e-05, + "loss": 8.8856, + "step": 2925 + }, + { + "epoch": 1.059245960502693, + "grad_norm": 3.319967746734619, + "learning_rate": 1.4429574016647627e-05, + "loss": 8.7693, + "step": 2950 + }, + { + "epoch": 1.0682226211849193, + "grad_norm": 4.431064605712891, + "learning_rate": 1.455198302595071e-05, + "loss": 8.8126, + "step": 2975 + }, + { + "epoch": 1.0771992818671454, + "grad_norm": 3.532322883605957, + "learning_rate": 1.4674392035253795e-05, + "loss": 8.8041, + "step": 3000 + }, + { + "epoch": 1.0861759425493716, + "grad_norm": 3.2850875854492188, + "learning_rate": 1.4796801044556879e-05, + "loss": 8.8564, + "step": 3025 + }, + { + "epoch": 1.095152603231598, + "grad_norm": 4.427009105682373, + "learning_rate": 1.4919210053859966e-05, + "loss": 8.8648, + "step": 3050 + }, + { + "epoch": 1.104129263913824, + "grad_norm": 3.4329776763916016, + "learning_rate": 1.5041619063163047e-05, + "loss": 8.7549, + "step": 3075 + }, + { + "epoch": 1.1131059245960502, + "grad_norm": 3.3925068378448486, + "learning_rate": 1.5164028072466134e-05, + "loss": 8.7724, + "step": 3100 + }, + { + "epoch": 1.1220825852782765, + "grad_norm": 3.770634174346924, + "learning_rate": 1.528643708176922e-05, + "loss": 8.7933, + "step": 3125 + }, + { + "epoch": 1.1310592459605027, + "grad_norm": 3.5723226070404053, + "learning_rate": 1.54088460910723e-05, + "loss": 8.7945, + "step": 3150 + }, + { + "epoch": 1.140035906642729, + "grad_norm": 3.477163791656494, + "learning_rate": 1.5531255100375388e-05, + "loss": 8.8284, + "step": 3175 + }, + { + "epoch": 1.1490125673249552, + "grad_norm": 3.318432331085205, + "learning_rate": 1.565366410967847e-05, + "loss": 8.8334, + "step": 3200 + }, + { + "epoch": 1.1579892280071813, + "grad_norm": 3.4390690326690674, + "learning_rate": 1.577607311898156e-05, + "loss": 8.75, + "step": 3225 + }, + { + "epoch": 1.1669658886894076, + "grad_norm": 3.470695734024048, + "learning_rate": 1.5898482128284645e-05, + "loss": 8.7666, + "step": 3250 + }, + { + "epoch": 1.1759425493716338, + "grad_norm": 3.9793922901153564, + "learning_rate": 1.6020891137587725e-05, + "loss": 8.8133, + "step": 3275 + }, + { + "epoch": 1.18491921005386, + "grad_norm": 3.5956616401672363, + "learning_rate": 1.6143300146890812e-05, + "loss": 8.8232, + "step": 3300 + }, + { + "epoch": 1.1938958707360863, + "grad_norm": 3.5978870391845703, + "learning_rate": 1.6265709156193896e-05, + "loss": 8.8898, + "step": 3325 + }, + { + "epoch": 1.2028725314183124, + "grad_norm": 3.689972400665283, + "learning_rate": 1.6388118165496983e-05, + "loss": 8.6941, + "step": 3350 + }, + { + "epoch": 1.2118491921005385, + "grad_norm": 3.456151008605957, + "learning_rate": 1.6510527174800063e-05, + "loss": 8.7425, + "step": 3375 + }, + { + "epoch": 1.220825852782765, + "grad_norm": 3.489474058151245, + "learning_rate": 1.663293618410315e-05, + "loss": 8.7695, + "step": 3400 + }, + { + "epoch": 1.229802513464991, + "grad_norm": 3.421856164932251, + "learning_rate": 1.6755345193406236e-05, + "loss": 8.7982, + "step": 3425 + }, + { + "epoch": 1.2387791741472172, + "grad_norm": 3.369530439376831, + "learning_rate": 1.687775420270932e-05, + "loss": 8.7905, + "step": 3450 + }, + { + "epoch": 1.2477558348294435, + "grad_norm": 3.429445743560791, + "learning_rate": 1.7000163212012407e-05, + "loss": 8.738, + "step": 3475 + }, + { + "epoch": 1.2567324955116697, + "grad_norm": 3.657141923904419, + "learning_rate": 1.7122572221315487e-05, + "loss": 8.7307, + "step": 3500 + }, + { + "epoch": 1.2657091561938958, + "grad_norm": 3.6843385696411133, + "learning_rate": 1.7244981230618574e-05, + "loss": 8.8149, + "step": 3525 + }, + { + "epoch": 1.2746858168761221, + "grad_norm": 3.546025276184082, + "learning_rate": 1.736739023992166e-05, + "loss": 8.7749, + "step": 3550 + }, + { + "epoch": 1.2836624775583483, + "grad_norm": 3.6940503120422363, + "learning_rate": 1.7489799249224744e-05, + "loss": 8.7899, + "step": 3575 + }, + { + "epoch": 1.2926391382405744, + "grad_norm": 3.571078300476074, + "learning_rate": 1.7612208258527828e-05, + "loss": 8.7015, + "step": 3600 + }, + { + "epoch": 1.3016157989228008, + "grad_norm": 3.535168170928955, + "learning_rate": 1.773461726783091e-05, + "loss": 8.7525, + "step": 3625 + }, + { + "epoch": 1.310592459605027, + "grad_norm": 3.3296639919281006, + "learning_rate": 1.7857026277133998e-05, + "loss": 8.7224, + "step": 3650 + }, + { + "epoch": 1.319569120287253, + "grad_norm": 3.559189796447754, + "learning_rate": 1.7979435286437085e-05, + "loss": 8.7525, + "step": 3675 + }, + { + "epoch": 1.3285457809694794, + "grad_norm": 3.613591194152832, + "learning_rate": 1.8101844295740165e-05, + "loss": 8.7541, + "step": 3700 + }, + { + "epoch": 1.3375224416517055, + "grad_norm": 3.7953031063079834, + "learning_rate": 1.822425330504325e-05, + "loss": 8.7225, + "step": 3725 + }, + { + "epoch": 1.3464991023339317, + "grad_norm": 3.3213746547698975, + "learning_rate": 1.8346662314346335e-05, + "loss": 8.7792, + "step": 3750 + }, + { + "epoch": 1.355475763016158, + "grad_norm": 3.5377542972564697, + "learning_rate": 1.8469071323649422e-05, + "loss": 8.7391, + "step": 3775 + }, + { + "epoch": 1.3644524236983842, + "grad_norm": 3.6847341060638428, + "learning_rate": 1.859148033295251e-05, + "loss": 8.6978, + "step": 3800 + }, + { + "epoch": 1.3734290843806103, + "grad_norm": 3.600795030593872, + "learning_rate": 1.871388934225559e-05, + "loss": 8.7294, + "step": 3825 + }, + { + "epoch": 1.3824057450628366, + "grad_norm": 3.6048150062561035, + "learning_rate": 1.8836298351558676e-05, + "loss": 8.6831, + "step": 3850 + }, + { + "epoch": 1.3913824057450628, + "grad_norm": 3.468243360519409, + "learning_rate": 1.895870736086176e-05, + "loss": 8.7841, + "step": 3875 + }, + { + "epoch": 1.400359066427289, + "grad_norm": 3.5154032707214355, + "learning_rate": 1.9081116370164846e-05, + "loss": 8.8165, + "step": 3900 + }, + { + "epoch": 1.4093357271095153, + "grad_norm": 3.417863130569458, + "learning_rate": 1.9203525379467926e-05, + "loss": 8.6733, + "step": 3925 + }, + { + "epoch": 1.4183123877917414, + "grad_norm": 4.58489990234375, + "learning_rate": 1.9325934388771013e-05, + "loss": 8.6999, + "step": 3950 + }, + { + "epoch": 1.4272890484739678, + "grad_norm": 3.658950090408325, + "learning_rate": 1.94483433980741e-05, + "loss": 8.7248, + "step": 3975 + }, + { + "epoch": 1.436265709156194, + "grad_norm": 3.5406038761138916, + "learning_rate": 1.9570752407377183e-05, + "loss": 8.6602, + "step": 4000 + }, + { + "epoch": 1.44524236983842, + "grad_norm": 3.369560718536377, + "learning_rate": 1.969316141668027e-05, + "loss": 8.6005, + "step": 4025 + }, + { + "epoch": 1.4542190305206464, + "grad_norm": 3.471907615661621, + "learning_rate": 1.981557042598335e-05, + "loss": 8.6592, + "step": 4050 + }, + { + "epoch": 1.4631956912028725, + "grad_norm": 3.6347670555114746, + "learning_rate": 1.9937979435286437e-05, + "loss": 8.6645, + "step": 4075 + }, + { + "epoch": 1.4721723518850989, + "grad_norm": 3.5391299724578857, + "learning_rate": 2.0060388444589524e-05, + "loss": 8.7028, + "step": 4100 + }, + { + "epoch": 1.481149012567325, + "grad_norm": 3.718747138977051, + "learning_rate": 2.0182797453892608e-05, + "loss": 8.6514, + "step": 4125 + }, + { + "epoch": 1.4901256732495511, + "grad_norm": 4.32841682434082, + "learning_rate": 2.030520646319569e-05, + "loss": 8.608, + "step": 4150 + }, + { + "epoch": 1.4991023339317775, + "grad_norm": 3.4185009002685547, + "learning_rate": 2.0427615472498775e-05, + "loss": 8.6457, + "step": 4175 + }, + { + "epoch": 1.5080789946140036, + "grad_norm": 4.27463960647583, + "learning_rate": 2.055002448180186e-05, + "loss": 8.6587, + "step": 4200 + }, + { + "epoch": 1.5170556552962298, + "grad_norm": 4.1021904945373535, + "learning_rate": 2.067243349110495e-05, + "loss": 8.6341, + "step": 4225 + }, + { + "epoch": 1.5260323159784561, + "grad_norm": 3.501014471054077, + "learning_rate": 2.079484250040803e-05, + "loss": 8.7058, + "step": 4250 + }, + { + "epoch": 1.5350089766606823, + "grad_norm": 3.6494107246398926, + "learning_rate": 2.0917251509711115e-05, + "loss": 8.6445, + "step": 4275 + }, + { + "epoch": 1.5439856373429084, + "grad_norm": 3.5575292110443115, + "learning_rate": 2.10396605190142e-05, + "loss": 8.6991, + "step": 4300 + }, + { + "epoch": 1.5529622980251347, + "grad_norm": 3.7855875492095947, + "learning_rate": 2.1162069528317286e-05, + "loss": 8.6649, + "step": 4325 + }, + { + "epoch": 1.5619389587073609, + "grad_norm": 3.633938789367676, + "learning_rate": 2.1284478537620372e-05, + "loss": 8.6093, + "step": 4350 + }, + { + "epoch": 1.570915619389587, + "grad_norm": 4.158860683441162, + "learning_rate": 2.1406887546923453e-05, + "loss": 8.668, + "step": 4375 + }, + { + "epoch": 1.5798922800718134, + "grad_norm": 3.5720789432525635, + "learning_rate": 2.152929655622654e-05, + "loss": 8.7017, + "step": 4400 + }, + { + "epoch": 1.5888689407540395, + "grad_norm": 3.623241662979126, + "learning_rate": 2.1651705565529623e-05, + "loss": 8.6003, + "step": 4425 + }, + { + "epoch": 1.5978456014362656, + "grad_norm": 3.7537903785705566, + "learning_rate": 2.177411457483271e-05, + "loss": 8.6199, + "step": 4450 + }, + { + "epoch": 1.606822262118492, + "grad_norm": 4.340215682983398, + "learning_rate": 2.189652358413579e-05, + "loss": 8.5999, + "step": 4475 + }, + { + "epoch": 1.6157989228007181, + "grad_norm": 4.174263000488281, + "learning_rate": 2.2018932593438877e-05, + "loss": 8.5664, + "step": 4500 + }, + { + "epoch": 1.6247755834829443, + "grad_norm": 3.7816569805145264, + "learning_rate": 2.2141341602741964e-05, + "loss": 8.645, + "step": 4525 + }, + { + "epoch": 1.6337522441651706, + "grad_norm": 3.594071388244629, + "learning_rate": 2.2263750612045047e-05, + "loss": 8.6532, + "step": 4550 + }, + { + "epoch": 1.6427289048473968, + "grad_norm": 3.484238862991333, + "learning_rate": 2.2386159621348134e-05, + "loss": 8.5995, + "step": 4575 + }, + { + "epoch": 1.6517055655296229, + "grad_norm": 3.4544036388397217, + "learning_rate": 2.2508568630651214e-05, + "loss": 8.5889, + "step": 4600 + }, + { + "epoch": 1.6606822262118492, + "grad_norm": 3.583512783050537, + "learning_rate": 2.26309776399543e-05, + "loss": 8.5958, + "step": 4625 + }, + { + "epoch": 1.6696588868940754, + "grad_norm": 4.013895511627197, + "learning_rate": 2.2753386649257388e-05, + "loss": 8.5927, + "step": 4650 + }, + { + "epoch": 1.6786355475763015, + "grad_norm": 4.089428901672363, + "learning_rate": 2.287579565856047e-05, + "loss": 8.6366, + "step": 4675 + }, + { + "epoch": 1.6876122082585279, + "grad_norm": 3.354036331176758, + "learning_rate": 2.2998204667863555e-05, + "loss": 8.6279, + "step": 4700 + }, + { + "epoch": 1.696588868940754, + "grad_norm": 3.352022647857666, + "learning_rate": 2.3120613677166638e-05, + "loss": 8.5971, + "step": 4725 + }, + { + "epoch": 1.7055655296229801, + "grad_norm": 3.41001033782959, + "learning_rate": 2.3243022686469725e-05, + "loss": 8.5792, + "step": 4750 + }, + { + "epoch": 1.7145421903052065, + "grad_norm": 3.7727572917938232, + "learning_rate": 2.3365431695772812e-05, + "loss": 8.5863, + "step": 4775 + }, + { + "epoch": 1.7235188509874326, + "grad_norm": 3.5080995559692383, + "learning_rate": 2.3487840705075892e-05, + "loss": 8.5817, + "step": 4800 + }, + { + "epoch": 1.7324955116696588, + "grad_norm": 4.126153945922852, + "learning_rate": 2.361024971437898e-05, + "loss": 8.6552, + "step": 4825 + }, + { + "epoch": 1.7414721723518851, + "grad_norm": 3.9986846446990967, + "learning_rate": 2.3732658723682062e-05, + "loss": 8.6162, + "step": 4850 + }, + { + "epoch": 1.7504488330341115, + "grad_norm": 3.815275192260742, + "learning_rate": 2.385506773298515e-05, + "loss": 8.6545, + "step": 4875 + }, + { + "epoch": 1.7594254937163374, + "grad_norm": 3.547140121459961, + "learning_rate": 2.3977476742288236e-05, + "loss": 8.5246, + "step": 4900 + }, + { + "epoch": 1.7684021543985637, + "grad_norm": 3.3800015449523926, + "learning_rate": 2.4099885751591316e-05, + "loss": 8.5884, + "step": 4925 + }, + { + "epoch": 1.77737881508079, + "grad_norm": 4.30742883682251, + "learning_rate": 2.4222294760894403e-05, + "loss": 8.5538, + "step": 4950 + }, + { + "epoch": 1.786355475763016, + "grad_norm": 3.6834473609924316, + "learning_rate": 2.4344703770197487e-05, + "loss": 8.5311, + "step": 4975 + }, + { + "epoch": 1.7953321364452424, + "grad_norm": 3.668306589126587, + "learning_rate": 2.4467112779500573e-05, + "loss": 8.5576, + "step": 5000 + }, + { + "epoch": 1.8043087971274687, + "grad_norm": 3.9610564708709717, + "learning_rate": 2.4589521788803653e-05, + "loss": 8.6011, + "step": 5025 + }, + { + "epoch": 1.8132854578096946, + "grad_norm": 3.7383155822753906, + "learning_rate": 2.471193079810674e-05, + "loss": 8.4977, + "step": 5050 + }, + { + "epoch": 1.822262118491921, + "grad_norm": 3.355693817138672, + "learning_rate": 2.4834339807409827e-05, + "loss": 8.4453, + "step": 5075 + }, + { + "epoch": 1.8312387791741473, + "grad_norm": 3.5332114696502686, + "learning_rate": 2.495674881671291e-05, + "loss": 8.5234, + "step": 5100 + }, + { + "epoch": 1.8402154398563735, + "grad_norm": 3.656186819076538, + "learning_rate": 2.5079157826015998e-05, + "loss": 8.5365, + "step": 5125 + }, + { + "epoch": 1.8491921005385996, + "grad_norm": 3.86851167678833, + "learning_rate": 2.5201566835319078e-05, + "loss": 8.4576, + "step": 5150 + }, + { + "epoch": 1.858168761220826, + "grad_norm": 3.9464573860168457, + "learning_rate": 2.5323975844622165e-05, + "loss": 8.4805, + "step": 5175 + }, + { + "epoch": 1.867145421903052, + "grad_norm": 5.354069232940674, + "learning_rate": 2.544638485392525e-05, + "loss": 8.508, + "step": 5200 + }, + { + "epoch": 1.8761220825852782, + "grad_norm": 4.059199333190918, + "learning_rate": 2.5568793863228335e-05, + "loss": 8.5081, + "step": 5225 + }, + { + "epoch": 1.8850987432675046, + "grad_norm": 3.725834608078003, + "learning_rate": 2.569120287253142e-05, + "loss": 8.5508, + "step": 5250 + }, + { + "epoch": 1.8940754039497307, + "grad_norm": 3.4674458503723145, + "learning_rate": 2.5813611881834502e-05, + "loss": 8.5138, + "step": 5275 + }, + { + "epoch": 1.9030520646319569, + "grad_norm": 3.767606496810913, + "learning_rate": 2.593602089113759e-05, + "loss": 8.4232, + "step": 5300 + }, + { + "epoch": 1.9120287253141832, + "grad_norm": 3.697279691696167, + "learning_rate": 2.6058429900440676e-05, + "loss": 8.4562, + "step": 5325 + }, + { + "epoch": 1.9210053859964094, + "grad_norm": 3.819312810897827, + "learning_rate": 2.6180838909743756e-05, + "loss": 8.4918, + "step": 5350 + }, + { + "epoch": 1.9299820466786355, + "grad_norm": 3.7438390254974365, + "learning_rate": 2.6303247919046842e-05, + "loss": 8.451, + "step": 5375 + }, + { + "epoch": 1.9389587073608618, + "grad_norm": 3.815999984741211, + "learning_rate": 2.6425656928349926e-05, + "loss": 8.4395, + "step": 5400 + }, + { + "epoch": 1.947935368043088, + "grad_norm": 4.05302619934082, + "learning_rate": 2.6548065937653013e-05, + "loss": 8.5029, + "step": 5425 + }, + { + "epoch": 1.9569120287253141, + "grad_norm": 3.577946424484253, + "learning_rate": 2.66704749469561e-05, + "loss": 8.4294, + "step": 5450 + }, + { + "epoch": 1.9658886894075405, + "grad_norm": 9.678783416748047, + "learning_rate": 2.679288395625918e-05, + "loss": 8.4482, + "step": 5475 + }, + { + "epoch": 1.9748653500897666, + "grad_norm": 4.010776996612549, + "learning_rate": 2.6915292965562267e-05, + "loss": 8.3947, + "step": 5500 + }, + { + "epoch": 1.9838420107719927, + "grad_norm": 3.5185089111328125, + "learning_rate": 2.703770197486535e-05, + "loss": 8.4666, + "step": 5525 + }, + { + "epoch": 1.992818671454219, + "grad_norm": 4.271341323852539, + "learning_rate": 2.7160110984168437e-05, + "loss": 8.3362, + "step": 5550 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.020143967311400914, + "eval_f1_macro": 4.6738374362974274e-05, + "eval_f1_micro": 0.020143967311400914, + "eval_f1_weighted": 0.00183801037387296, + "eval_loss": 8.67784309387207, + "eval_precision_macro": 3.189238901918665e-05, + "eval_precision_micro": 0.020143967311400914, + "eval_precision_weighted": 0.0011738580604653544, + "eval_recall_macro": 0.0004805732991486159, + "eval_recall_micro": 0.020143967311400914, + "eval_recall_weighted": 0.020143967311400914, + "eval_runtime": 85.0539, + "eval_samples_per_second": 615.762, + "eval_steps_per_second": 9.629, + "step": 5570 + }, + { + "epoch": 2.0017953321364454, + "grad_norm": 3.810455560684204, + "learning_rate": 2.7282519993471517e-05, + "loss": 8.4431, + "step": 5575 + }, + { + "epoch": 2.0107719928186714, + "grad_norm": 3.8905410766601562, + "learning_rate": 2.7404929002774604e-05, + "loss": 8.3018, + "step": 5600 + }, + { + "epoch": 2.0197486535008977, + "grad_norm": 3.929053544998169, + "learning_rate": 2.752733801207769e-05, + "loss": 8.3263, + "step": 5625 + }, + { + "epoch": 2.028725314183124, + "grad_norm": 3.9690544605255127, + "learning_rate": 2.7649747021380774e-05, + "loss": 8.2044, + "step": 5650 + }, + { + "epoch": 2.03770197486535, + "grad_norm": 3.8752903938293457, + "learning_rate": 2.777215603068386e-05, + "loss": 8.2499, + "step": 5675 + }, + { + "epoch": 2.0466786355475763, + "grad_norm": 4.219008922576904, + "learning_rate": 2.789456503998694e-05, + "loss": 8.2765, + "step": 5700 + }, + { + "epoch": 2.0556552962298027, + "grad_norm": 4.583775043487549, + "learning_rate": 2.8016974049290028e-05, + "loss": 8.2595, + "step": 5725 + }, + { + "epoch": 2.0646319569120286, + "grad_norm": 3.663065195083618, + "learning_rate": 2.8139383058593115e-05, + "loss": 8.2368, + "step": 5750 + }, + { + "epoch": 2.073608617594255, + "grad_norm": 4.059401035308838, + "learning_rate": 2.82617920678962e-05, + "loss": 8.304, + "step": 5775 + }, + { + "epoch": 2.0825852782764813, + "grad_norm": 4.920651435852051, + "learning_rate": 2.8384201077199282e-05, + "loss": 8.1972, + "step": 5800 + }, + { + "epoch": 2.0915619389587072, + "grad_norm": 4.369622707366943, + "learning_rate": 2.8506610086502365e-05, + "loss": 8.2356, + "step": 5825 + }, + { + "epoch": 2.1005385996409336, + "grad_norm": 3.5345966815948486, + "learning_rate": 2.8629019095805452e-05, + "loss": 8.2365, + "step": 5850 + }, + { + "epoch": 2.10951526032316, + "grad_norm": 3.6033074855804443, + "learning_rate": 2.875142810510854e-05, + "loss": 8.1474, + "step": 5875 + }, + { + "epoch": 2.118491921005386, + "grad_norm": 4.111495494842529, + "learning_rate": 2.887383711441162e-05, + "loss": 8.1056, + "step": 5900 + }, + { + "epoch": 2.127468581687612, + "grad_norm": 3.8606605529785156, + "learning_rate": 2.8996246123714706e-05, + "loss": 8.1754, + "step": 5925 + }, + { + "epoch": 2.1364452423698386, + "grad_norm": 3.6052939891815186, + "learning_rate": 2.911865513301779e-05, + "loss": 8.12, + "step": 5950 + }, + { + "epoch": 2.1454219030520645, + "grad_norm": 4.053567886352539, + "learning_rate": 2.9241064142320876e-05, + "loss": 8.1664, + "step": 5975 + }, + { + "epoch": 2.154398563734291, + "grad_norm": 3.5224320888519287, + "learning_rate": 2.9363473151623963e-05, + "loss": 8.2068, + "step": 6000 + }, + { + "epoch": 2.163375224416517, + "grad_norm": 3.4998466968536377, + "learning_rate": 2.9485882160927043e-05, + "loss": 8.1183, + "step": 6025 + }, + { + "epoch": 2.172351885098743, + "grad_norm": 3.8649959564208984, + "learning_rate": 2.960829117023013e-05, + "loss": 8.1797, + "step": 6050 + }, + { + "epoch": 2.1813285457809695, + "grad_norm": 4.538026809692383, + "learning_rate": 2.9730700179533214e-05, + "loss": 8.1798, + "step": 6075 + }, + { + "epoch": 2.190305206463196, + "grad_norm": 3.707103729248047, + "learning_rate": 2.98531091888363e-05, + "loss": 8.1837, + "step": 6100 + }, + { + "epoch": 2.1992818671454217, + "grad_norm": 3.472823143005371, + "learning_rate": 2.997551819813938e-05, + "loss": 8.2306, + "step": 6125 + }, + { + "epoch": 2.208258527827648, + "grad_norm": 3.7994871139526367, + "learning_rate": 2.998911919917306e-05, + "loss": 8.1638, + "step": 6150 + }, + { + "epoch": 2.2172351885098744, + "grad_norm": 4.035151481628418, + "learning_rate": 2.997551819813938e-05, + "loss": 8.1809, + "step": 6175 + }, + { + "epoch": 2.2262118491921004, + "grad_norm": 3.580284357070923, + "learning_rate": 2.9961917197105707e-05, + "loss": 8.0536, + "step": 6200 + }, + { + "epoch": 2.2351885098743267, + "grad_norm": 3.6368203163146973, + "learning_rate": 2.9948316196072033e-05, + "loss": 8.1916, + "step": 6225 + }, + { + "epoch": 2.244165170556553, + "grad_norm": 4.26426887512207, + "learning_rate": 2.9934715195038356e-05, + "loss": 7.9965, + "step": 6250 + }, + { + "epoch": 2.253141831238779, + "grad_norm": 4.299463272094727, + "learning_rate": 2.992111419400468e-05, + "loss": 8.1203, + "step": 6275 + }, + { + "epoch": 2.2621184919210053, + "grad_norm": 3.5705506801605225, + "learning_rate": 2.9907513192971002e-05, + "loss": 8.1003, + "step": 6300 + }, + { + "epoch": 2.2710951526032317, + "grad_norm": 3.5000367164611816, + "learning_rate": 2.989391219193733e-05, + "loss": 8.1502, + "step": 6325 + }, + { + "epoch": 2.280071813285458, + "grad_norm": 3.802910804748535, + "learning_rate": 2.988031119090365e-05, + "loss": 8.107, + "step": 6350 + }, + { + "epoch": 2.289048473967684, + "grad_norm": 4.049758434295654, + "learning_rate": 2.9866710189869974e-05, + "loss": 8.164, + "step": 6375 + }, + { + "epoch": 2.2980251346499103, + "grad_norm": 4.454545974731445, + "learning_rate": 2.98531091888363e-05, + "loss": 7.9735, + "step": 6400 + }, + { + "epoch": 2.3070017953321367, + "grad_norm": 3.8377397060394287, + "learning_rate": 2.9839508187802624e-05, + "loss": 8.1196, + "step": 6425 + }, + { + "epoch": 2.3159784560143626, + "grad_norm": 4.4243669509887695, + "learning_rate": 2.9825907186768946e-05, + "loss": 8.0872, + "step": 6450 + }, + { + "epoch": 2.324955116696589, + "grad_norm": 3.5931923389434814, + "learning_rate": 2.981230618573527e-05, + "loss": 8.0325, + "step": 6475 + }, + { + "epoch": 2.3339317773788153, + "grad_norm": 4.676905155181885, + "learning_rate": 2.9798705184701596e-05, + "loss": 8.0346, + "step": 6500 + }, + { + "epoch": 2.342908438061041, + "grad_norm": 3.978621482849121, + "learning_rate": 2.978510418366792e-05, + "loss": 7.9548, + "step": 6525 + }, + { + "epoch": 2.3518850987432676, + "grad_norm": 4.314471244812012, + "learning_rate": 2.977150318263424e-05, + "loss": 7.9723, + "step": 6550 + }, + { + "epoch": 2.360861759425494, + "grad_norm": 4.280722618103027, + "learning_rate": 2.9757902181600568e-05, + "loss": 8.0263, + "step": 6575 + }, + { + "epoch": 2.36983842010772, + "grad_norm": 3.8938496112823486, + "learning_rate": 2.974430118056689e-05, + "loss": 8.0137, + "step": 6600 + }, + { + "epoch": 2.378815080789946, + "grad_norm": 3.8950908184051514, + "learning_rate": 2.9730700179533214e-05, + "loss": 7.8949, + "step": 6625 + }, + { + "epoch": 2.3877917414721725, + "grad_norm": 3.994596481323242, + "learning_rate": 2.9717099178499537e-05, + "loss": 7.9722, + "step": 6650 + }, + { + "epoch": 2.3967684021543985, + "grad_norm": 3.9143664836883545, + "learning_rate": 2.9703498177465863e-05, + "loss": 7.942, + "step": 6675 + }, + { + "epoch": 2.405745062836625, + "grad_norm": 3.9852113723754883, + "learning_rate": 2.9689897176432186e-05, + "loss": 7.9485, + "step": 6700 + }, + { + "epoch": 2.414721723518851, + "grad_norm": 3.5762526988983154, + "learning_rate": 2.967629617539851e-05, + "loss": 7.9822, + "step": 6725 + }, + { + "epoch": 2.423698384201077, + "grad_norm": 4.05647087097168, + "learning_rate": 2.9662695174364835e-05, + "loss": 8.0409, + "step": 6750 + }, + { + "epoch": 2.4326750448833034, + "grad_norm": 3.703871250152588, + "learning_rate": 2.9649094173331158e-05, + "loss": 7.9335, + "step": 6775 + }, + { + "epoch": 2.44165170556553, + "grad_norm": 4.216712951660156, + "learning_rate": 2.963549317229748e-05, + "loss": 7.9965, + "step": 6800 + }, + { + "epoch": 2.4506283662477557, + "grad_norm": 3.966404438018799, + "learning_rate": 2.9621892171263807e-05, + "loss": 7.9238, + "step": 6825 + }, + { + "epoch": 2.459605026929982, + "grad_norm": 3.864591598510742, + "learning_rate": 2.960829117023013e-05, + "loss": 7.7767, + "step": 6850 + }, + { + "epoch": 2.4685816876122084, + "grad_norm": 4.291782379150391, + "learning_rate": 2.9594690169196453e-05, + "loss": 7.9624, + "step": 6875 + }, + { + "epoch": 2.4775583482944343, + "grad_norm": 4.558504104614258, + "learning_rate": 2.9581089168162776e-05, + "loss": 7.9523, + "step": 6900 + }, + { + "epoch": 2.4865350089766607, + "grad_norm": 4.219671249389648, + "learning_rate": 2.9567488167129102e-05, + "loss": 7.9622, + "step": 6925 + }, + { + "epoch": 2.495511669658887, + "grad_norm": 5.037184715270996, + "learning_rate": 2.9553887166095425e-05, + "loss": 7.9335, + "step": 6950 + }, + { + "epoch": 2.504488330341113, + "grad_norm": 4.224401473999023, + "learning_rate": 2.9540286165061748e-05, + "loss": 7.915, + "step": 6975 + }, + { + "epoch": 2.5134649910233393, + "grad_norm": 4.146813869476318, + "learning_rate": 2.9526685164028075e-05, + "loss": 7.8346, + "step": 7000 + }, + { + "epoch": 2.5224416517055657, + "grad_norm": 4.327123641967773, + "learning_rate": 2.9513084162994398e-05, + "loss": 7.8992, + "step": 7025 + }, + { + "epoch": 2.5314183123877916, + "grad_norm": 4.09780216217041, + "learning_rate": 2.949948316196072e-05, + "loss": 7.7509, + "step": 7050 + }, + { + "epoch": 2.540394973070018, + "grad_norm": 3.708174228668213, + "learning_rate": 2.9485882160927043e-05, + "loss": 7.8864, + "step": 7075 + }, + { + "epoch": 2.5493716337522443, + "grad_norm": 4.543442726135254, + "learning_rate": 2.947228115989337e-05, + "loss": 7.7985, + "step": 7100 + }, + { + "epoch": 2.55834829443447, + "grad_norm": 4.314262390136719, + "learning_rate": 2.9458680158859696e-05, + "loss": 7.8003, + "step": 7125 + }, + { + "epoch": 2.5673249551166966, + "grad_norm": 4.400716781616211, + "learning_rate": 2.9445079157826016e-05, + "loss": 7.8824, + "step": 7150 + }, + { + "epoch": 2.576301615798923, + "grad_norm": 3.732438325881958, + "learning_rate": 2.9431478156792342e-05, + "loss": 7.7859, + "step": 7175 + }, + { + "epoch": 2.585278276481149, + "grad_norm": 3.9268414974212646, + "learning_rate": 2.9417877155758665e-05, + "loss": 7.874, + "step": 7200 + }, + { + "epoch": 2.594254937163375, + "grad_norm": 4.375518798828125, + "learning_rate": 2.9404276154724988e-05, + "loss": 7.8417, + "step": 7225 + }, + { + "epoch": 2.6032315978456015, + "grad_norm": 4.266812324523926, + "learning_rate": 2.939067515369131e-05, + "loss": 7.7733, + "step": 7250 + }, + { + "epoch": 2.6122082585278275, + "grad_norm": 4.4456281661987305, + "learning_rate": 2.9377074152657637e-05, + "loss": 7.9548, + "step": 7275 + }, + { + "epoch": 2.621184919210054, + "grad_norm": 3.9390878677368164, + "learning_rate": 2.9363473151623963e-05, + "loss": 7.8277, + "step": 7300 + }, + { + "epoch": 2.63016157989228, + "grad_norm": 3.8264572620391846, + "learning_rate": 2.9349872150590283e-05, + "loss": 7.7803, + "step": 7325 + }, + { + "epoch": 2.639138240574506, + "grad_norm": 3.892812728881836, + "learning_rate": 2.933627114955661e-05, + "loss": 7.7675, + "step": 7350 + }, + { + "epoch": 2.6481149012567324, + "grad_norm": 4.1571431159973145, + "learning_rate": 2.9322670148522932e-05, + "loss": 7.7328, + "step": 7375 + }, + { + "epoch": 2.657091561938959, + "grad_norm": 3.909924030303955, + "learning_rate": 2.9309069147489255e-05, + "loss": 7.7999, + "step": 7400 + }, + { + "epoch": 2.6660682226211847, + "grad_norm": 4.378894329071045, + "learning_rate": 2.9295468146455578e-05, + "loss": 7.7471, + "step": 7425 + }, + { + "epoch": 2.675044883303411, + "grad_norm": 4.578962326049805, + "learning_rate": 2.9281867145421904e-05, + "loss": 7.8213, + "step": 7450 + }, + { + "epoch": 2.6840215439856374, + "grad_norm": 3.7791943550109863, + "learning_rate": 2.926826614438823e-05, + "loss": 7.7907, + "step": 7475 + }, + { + "epoch": 2.6929982046678633, + "grad_norm": 4.088997840881348, + "learning_rate": 2.925466514335455e-05, + "loss": 7.7568, + "step": 7500 + }, + { + "epoch": 2.7019748653500897, + "grad_norm": 4.145353317260742, + "learning_rate": 2.9241064142320876e-05, + "loss": 7.7953, + "step": 7525 + }, + { + "epoch": 2.710951526032316, + "grad_norm": 3.932884693145752, + "learning_rate": 2.92274631412872e-05, + "loss": 7.8174, + "step": 7550 + }, + { + "epoch": 2.719928186714542, + "grad_norm": 4.223902702331543, + "learning_rate": 2.9213862140253522e-05, + "loss": 7.7187, + "step": 7575 + }, + { + "epoch": 2.7289048473967683, + "grad_norm": 4.5789475440979, + "learning_rate": 2.920026113921985e-05, + "loss": 7.7759, + "step": 7600 + }, + { + "epoch": 2.7378815080789947, + "grad_norm": 3.8797249794006348, + "learning_rate": 2.918666013818617e-05, + "loss": 7.7303, + "step": 7625 + }, + { + "epoch": 2.7468581687612206, + "grad_norm": 3.972078561782837, + "learning_rate": 2.9173059137152494e-05, + "loss": 7.8359, + "step": 7650 + }, + { + "epoch": 2.755834829443447, + "grad_norm": 4.015561103820801, + "learning_rate": 2.9159458136118817e-05, + "loss": 7.7385, + "step": 7675 + }, + { + "epoch": 2.7648114901256733, + "grad_norm": 4.0610175132751465, + "learning_rate": 2.9145857135085144e-05, + "loss": 7.8182, + "step": 7700 + }, + { + "epoch": 2.773788150807899, + "grad_norm": 3.830951690673828, + "learning_rate": 2.9132256134051467e-05, + "loss": 7.815, + "step": 7725 + }, + { + "epoch": 2.7827648114901256, + "grad_norm": 4.563226222991943, + "learning_rate": 2.911865513301779e-05, + "loss": 7.6803, + "step": 7750 + }, + { + "epoch": 2.791741472172352, + "grad_norm": 3.881695032119751, + "learning_rate": 2.9105054131984116e-05, + "loss": 7.7342, + "step": 7775 + }, + { + "epoch": 2.800718132854578, + "grad_norm": 3.9463860988616943, + "learning_rate": 2.909145313095044e-05, + "loss": 7.7088, + "step": 7800 + }, + { + "epoch": 2.809694793536804, + "grad_norm": 4.120128631591797, + "learning_rate": 2.9077852129916762e-05, + "loss": 7.6627, + "step": 7825 + }, + { + "epoch": 2.8186714542190305, + "grad_norm": 4.3098859786987305, + "learning_rate": 2.9064251128883085e-05, + "loss": 7.6363, + "step": 7850 + }, + { + "epoch": 2.827648114901257, + "grad_norm": 4.071619987487793, + "learning_rate": 2.905065012784941e-05, + "loss": 7.6963, + "step": 7875 + }, + { + "epoch": 2.836624775583483, + "grad_norm": 3.8804428577423096, + "learning_rate": 2.9037049126815737e-05, + "loss": 7.6416, + "step": 7900 + }, + { + "epoch": 2.845601436265709, + "grad_norm": 4.083714485168457, + "learning_rate": 2.9023448125782057e-05, + "loss": 7.7278, + "step": 7925 + }, + { + "epoch": 2.8545780969479355, + "grad_norm": 4.240456581115723, + "learning_rate": 2.9009847124748383e-05, + "loss": 7.6262, + "step": 7950 + }, + { + "epoch": 2.8635547576301614, + "grad_norm": 4.835022449493408, + "learning_rate": 2.8996246123714706e-05, + "loss": 7.702, + "step": 7975 + }, + { + "epoch": 2.872531418312388, + "grad_norm": 4.663205146789551, + "learning_rate": 2.898264512268103e-05, + "loss": 7.6106, + "step": 8000 + }, + { + "epoch": 2.881508078994614, + "grad_norm": 4.244050979614258, + "learning_rate": 2.8969044121647352e-05, + "loss": 7.6543, + "step": 8025 + }, + { + "epoch": 2.89048473967684, + "grad_norm": 4.555705547332764, + "learning_rate": 2.8955443120613678e-05, + "loss": 7.6147, + "step": 8050 + }, + { + "epoch": 2.8994614003590664, + "grad_norm": 4.634610652923584, + "learning_rate": 2.8941842119580005e-05, + "loss": 7.6375, + "step": 8075 + }, + { + "epoch": 2.9084380610412928, + "grad_norm": 4.7310872077941895, + "learning_rate": 2.8928241118546324e-05, + "loss": 7.6672, + "step": 8100 + }, + { + "epoch": 2.917414721723519, + "grad_norm": 4.2090535163879395, + "learning_rate": 2.891464011751265e-05, + "loss": 7.6896, + "step": 8125 + }, + { + "epoch": 2.926391382405745, + "grad_norm": 4.26003885269165, + "learning_rate": 2.8901039116478973e-05, + "loss": 7.6139, + "step": 8150 + }, + { + "epoch": 2.9353680430879714, + "grad_norm": 3.9109861850738525, + "learning_rate": 2.8887438115445296e-05, + "loss": 7.6489, + "step": 8175 + }, + { + "epoch": 2.9443447037701977, + "grad_norm": 3.941312074661255, + "learning_rate": 2.887383711441162e-05, + "loss": 7.6729, + "step": 8200 + }, + { + "epoch": 2.9533213644524237, + "grad_norm": 4.32257080078125, + "learning_rate": 2.8860236113377946e-05, + "loss": 7.5527, + "step": 8225 + }, + { + "epoch": 2.96229802513465, + "grad_norm": 4.645498752593994, + "learning_rate": 2.8846635112344272e-05, + "loss": 7.629, + "step": 8250 + }, + { + "epoch": 2.9712746858168764, + "grad_norm": 9.202658653259277, + "learning_rate": 2.883303411131059e-05, + "loss": 7.7176, + "step": 8275 + }, + { + "epoch": 2.9802513464991023, + "grad_norm": 4.260191917419434, + "learning_rate": 2.8819433110276918e-05, + "loss": 7.6286, + "step": 8300 + }, + { + "epoch": 2.9892280071813286, + "grad_norm": 4.0278520584106445, + "learning_rate": 2.880583210924324e-05, + "loss": 7.6078, + "step": 8325 + }, + { + "epoch": 2.998204667863555, + "grad_norm": 4.7067036628723145, + "learning_rate": 2.8792231108209564e-05, + "loss": 7.5723, + "step": 8350 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.040364309854314245, + "eval_f1_macro": 0.0003713899839519505, + "eval_f1_micro": 0.040364309854314245, + "eval_f1_weighted": 0.009067297034397171, + "eval_loss": 7.923557758331299, + "eval_precision_macro": 0.0002917604484715947, + "eval_precision_micro": 0.040364309854314245, + "eval_precision_weighted": 0.006342323431274457, + "eval_recall_macro": 0.0015586664412088494, + "eval_recall_micro": 0.040364309854314245, + "eval_recall_weighted": 0.040364309854314245, + "eval_runtime": 85.908, + "eval_samples_per_second": 609.64, + "eval_steps_per_second": 9.533, + "step": 8355 + }, + { + "epoch": 3.007181328545781, + "grad_norm": 4.291841983795166, + "learning_rate": 2.877863010717589e-05, + "loss": 7.4826, + "step": 8375 + }, + { + "epoch": 3.0161579892280073, + "grad_norm": 4.364449977874756, + "learning_rate": 2.8765029106142213e-05, + "loss": 7.4131, + "step": 8400 + }, + { + "epoch": 3.025134649910233, + "grad_norm": 4.276158332824707, + "learning_rate": 2.875142810510854e-05, + "loss": 7.3721, + "step": 8425 + }, + { + "epoch": 3.0341113105924595, + "grad_norm": 4.518945217132568, + "learning_rate": 2.873782710407486e-05, + "loss": 7.4414, + "step": 8450 + }, + { + "epoch": 3.043087971274686, + "grad_norm": 4.2682576179504395, + "learning_rate": 2.8724226103041185e-05, + "loss": 7.3415, + "step": 8475 + }, + { + "epoch": 3.0520646319569122, + "grad_norm": 4.0463433265686035, + "learning_rate": 2.8710625102007508e-05, + "loss": 7.435, + "step": 8500 + }, + { + "epoch": 3.061041292639138, + "grad_norm": 4.511585712432861, + "learning_rate": 2.869702410097383e-05, + "loss": 7.2637, + "step": 8525 + }, + { + "epoch": 3.0700179533213645, + "grad_norm": 3.960149049758911, + "learning_rate": 2.8683423099940157e-05, + "loss": 7.4639, + "step": 8550 + }, + { + "epoch": 3.078994614003591, + "grad_norm": 3.955357074737549, + "learning_rate": 2.8670366138947826e-05, + "loss": 7.4204, + "step": 8575 + }, + { + "epoch": 3.087971274685817, + "grad_norm": 4.323599338531494, + "learning_rate": 2.8656765137914153e-05, + "loss": 7.405, + "step": 8600 + }, + { + "epoch": 3.096947935368043, + "grad_norm": 4.224911689758301, + "learning_rate": 2.8643164136880475e-05, + "loss": 7.3838, + "step": 8625 + }, + { + "epoch": 3.1059245960502695, + "grad_norm": 4.005449295043945, + "learning_rate": 2.86295631358468e-05, + "loss": 7.3923, + "step": 8650 + }, + { + "epoch": 3.1149012567324954, + "grad_norm": 4.911019325256348, + "learning_rate": 2.8615962134813125e-05, + "loss": 7.3235, + "step": 8675 + }, + { + "epoch": 3.1238779174147218, + "grad_norm": 4.210154056549072, + "learning_rate": 2.8602361133779444e-05, + "loss": 7.3662, + "step": 8700 + }, + { + "epoch": 3.132854578096948, + "grad_norm": 4.508319854736328, + "learning_rate": 2.858876013274577e-05, + "loss": 7.3389, + "step": 8725 + }, + { + "epoch": 3.141831238779174, + "grad_norm": 4.50676155090332, + "learning_rate": 2.8575159131712097e-05, + "loss": 7.369, + "step": 8750 + }, + { + "epoch": 3.1508078994614004, + "grad_norm": 4.309554100036621, + "learning_rate": 2.856155813067842e-05, + "loss": 7.333, + "step": 8775 + }, + { + "epoch": 3.1597845601436267, + "grad_norm": 4.3068389892578125, + "learning_rate": 2.8547957129644743e-05, + "loss": 7.294, + "step": 8800 + }, + { + "epoch": 3.1687612208258527, + "grad_norm": 3.9945571422576904, + "learning_rate": 2.8534356128611066e-05, + "loss": 7.3287, + "step": 8825 + }, + { + "epoch": 3.177737881508079, + "grad_norm": 4.984664440155029, + "learning_rate": 2.8520755127577392e-05, + "loss": 7.3824, + "step": 8850 + }, + { + "epoch": 3.1867145421903054, + "grad_norm": 4.480121612548828, + "learning_rate": 2.850715412654371e-05, + "loss": 7.245, + "step": 8875 + }, + { + "epoch": 3.1956912028725313, + "grad_norm": 4.436404705047607, + "learning_rate": 2.8493553125510038e-05, + "loss": 7.2902, + "step": 8900 + }, + { + "epoch": 3.2046678635547576, + "grad_norm": 4.16496467590332, + "learning_rate": 2.8479952124476364e-05, + "loss": 7.3412, + "step": 8925 + }, + { + "epoch": 3.213644524236984, + "grad_norm": 4.932197570800781, + "learning_rate": 2.8466351123442687e-05, + "loss": 7.3339, + "step": 8950 + }, + { + "epoch": 3.22262118491921, + "grad_norm": 4.341710090637207, + "learning_rate": 2.845275012240901e-05, + "loss": 7.3247, + "step": 8975 + }, + { + "epoch": 3.2315978456014363, + "grad_norm": 4.222483158111572, + "learning_rate": 2.8439149121375333e-05, + "loss": 7.2293, + "step": 9000 + }, + { + "epoch": 3.2405745062836626, + "grad_norm": 4.777403831481934, + "learning_rate": 2.842554812034166e-05, + "loss": 7.3215, + "step": 9025 + }, + { + "epoch": 3.2495511669658885, + "grad_norm": 4.994255542755127, + "learning_rate": 2.841194711930798e-05, + "loss": 7.3197, + "step": 9050 + }, + { + "epoch": 3.258527827648115, + "grad_norm": 4.174838066101074, + "learning_rate": 2.8398346118274305e-05, + "loss": 7.4234, + "step": 9075 + }, + { + "epoch": 3.2675044883303412, + "grad_norm": 5.492510795593262, + "learning_rate": 2.838474511724063e-05, + "loss": 7.3188, + "step": 9100 + }, + { + "epoch": 3.276481149012567, + "grad_norm": 4.795655727386475, + "learning_rate": 2.8371144116206954e-05, + "loss": 7.3201, + "step": 9125 + }, + { + "epoch": 3.2854578096947935, + "grad_norm": 4.100697994232178, + "learning_rate": 2.8357543115173277e-05, + "loss": 7.2567, + "step": 9150 + }, + { + "epoch": 3.29443447037702, + "grad_norm": 4.36522912979126, + "learning_rate": 2.83439421141396e-05, + "loss": 7.1949, + "step": 9175 + }, + { + "epoch": 3.3034111310592458, + "grad_norm": 4.485873699188232, + "learning_rate": 2.8330341113105927e-05, + "loss": 7.3359, + "step": 9200 + }, + { + "epoch": 3.312387791741472, + "grad_norm": 4.570251941680908, + "learning_rate": 2.831674011207225e-05, + "loss": 7.2541, + "step": 9225 + }, + { + "epoch": 3.3213644524236985, + "grad_norm": 4.160237789154053, + "learning_rate": 2.8303139111038572e-05, + "loss": 7.2323, + "step": 9250 + }, + { + "epoch": 3.3303411131059244, + "grad_norm": 4.046338081359863, + "learning_rate": 2.82895381100049e-05, + "loss": 7.3472, + "step": 9275 + }, + { + "epoch": 3.3393177737881508, + "grad_norm": 5.035393714904785, + "learning_rate": 2.827593710897122e-05, + "loss": 7.2552, + "step": 9300 + }, + { + "epoch": 3.348294434470377, + "grad_norm": 4.273747444152832, + "learning_rate": 2.8262336107937545e-05, + "loss": 7.1619, + "step": 9325 + }, + { + "epoch": 3.357271095152603, + "grad_norm": 4.130581855773926, + "learning_rate": 2.8248735106903868e-05, + "loss": 7.1638, + "step": 9350 + }, + { + "epoch": 3.3662477558348294, + "grad_norm": 5.46106481552124, + "learning_rate": 2.8235134105870194e-05, + "loss": 7.2968, + "step": 9375 + }, + { + "epoch": 3.3752244165170557, + "grad_norm": 4.929064750671387, + "learning_rate": 2.8221533104836517e-05, + "loss": 7.1596, + "step": 9400 + }, + { + "epoch": 3.3842010771992816, + "grad_norm": 4.462962627410889, + "learning_rate": 2.820793210380284e-05, + "loss": 7.2969, + "step": 9425 + }, + { + "epoch": 3.393177737881508, + "grad_norm": 6.100955963134766, + "learning_rate": 2.8194331102769166e-05, + "loss": 7.1979, + "step": 9450 + }, + { + "epoch": 3.4021543985637344, + "grad_norm": 5.617452144622803, + "learning_rate": 2.8180730101735486e-05, + "loss": 7.2018, + "step": 9475 + }, + { + "epoch": 3.4111310592459603, + "grad_norm": 4.639012336730957, + "learning_rate": 2.8167129100701812e-05, + "loss": 7.2263, + "step": 9500 + }, + { + "epoch": 3.4201077199281866, + "grad_norm": 4.075222015380859, + "learning_rate": 2.8153528099668138e-05, + "loss": 7.3157, + "step": 9525 + }, + { + "epoch": 3.429084380610413, + "grad_norm": 4.12158727645874, + "learning_rate": 2.813992709863446e-05, + "loss": 7.2863, + "step": 9550 + }, + { + "epoch": 3.438061041292639, + "grad_norm": 4.648802280426025, + "learning_rate": 2.8126326097600784e-05, + "loss": 7.171, + "step": 9575 + }, + { + "epoch": 3.4470377019748653, + "grad_norm": 5.154353141784668, + "learning_rate": 2.8112725096567107e-05, + "loss": 7.2409, + "step": 9600 + }, + { + "epoch": 3.4560143626570916, + "grad_norm": 4.7442803382873535, + "learning_rate": 2.8099124095533433e-05, + "loss": 7.2396, + "step": 9625 + }, + { + "epoch": 3.464991023339318, + "grad_norm": 5.436051845550537, + "learning_rate": 2.8085523094499753e-05, + "loss": 7.2338, + "step": 9650 + }, + { + "epoch": 3.473967684021544, + "grad_norm": 4.913558483123779, + "learning_rate": 2.807192209346608e-05, + "loss": 7.1864, + "step": 9675 + }, + { + "epoch": 3.4829443447037702, + "grad_norm": 4.733788967132568, + "learning_rate": 2.8058321092432405e-05, + "loss": 7.1857, + "step": 9700 + }, + { + "epoch": 3.4919210053859966, + "grad_norm": 4.321293354034424, + "learning_rate": 2.804472009139873e-05, + "loss": 7.2584, + "step": 9725 + }, + { + "epoch": 3.5008976660682225, + "grad_norm": 4.390983581542969, + "learning_rate": 2.803111909036505e-05, + "loss": 7.272, + "step": 9750 + }, + { + "epoch": 3.509874326750449, + "grad_norm": 4.587342262268066, + "learning_rate": 2.8017518089331374e-05, + "loss": 7.1649, + "step": 9775 + }, + { + "epoch": 3.5188509874326748, + "grad_norm": 4.806514263153076, + "learning_rate": 2.80039170882977e-05, + "loss": 7.1172, + "step": 9800 + }, + { + "epoch": 3.527827648114901, + "grad_norm": 4.270833492279053, + "learning_rate": 2.7990316087264023e-05, + "loss": 7.2049, + "step": 9825 + }, + { + "epoch": 3.5368043087971275, + "grad_norm": 4.865777492523193, + "learning_rate": 2.7976715086230346e-05, + "loss": 7.321, + "step": 9850 + }, + { + "epoch": 3.545780969479354, + "grad_norm": 4.647130489349365, + "learning_rate": 2.7963114085196673e-05, + "loss": 7.0734, + "step": 9875 + }, + { + "epoch": 3.5547576301615798, + "grad_norm": 4.620852947235107, + "learning_rate": 2.7949513084162996e-05, + "loss": 7.0816, + "step": 9900 + }, + { + "epoch": 3.563734290843806, + "grad_norm": 4.2177863121032715, + "learning_rate": 2.793591208312932e-05, + "loss": 7.1292, + "step": 9925 + }, + { + "epoch": 3.5727109515260325, + "grad_norm": 5.09830904006958, + "learning_rate": 2.792231108209564e-05, + "loss": 7.146, + "step": 9950 + }, + { + "epoch": 3.5816876122082584, + "grad_norm": 5.468635082244873, + "learning_rate": 2.7908710081061968e-05, + "loss": 7.0981, + "step": 9975 + }, + { + "epoch": 3.5906642728904847, + "grad_norm": 4.4557390213012695, + "learning_rate": 2.789510908002829e-05, + "loss": 7.1775, + "step": 10000 + }, + { + "epoch": 3.599640933572711, + "grad_norm": 4.696282863616943, + "learning_rate": 2.7881508078994614e-05, + "loss": 7.1346, + "step": 10025 + }, + { + "epoch": 3.608617594254937, + "grad_norm": 4.4554901123046875, + "learning_rate": 2.786790707796094e-05, + "loss": 7.1048, + "step": 10050 + }, + { + "epoch": 3.6175942549371634, + "grad_norm": 5.411306381225586, + "learning_rate": 2.7854306076927263e-05, + "loss": 7.1101, + "step": 10075 + }, + { + "epoch": 3.6265709156193897, + "grad_norm": 4.448919296264648, + "learning_rate": 2.7840705075893586e-05, + "loss": 7.1116, + "step": 10100 + }, + { + "epoch": 3.635547576301616, + "grad_norm": 4.631777763366699, + "learning_rate": 2.782710407485991e-05, + "loss": 7.1396, + "step": 10125 + }, + { + "epoch": 3.644524236983842, + "grad_norm": 5.649587154388428, + "learning_rate": 2.7813503073826235e-05, + "loss": 7.0888, + "step": 10150 + }, + { + "epoch": 3.6535008976660683, + "grad_norm": 4.744529724121094, + "learning_rate": 2.7799902072792558e-05, + "loss": 7.1399, + "step": 10175 + }, + { + "epoch": 3.6624775583482947, + "grad_norm": 5.389572620391846, + "learning_rate": 2.778630107175888e-05, + "loss": 7.1214, + "step": 10200 + }, + { + "epoch": 3.6714542190305206, + "grad_norm": 4.516997814178467, + "learning_rate": 2.7772700070725207e-05, + "loss": 7.1284, + "step": 10225 + }, + { + "epoch": 3.680430879712747, + "grad_norm": 4.594954967498779, + "learning_rate": 2.775909906969153e-05, + "loss": 7.0725, + "step": 10250 + }, + { + "epoch": 3.6894075403949733, + "grad_norm": 4.272472858428955, + "learning_rate": 2.7745498068657853e-05, + "loss": 7.1381, + "step": 10275 + }, + { + "epoch": 3.6983842010771992, + "grad_norm": 4.584362030029297, + "learning_rate": 2.773189706762418e-05, + "loss": 7.1608, + "step": 10300 + }, + { + "epoch": 3.7073608617594256, + "grad_norm": 4.476674556732178, + "learning_rate": 2.7718296066590502e-05, + "loss": 7.2127, + "step": 10325 + }, + { + "epoch": 3.716337522441652, + "grad_norm": 4.72322416305542, + "learning_rate": 2.7704695065556825e-05, + "loss": 7.158, + "step": 10350 + }, + { + "epoch": 3.725314183123878, + "grad_norm": 4.130901336669922, + "learning_rate": 2.7691094064523148e-05, + "loss": 7.0735, + "step": 10375 + }, + { + "epoch": 3.734290843806104, + "grad_norm": 5.266684055328369, + "learning_rate": 2.7677493063489475e-05, + "loss": 7.0583, + "step": 10400 + }, + { + "epoch": 3.7432675044883306, + "grad_norm": 5.526418209075928, + "learning_rate": 2.7663892062455797e-05, + "loss": 7.0519, + "step": 10425 + }, + { + "epoch": 3.7522441651705565, + "grad_norm": 4.7698469161987305, + "learning_rate": 2.765029106142212e-05, + "loss": 7.0843, + "step": 10450 + }, + { + "epoch": 3.761220825852783, + "grad_norm": 4.691744327545166, + "learning_rate": 2.7636690060388447e-05, + "loss": 7.1258, + "step": 10475 + }, + { + "epoch": 3.770197486535009, + "grad_norm": 4.733063220977783, + "learning_rate": 2.762308905935477e-05, + "loss": 7.1145, + "step": 10500 + }, + { + "epoch": 3.779174147217235, + "grad_norm": 5.544365406036377, + "learning_rate": 2.7609488058321093e-05, + "loss": 7.0392, + "step": 10525 + }, + { + "epoch": 3.7881508078994615, + "grad_norm": 4.6468682289123535, + "learning_rate": 2.7595887057287416e-05, + "loss": 7.0265, + "step": 10550 + }, + { + "epoch": 3.797127468581688, + "grad_norm": 4.756863594055176, + "learning_rate": 2.7582286056253742e-05, + "loss": 7.1827, + "step": 10575 + }, + { + "epoch": 3.8061041292639137, + "grad_norm": 4.464404582977295, + "learning_rate": 2.7568685055220065e-05, + "loss": 7.1633, + "step": 10600 + }, + { + "epoch": 3.81508078994614, + "grad_norm": 4.956221580505371, + "learning_rate": 2.7555084054186388e-05, + "loss": 7.1148, + "step": 10625 + }, + { + "epoch": 3.8240574506283664, + "grad_norm": 5.12930154800415, + "learning_rate": 2.7541483053152714e-05, + "loss": 6.9735, + "step": 10650 + }, + { + "epoch": 3.8330341113105924, + "grad_norm": 4.422974109649658, + "learning_rate": 2.7527882052119037e-05, + "loss": 7.1054, + "step": 10675 + }, + { + "epoch": 3.8420107719928187, + "grad_norm": 4.57315731048584, + "learning_rate": 2.751428105108536e-05, + "loss": 6.9588, + "step": 10700 + }, + { + "epoch": 3.850987432675045, + "grad_norm": 5.3082990646362305, + "learning_rate": 2.7500680050051683e-05, + "loss": 7.0921, + "step": 10725 + }, + { + "epoch": 3.859964093357271, + "grad_norm": 4.260403156280518, + "learning_rate": 2.748707904901801e-05, + "loss": 7.1037, + "step": 10750 + }, + { + "epoch": 3.8689407540394973, + "grad_norm": 4.527919292449951, + "learning_rate": 2.7473478047984335e-05, + "loss": 6.9412, + "step": 10775 + }, + { + "epoch": 3.8779174147217237, + "grad_norm": 4.931026458740234, + "learning_rate": 2.7459877046950655e-05, + "loss": 6.9359, + "step": 10800 + }, + { + "epoch": 3.8868940754039496, + "grad_norm": 4.717371940612793, + "learning_rate": 2.744627604591698e-05, + "loss": 6.9542, + "step": 10825 + }, + { + "epoch": 3.895870736086176, + "grad_norm": 4.613494873046875, + "learning_rate": 2.7432675044883304e-05, + "loss": 7.0741, + "step": 10850 + }, + { + "epoch": 3.9048473967684023, + "grad_norm": 4.484718322753906, + "learning_rate": 2.7419618083890973e-05, + "loss": 7.103, + "step": 10875 + }, + { + "epoch": 3.9138240574506282, + "grad_norm": 6.357881546020508, + "learning_rate": 2.74060170828573e-05, + "loss": 6.9778, + "step": 10900 + }, + { + "epoch": 3.9228007181328546, + "grad_norm": 4.754181861877441, + "learning_rate": 2.7392416081823623e-05, + "loss": 7.0823, + "step": 10925 + }, + { + "epoch": 3.931777378815081, + "grad_norm": 5.122907638549805, + "learning_rate": 2.737881508078995e-05, + "loss": 7.159, + "step": 10950 + }, + { + "epoch": 3.940754039497307, + "grad_norm": 4.739684104919434, + "learning_rate": 2.7365758119797618e-05, + "loss": 7.1159, + "step": 10975 + }, + { + "epoch": 3.949730700179533, + "grad_norm": 4.732143402099609, + "learning_rate": 2.735215711876394e-05, + "loss": 7.0376, + "step": 11000 + }, + { + "epoch": 3.9587073608617596, + "grad_norm": 4.990660190582275, + "learning_rate": 2.7338556117730267e-05, + "loss": 7.0048, + "step": 11025 + }, + { + "epoch": 3.9676840215439855, + "grad_norm": 4.788618564605713, + "learning_rate": 2.7324955116696587e-05, + "loss": 6.9456, + "step": 11050 + }, + { + "epoch": 3.976660682226212, + "grad_norm": 5.532341957092285, + "learning_rate": 2.7311354115662913e-05, + "loss": 7.0403, + "step": 11075 + }, + { + "epoch": 3.985637342908438, + "grad_norm": 5.019996166229248, + "learning_rate": 2.729775311462924e-05, + "loss": 6.9608, + "step": 11100 + }, + { + "epoch": 3.994614003590664, + "grad_norm": 4.57565975189209, + "learning_rate": 2.7284152113595562e-05, + "loss": 7.0322, + "step": 11125 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.0535963187138411, + "eval_f1_macro": 0.0008651252073441486, + "eval_f1_micro": 0.0535963187138411, + "eval_f1_weighted": 0.015602527088430167, + "eval_loss": 7.477319717407227, + "eval_precision_macro": 0.000753322983638723, + "eval_precision_micro": 0.0535963187138411, + "eval_precision_weighted": 0.011619975640680912, + "eval_recall_macro": 0.002864996644090035, + "eval_recall_micro": 0.0535963187138411, + "eval_recall_weighted": 0.0535963187138411, + "eval_runtime": 85.675, + "eval_samples_per_second": 611.298, + "eval_steps_per_second": 9.559, + "step": 11140 + }, + { + "epoch": 4.003590664272891, + "grad_norm": 4.810396671295166, + "learning_rate": 2.7270551112561885e-05, + "loss": 6.9089, + "step": 11150 + }, + { + "epoch": 4.012567324955117, + "grad_norm": 5.361721992492676, + "learning_rate": 2.7256950111528208e-05, + "loss": 6.6726, + "step": 11175 + }, + { + "epoch": 4.021543985637343, + "grad_norm": 4.60791015625, + "learning_rate": 2.7243349110494534e-05, + "loss": 6.8238, + "step": 11200 + }, + { + "epoch": 4.0305206463195695, + "grad_norm": 4.379905700683594, + "learning_rate": 2.7229748109460857e-05, + "loss": 6.7936, + "step": 11225 + }, + { + "epoch": 4.039497307001795, + "grad_norm": 4.7019124031066895, + "learning_rate": 2.721614710842718e-05, + "loss": 6.7282, + "step": 11250 + }, + { + "epoch": 4.048473967684021, + "grad_norm": 4.940536975860596, + "learning_rate": 2.7202546107393507e-05, + "loss": 6.8201, + "step": 11275 + }, + { + "epoch": 4.057450628366248, + "grad_norm": 4.110478401184082, + "learning_rate": 2.7188945106359826e-05, + "loss": 6.7566, + "step": 11300 + }, + { + "epoch": 4.066427289048474, + "grad_norm": 4.422680854797363, + "learning_rate": 2.7175344105326152e-05, + "loss": 6.7298, + "step": 11325 + }, + { + "epoch": 4.0754039497307, + "grad_norm": 4.569235801696777, + "learning_rate": 2.7161743104292475e-05, + "loss": 6.7008, + "step": 11350 + }, + { + "epoch": 4.084380610412927, + "grad_norm": 4.897010803222656, + "learning_rate": 2.7148142103258802e-05, + "loss": 6.6552, + "step": 11375 + }, + { + "epoch": 4.093357271095153, + "grad_norm": 4.791720390319824, + "learning_rate": 2.7134541102225125e-05, + "loss": 6.6918, + "step": 11400 + }, + { + "epoch": 4.102333931777379, + "grad_norm": 5.308394908905029, + "learning_rate": 2.7120940101191448e-05, + "loss": 6.9076, + "step": 11425 + }, + { + "epoch": 4.111310592459605, + "grad_norm": 5.037841320037842, + "learning_rate": 2.7107339100157774e-05, + "loss": 6.735, + "step": 11450 + }, + { + "epoch": 4.120287253141831, + "grad_norm": 5.336513996124268, + "learning_rate": 2.7093738099124093e-05, + "loss": 6.7777, + "step": 11475 + }, + { + "epoch": 4.129263913824057, + "grad_norm": 4.943955898284912, + "learning_rate": 2.708013709809042e-05, + "loss": 6.8673, + "step": 11500 + }, + { + "epoch": 4.138240574506284, + "grad_norm": 5.733214378356934, + "learning_rate": 2.7066536097056746e-05, + "loss": 6.8006, + "step": 11525 + }, + { + "epoch": 4.14721723518851, + "grad_norm": 5.24770975112915, + "learning_rate": 2.705293509602307e-05, + "loss": 6.6862, + "step": 11550 + }, + { + "epoch": 4.156193895870736, + "grad_norm": 4.939833641052246, + "learning_rate": 2.7039334094989392e-05, + "loss": 6.6744, + "step": 11575 + }, + { + "epoch": 4.165170556552963, + "grad_norm": 4.50258207321167, + "learning_rate": 2.7025733093955715e-05, + "loss": 6.8506, + "step": 11600 + }, + { + "epoch": 4.174147217235189, + "grad_norm": 5.146048545837402, + "learning_rate": 2.701213209292204e-05, + "loss": 6.7481, + "step": 11625 + }, + { + "epoch": 4.1831238779174145, + "grad_norm": 5.90827751159668, + "learning_rate": 2.699853109188836e-05, + "loss": 6.8362, + "step": 11650 + }, + { + "epoch": 4.192100538599641, + "grad_norm": 4.529995918273926, + "learning_rate": 2.6984930090854687e-05, + "loss": 6.7737, + "step": 11675 + }, + { + "epoch": 4.201077199281867, + "grad_norm": 5.020559787750244, + "learning_rate": 2.6971329089821013e-05, + "loss": 6.8785, + "step": 11700 + }, + { + "epoch": 4.210053859964093, + "grad_norm": 5.17188835144043, + "learning_rate": 2.6957728088787336e-05, + "loss": 6.8051, + "step": 11725 + }, + { + "epoch": 4.21903052064632, + "grad_norm": 4.9098219871521, + "learning_rate": 2.694412708775366e-05, + "loss": 6.7541, + "step": 11750 + }, + { + "epoch": 4.228007181328546, + "grad_norm": 5.259347915649414, + "learning_rate": 2.6930526086719982e-05, + "loss": 6.738, + "step": 11775 + }, + { + "epoch": 4.236983842010772, + "grad_norm": 4.867190837860107, + "learning_rate": 2.691692508568631e-05, + "loss": 6.7314, + "step": 11800 + }, + { + "epoch": 4.2459605026929985, + "grad_norm": 5.05924129486084, + "learning_rate": 2.6903324084652628e-05, + "loss": 6.7043, + "step": 11825 + }, + { + "epoch": 4.254937163375224, + "grad_norm": 5.422475814819336, + "learning_rate": 2.6889723083618954e-05, + "loss": 6.7425, + "step": 11850 + }, + { + "epoch": 4.26391382405745, + "grad_norm": 4.785470962524414, + "learning_rate": 2.687612208258528e-05, + "loss": 6.71, + "step": 11875 + }, + { + "epoch": 4.272890484739677, + "grad_norm": 5.0549798011779785, + "learning_rate": 2.6862521081551604e-05, + "loss": 6.7477, + "step": 11900 + }, + { + "epoch": 4.281867145421903, + "grad_norm": 4.992685317993164, + "learning_rate": 2.6848920080517926e-05, + "loss": 6.7135, + "step": 11925 + }, + { + "epoch": 4.290843806104129, + "grad_norm": 4.637200832366943, + "learning_rate": 2.683531907948425e-05, + "loss": 6.7139, + "step": 11950 + }, + { + "epoch": 4.299820466786356, + "grad_norm": 4.825590133666992, + "learning_rate": 2.6821718078450576e-05, + "loss": 6.698, + "step": 11975 + }, + { + "epoch": 4.308797127468582, + "grad_norm": 4.806486129760742, + "learning_rate": 2.68081170774169e-05, + "loss": 6.7397, + "step": 12000 + }, + { + "epoch": 4.317773788150808, + "grad_norm": 4.935576915740967, + "learning_rate": 2.679451607638322e-05, + "loss": 6.727, + "step": 12025 + }, + { + "epoch": 4.326750448833034, + "grad_norm": 5.050081253051758, + "learning_rate": 2.6780915075349548e-05, + "loss": 6.645, + "step": 12050 + }, + { + "epoch": 4.33572710951526, + "grad_norm": 5.2789225578308105, + "learning_rate": 2.676731407431587e-05, + "loss": 6.673, + "step": 12075 + }, + { + "epoch": 4.344703770197486, + "grad_norm": 4.885463714599609, + "learning_rate": 2.6753713073282194e-05, + "loss": 6.7153, + "step": 12100 + }, + { + "epoch": 4.353680430879713, + "grad_norm": 5.462228775024414, + "learning_rate": 2.6740112072248517e-05, + "loss": 6.6611, + "step": 12125 + }, + { + "epoch": 4.362657091561939, + "grad_norm": 5.114634990692139, + "learning_rate": 2.6726511071214843e-05, + "loss": 6.6893, + "step": 12150 + }, + { + "epoch": 4.371633752244165, + "grad_norm": 4.541257381439209, + "learning_rate": 2.6712910070181166e-05, + "loss": 6.7365, + "step": 12175 + }, + { + "epoch": 4.380610412926392, + "grad_norm": 5.211061000823975, + "learning_rate": 2.669930906914749e-05, + "loss": 6.7419, + "step": 12200 + }, + { + "epoch": 4.3895870736086176, + "grad_norm": 4.9504852294921875, + "learning_rate": 2.6685708068113815e-05, + "loss": 6.6139, + "step": 12225 + }, + { + "epoch": 4.3985637342908435, + "grad_norm": 5.869256496429443, + "learning_rate": 2.6672107067080138e-05, + "loss": 6.6851, + "step": 12250 + }, + { + "epoch": 4.40754039497307, + "grad_norm": 5.472086429595947, + "learning_rate": 2.665850606604646e-05, + "loss": 6.7047, + "step": 12275 + }, + { + "epoch": 4.416517055655296, + "grad_norm": 5.531244277954102, + "learning_rate": 2.6644905065012787e-05, + "loss": 6.5862, + "step": 12300 + }, + { + "epoch": 4.425493716337522, + "grad_norm": 4.993152141571045, + "learning_rate": 2.663130406397911e-05, + "loss": 6.767, + "step": 12325 + }, + { + "epoch": 4.434470377019749, + "grad_norm": 5.301982402801514, + "learning_rate": 2.6617703062945433e-05, + "loss": 6.6788, + "step": 12350 + }, + { + "epoch": 4.443447037701975, + "grad_norm": 4.631206512451172, + "learning_rate": 2.6604102061911756e-05, + "loss": 6.6905, + "step": 12375 + }, + { + "epoch": 4.452423698384201, + "grad_norm": 5.317607402801514, + "learning_rate": 2.6590501060878082e-05, + "loss": 6.6364, + "step": 12400 + }, + { + "epoch": 4.4614003590664275, + "grad_norm": 5.503232002258301, + "learning_rate": 2.6576900059844405e-05, + "loss": 6.7201, + "step": 12425 + }, + { + "epoch": 4.470377019748653, + "grad_norm": 4.654670238494873, + "learning_rate": 2.6563299058810728e-05, + "loss": 6.6884, + "step": 12450 + }, + { + "epoch": 4.479353680430879, + "grad_norm": 5.257537841796875, + "learning_rate": 2.6549698057777055e-05, + "loss": 6.7215, + "step": 12475 + }, + { + "epoch": 4.488330341113106, + "grad_norm": 5.210605621337891, + "learning_rate": 2.6536097056743378e-05, + "loss": 6.6522, + "step": 12500 + }, + { + "epoch": 4.497307001795332, + "grad_norm": 5.283504486083984, + "learning_rate": 2.65224960557097e-05, + "loss": 6.7188, + "step": 12525 + }, + { + "epoch": 4.506283662477558, + "grad_norm": 4.933950424194336, + "learning_rate": 2.6508895054676023e-05, + "loss": 6.6864, + "step": 12550 + }, + { + "epoch": 4.515260323159785, + "grad_norm": 5.506487846374512, + "learning_rate": 2.649529405364235e-05, + "loss": 6.6419, + "step": 12575 + }, + { + "epoch": 4.524236983842011, + "grad_norm": 5.170058250427246, + "learning_rate": 2.6481693052608673e-05, + "loss": 6.6441, + "step": 12600 + }, + { + "epoch": 4.533213644524237, + "grad_norm": 5.326727390289307, + "learning_rate": 2.6468092051574996e-05, + "loss": 6.6739, + "step": 12625 + }, + { + "epoch": 4.542190305206463, + "grad_norm": 4.882199764251709, + "learning_rate": 2.6454491050541322e-05, + "loss": 6.6873, + "step": 12650 + }, + { + "epoch": 4.551166965888689, + "grad_norm": 4.9388885498046875, + "learning_rate": 2.6440890049507645e-05, + "loss": 6.7409, + "step": 12675 + }, + { + "epoch": 4.560143626570916, + "grad_norm": 4.774771213531494, + "learning_rate": 2.6427289048473968e-05, + "loss": 6.7148, + "step": 12700 + }, + { + "epoch": 4.569120287253142, + "grad_norm": 5.204039096832275, + "learning_rate": 2.641368804744029e-05, + "loss": 6.683, + "step": 12725 + }, + { + "epoch": 4.578096947935368, + "grad_norm": 5.481900691986084, + "learning_rate": 2.6400087046406617e-05, + "loss": 6.6945, + "step": 12750 + }, + { + "epoch": 4.587073608617594, + "grad_norm": 5.369918346405029, + "learning_rate": 2.6386486045372943e-05, + "loss": 6.6928, + "step": 12775 + }, + { + "epoch": 4.596050269299821, + "grad_norm": 4.932347297668457, + "learning_rate": 2.6372885044339263e-05, + "loss": 6.623, + "step": 12800 + }, + { + "epoch": 4.6050269299820465, + "grad_norm": 5.369187355041504, + "learning_rate": 2.635928404330559e-05, + "loss": 6.6083, + "step": 12825 + }, + { + "epoch": 4.614003590664273, + "grad_norm": 4.608354091644287, + "learning_rate": 2.6345683042271912e-05, + "loss": 6.5989, + "step": 12850 + }, + { + "epoch": 4.622980251346499, + "grad_norm": 5.258671760559082, + "learning_rate": 2.6332082041238235e-05, + "loss": 6.6337, + "step": 12875 + }, + { + "epoch": 4.631956912028725, + "grad_norm": 4.928924083709717, + "learning_rate": 2.6318481040204558e-05, + "loss": 6.6841, + "step": 12900 + }, + { + "epoch": 4.640933572710951, + "grad_norm": 5.073070526123047, + "learning_rate": 2.6304880039170884e-05, + "loss": 6.6259, + "step": 12925 + }, + { + "epoch": 4.649910233393178, + "grad_norm": 5.602537155151367, + "learning_rate": 2.6291279038137207e-05, + "loss": 6.7736, + "step": 12950 + }, + { + "epoch": 4.658886894075404, + "grad_norm": 4.999927520751953, + "learning_rate": 2.627767803710353e-05, + "loss": 6.6233, + "step": 12975 + }, + { + "epoch": 4.667863554757631, + "grad_norm": 5.143951892852783, + "learning_rate": 2.6264077036069856e-05, + "loss": 6.6963, + "step": 13000 + }, + { + "epoch": 4.6768402154398565, + "grad_norm": 5.203060626983643, + "learning_rate": 2.625047603503618e-05, + "loss": 6.6785, + "step": 13025 + }, + { + "epoch": 4.685816876122082, + "grad_norm": 5.3349289894104, + "learning_rate": 2.6236875034002502e-05, + "loss": 6.6298, + "step": 13050 + }, + { + "epoch": 4.694793536804308, + "grad_norm": 6.17996883392334, + "learning_rate": 2.622327403296883e-05, + "loss": 6.5807, + "step": 13075 + }, + { + "epoch": 4.703770197486535, + "grad_norm": 5.6811628341674805, + "learning_rate": 2.620967303193515e-05, + "loss": 6.6519, + "step": 13100 + }, + { + "epoch": 4.712746858168761, + "grad_norm": 5.4090166091918945, + "learning_rate": 2.6196072030901474e-05, + "loss": 6.6567, + "step": 13125 + }, + { + "epoch": 4.721723518850988, + "grad_norm": 5.64863395690918, + "learning_rate": 2.6182471029867797e-05, + "loss": 6.727, + "step": 13150 + }, + { + "epoch": 4.730700179533214, + "grad_norm": 5.539173126220703, + "learning_rate": 2.6168870028834124e-05, + "loss": 6.5315, + "step": 13175 + }, + { + "epoch": 4.73967684021544, + "grad_norm": 5.546090126037598, + "learning_rate": 2.6155269027800447e-05, + "loss": 6.6293, + "step": 13200 + }, + { + "epoch": 4.748653500897666, + "grad_norm": 5.386585712432861, + "learning_rate": 2.614166802676677e-05, + "loss": 6.5927, + "step": 13225 + }, + { + "epoch": 4.757630161579892, + "grad_norm": 5.196156978607178, + "learning_rate": 2.6128067025733096e-05, + "loss": 6.6879, + "step": 13250 + }, + { + "epoch": 4.766606822262118, + "grad_norm": 5.108584403991699, + "learning_rate": 2.611446602469942e-05, + "loss": 6.5423, + "step": 13275 + }, + { + "epoch": 4.775583482944345, + "grad_norm": 6.006952285766602, + "learning_rate": 2.6100865023665742e-05, + "loss": 6.4593, + "step": 13300 + }, + { + "epoch": 4.784560143626571, + "grad_norm": 5.575499534606934, + "learning_rate": 2.6087264022632065e-05, + "loss": 6.6071, + "step": 13325 + }, + { + "epoch": 4.793536804308797, + "grad_norm": 6.095732688903809, + "learning_rate": 2.607366302159839e-05, + "loss": 6.5975, + "step": 13350 + }, + { + "epoch": 4.802513464991024, + "grad_norm": 5.288640975952148, + "learning_rate": 2.6060062020564714e-05, + "loss": 6.5856, + "step": 13375 + }, + { + "epoch": 4.81149012567325, + "grad_norm": 5.813202381134033, + "learning_rate": 2.6046461019531037e-05, + "loss": 6.5931, + "step": 13400 + }, + { + "epoch": 4.8204667863554755, + "grad_norm": 5.142013072967529, + "learning_rate": 2.6032860018497363e-05, + "loss": 6.657, + "step": 13425 + }, + { + "epoch": 4.829443447037702, + "grad_norm": 7.107198715209961, + "learning_rate": 2.6019259017463686e-05, + "loss": 6.4158, + "step": 13450 + }, + { + "epoch": 4.838420107719928, + "grad_norm": 5.339532852172852, + "learning_rate": 2.600565801643001e-05, + "loss": 6.5691, + "step": 13475 + }, + { + "epoch": 4.847396768402154, + "grad_norm": 5.03562593460083, + "learning_rate": 2.5992057015396332e-05, + "loss": 6.5863, + "step": 13500 + }, + { + "epoch": 4.856373429084381, + "grad_norm": 5.4362969398498535, + "learning_rate": 2.5978456014362658e-05, + "loss": 6.6099, + "step": 13525 + }, + { + "epoch": 4.865350089766607, + "grad_norm": 5.3372907638549805, + "learning_rate": 2.5964855013328985e-05, + "loss": 6.6796, + "step": 13550 + }, + { + "epoch": 4.874326750448833, + "grad_norm": 5.250656604766846, + "learning_rate": 2.5951254012295304e-05, + "loss": 6.536, + "step": 13575 + }, + { + "epoch": 4.88330341113106, + "grad_norm": 5.196676254272461, + "learning_rate": 2.593765301126163e-05, + "loss": 6.5428, + "step": 13600 + }, + { + "epoch": 4.8922800718132855, + "grad_norm": 5.402597427368164, + "learning_rate": 2.5924052010227953e-05, + "loss": 6.6571, + "step": 13625 + }, + { + "epoch": 4.901256732495511, + "grad_norm": 5.131245136260986, + "learning_rate": 2.5910451009194276e-05, + "loss": 6.5993, + "step": 13650 + }, + { + "epoch": 4.910233393177738, + "grad_norm": 5.219388008117676, + "learning_rate": 2.58968500081606e-05, + "loss": 6.5978, + "step": 13675 + }, + { + "epoch": 4.919210053859964, + "grad_norm": 5.173649311065674, + "learning_rate": 2.5883249007126926e-05, + "loss": 6.6292, + "step": 13700 + }, + { + "epoch": 4.92818671454219, + "grad_norm": 5.420453071594238, + "learning_rate": 2.5869648006093252e-05, + "loss": 6.5967, + "step": 13725 + }, + { + "epoch": 4.937163375224417, + "grad_norm": 5.249734878540039, + "learning_rate": 2.5856591045100918e-05, + "loss": 6.6466, + "step": 13750 + }, + { + "epoch": 4.946140035906643, + "grad_norm": 5.484232425689697, + "learning_rate": 2.5842990044067244e-05, + "loss": 6.6097, + "step": 13775 + }, + { + "epoch": 4.955116696588869, + "grad_norm": 5.411509037017822, + "learning_rate": 2.582938904303357e-05, + "loss": 6.5168, + "step": 13800 + }, + { + "epoch": 4.9640933572710955, + "grad_norm": 5.186717987060547, + "learning_rate": 2.581578804199989e-05, + "loss": 6.6326, + "step": 13825 + }, + { + "epoch": 4.973070017953321, + "grad_norm": 5.17860221862793, + "learning_rate": 2.5802187040966216e-05, + "loss": 6.5666, + "step": 13850 + }, + { + "epoch": 4.982046678635547, + "grad_norm": 5.576063632965088, + "learning_rate": 2.578858603993254e-05, + "loss": 6.6427, + "step": 13875 + }, + { + "epoch": 4.991023339317774, + "grad_norm": 5.810356616973877, + "learning_rate": 2.5774985038898865e-05, + "loss": 6.5841, + "step": 13900 + }, + { + "epoch": 5.0, + "grad_norm": 5.1273956298828125, + "learning_rate": 2.5761384037865188e-05, + "loss": 6.463, + "step": 13925 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.06415519447043323, + "eval_f1_macro": 0.0017639510002836409, + "eval_f1_micro": 0.06415519447043323, + "eval_f1_weighted": 0.02387528577143177, + "eval_loss": 7.1647257804870605, + "eval_precision_macro": 0.0016218596853953499, + "eval_precision_micro": 0.06415519447043323, + "eval_precision_weighted": 0.019212849932021898, + "eval_recall_macro": 0.004534374725064504, + "eval_recall_micro": 0.06415519447043323, + "eval_recall_weighted": 0.06415519447043323, + "eval_runtime": 85.6551, + "eval_samples_per_second": 611.441, + "eval_steps_per_second": 9.562, + "step": 13925 + }, + { + "epoch": 5.008976660682226, + "grad_norm": 4.756453514099121, + "learning_rate": 2.574778303683151e-05, + "loss": 6.3293, + "step": 13950 + }, + { + "epoch": 5.017953321364453, + "grad_norm": 5.295063018798828, + "learning_rate": 2.5734182035797837e-05, + "loss": 6.2634, + "step": 13975 + }, + { + "epoch": 5.026929982046679, + "grad_norm": 5.207920074462891, + "learning_rate": 2.5720581034764157e-05, + "loss": 6.3088, + "step": 14000 + }, + { + "epoch": 5.0359066427289045, + "grad_norm": 5.293393135070801, + "learning_rate": 2.5706980033730483e-05, + "loss": 6.2692, + "step": 14025 + }, + { + "epoch": 5.044883303411131, + "grad_norm": 4.975879669189453, + "learning_rate": 2.5693379032696806e-05, + "loss": 6.3305, + "step": 14050 + }, + { + "epoch": 5.053859964093357, + "grad_norm": 5.425548076629639, + "learning_rate": 2.5679778031663133e-05, + "loss": 6.3423, + "step": 14075 + }, + { + "epoch": 5.062836624775583, + "grad_norm": 5.644728183746338, + "learning_rate": 2.5666177030629455e-05, + "loss": 6.2995, + "step": 14100 + }, + { + "epoch": 5.07181328545781, + "grad_norm": 5.252628803253174, + "learning_rate": 2.565257602959578e-05, + "loss": 6.4325, + "step": 14125 + }, + { + "epoch": 5.080789946140036, + "grad_norm": 5.8973164558410645, + "learning_rate": 2.5638975028562105e-05, + "loss": 6.3701, + "step": 14150 + }, + { + "epoch": 5.089766606822262, + "grad_norm": 6.239661693572998, + "learning_rate": 2.5625374027528424e-05, + "loss": 6.3396, + "step": 14175 + }, + { + "epoch": 5.098743267504489, + "grad_norm": 5.662258625030518, + "learning_rate": 2.561177302649475e-05, + "loss": 6.2999, + "step": 14200 + }, + { + "epoch": 5.1077199281867145, + "grad_norm": 5.256962299346924, + "learning_rate": 2.5598172025461074e-05, + "loss": 6.2109, + "step": 14225 + }, + { + "epoch": 5.11669658886894, + "grad_norm": 5.537407875061035, + "learning_rate": 2.55845710244274e-05, + "loss": 6.2665, + "step": 14250 + }, + { + "epoch": 5.125673249551167, + "grad_norm": 5.025729656219482, + "learning_rate": 2.5570970023393723e-05, + "loss": 6.3329, + "step": 14275 + }, + { + "epoch": 5.134649910233393, + "grad_norm": 5.487298488616943, + "learning_rate": 2.5557369022360046e-05, + "loss": 6.3146, + "step": 14300 + }, + { + "epoch": 5.143626570915619, + "grad_norm": 5.395374774932861, + "learning_rate": 2.5543768021326372e-05, + "loss": 6.2854, + "step": 14325 + }, + { + "epoch": 5.152603231597846, + "grad_norm": 5.130805492401123, + "learning_rate": 2.553016702029269e-05, + "loss": 6.3169, + "step": 14350 + }, + { + "epoch": 5.161579892280072, + "grad_norm": 6.2107014656066895, + "learning_rate": 2.5516566019259018e-05, + "loss": 6.4094, + "step": 14375 + }, + { + "epoch": 5.170556552962298, + "grad_norm": 5.773256301879883, + "learning_rate": 2.5502965018225344e-05, + "loss": 6.4208, + "step": 14400 + }, + { + "epoch": 5.1795332136445245, + "grad_norm": 7.502089977264404, + "learning_rate": 2.5489364017191667e-05, + "loss": 6.31, + "step": 14425 + }, + { + "epoch": 5.18850987432675, + "grad_norm": 4.814985275268555, + "learning_rate": 2.547576301615799e-05, + "loss": 6.2723, + "step": 14450 + }, + { + "epoch": 5.197486535008976, + "grad_norm": 5.640610694885254, + "learning_rate": 2.5462162015124313e-05, + "loss": 6.2076, + "step": 14475 + }, + { + "epoch": 5.206463195691203, + "grad_norm": 5.890365123748779, + "learning_rate": 2.544856101409064e-05, + "loss": 6.3299, + "step": 14500 + }, + { + "epoch": 5.215439856373429, + "grad_norm": 6.371877193450928, + "learning_rate": 2.543496001305696e-05, + "loss": 6.2514, + "step": 14525 + }, + { + "epoch": 5.224416517055655, + "grad_norm": 5.170773029327393, + "learning_rate": 2.5421359012023285e-05, + "loss": 6.2677, + "step": 14550 + }, + { + "epoch": 5.233393177737882, + "grad_norm": 5.94767951965332, + "learning_rate": 2.540775801098961e-05, + "loss": 6.3077, + "step": 14575 + }, + { + "epoch": 5.242369838420108, + "grad_norm": 5.372101306915283, + "learning_rate": 2.5394157009955934e-05, + "loss": 6.3495, + "step": 14600 + }, + { + "epoch": 5.2513464991023335, + "grad_norm": 4.869545936584473, + "learning_rate": 2.5380556008922257e-05, + "loss": 6.3959, + "step": 14625 + }, + { + "epoch": 5.26032315978456, + "grad_norm": 6.060793399810791, + "learning_rate": 2.536695500788858e-05, + "loss": 6.2705, + "step": 14650 + }, + { + "epoch": 5.269299820466786, + "grad_norm": 5.262942790985107, + "learning_rate": 2.5353354006854907e-05, + "loss": 6.2128, + "step": 14675 + }, + { + "epoch": 5.278276481149012, + "grad_norm": 5.637872219085693, + "learning_rate": 2.533975300582123e-05, + "loss": 6.3334, + "step": 14700 + }, + { + "epoch": 5.287253141831239, + "grad_norm": 5.676011562347412, + "learning_rate": 2.5326152004787552e-05, + "loss": 6.271, + "step": 14725 + }, + { + "epoch": 5.296229802513465, + "grad_norm": 6.002972602844238, + "learning_rate": 2.531255100375388e-05, + "loss": 6.1934, + "step": 14750 + }, + { + "epoch": 5.305206463195692, + "grad_norm": 5.8074798583984375, + "learning_rate": 2.5298950002720198e-05, + "loss": 6.2997, + "step": 14775 + }, + { + "epoch": 5.314183123877918, + "grad_norm": 5.46445369720459, + "learning_rate": 2.5285349001686525e-05, + "loss": 6.3365, + "step": 14800 + }, + { + "epoch": 5.3231597845601435, + "grad_norm": 5.858017444610596, + "learning_rate": 2.5271748000652848e-05, + "loss": 6.138, + "step": 14825 + }, + { + "epoch": 5.332136445242369, + "grad_norm": 5.6020073890686035, + "learning_rate": 2.5258146999619174e-05, + "loss": 6.2538, + "step": 14850 + }, + { + "epoch": 5.341113105924596, + "grad_norm": 6.0575127601623535, + "learning_rate": 2.5244545998585497e-05, + "loss": 6.3292, + "step": 14875 + }, + { + "epoch": 5.350089766606822, + "grad_norm": 5.455022811889648, + "learning_rate": 2.523094499755182e-05, + "loss": 6.2699, + "step": 14900 + }, + { + "epoch": 5.359066427289049, + "grad_norm": 6.06628942489624, + "learning_rate": 2.5217343996518146e-05, + "loss": 6.2243, + "step": 14925 + }, + { + "epoch": 5.368043087971275, + "grad_norm": 5.760193347930908, + "learning_rate": 2.5203742995484466e-05, + "loss": 6.3414, + "step": 14950 + }, + { + "epoch": 5.377019748653501, + "grad_norm": 6.125321388244629, + "learning_rate": 2.5190141994450792e-05, + "loss": 6.1872, + "step": 14975 + }, + { + "epoch": 5.385996409335727, + "grad_norm": 6.62969446182251, + "learning_rate": 2.5176540993417115e-05, + "loss": 6.2625, + "step": 15000 + }, + { + "epoch": 5.3949730700179535, + "grad_norm": 5.933910369873047, + "learning_rate": 2.516293999238344e-05, + "loss": 6.2701, + "step": 15025 + }, + { + "epoch": 5.403949730700179, + "grad_norm": 5.867855072021484, + "learning_rate": 2.5149338991349764e-05, + "loss": 6.2534, + "step": 15050 + }, + { + "epoch": 5.412926391382406, + "grad_norm": 5.391022205352783, + "learning_rate": 2.5135737990316087e-05, + "loss": 6.2907, + "step": 15075 + }, + { + "epoch": 5.421903052064632, + "grad_norm": 6.104460716247559, + "learning_rate": 2.5122136989282413e-05, + "loss": 6.3503, + "step": 15100 + }, + { + "epoch": 5.430879712746858, + "grad_norm": 5.619187831878662, + "learning_rate": 2.5108535988248733e-05, + "loss": 6.3671, + "step": 15125 + }, + { + "epoch": 5.439856373429085, + "grad_norm": 6.143537998199463, + "learning_rate": 2.509493498721506e-05, + "loss": 6.2403, + "step": 15150 + }, + { + "epoch": 5.448833034111311, + "grad_norm": 5.967230796813965, + "learning_rate": 2.5081333986181385e-05, + "loss": 6.219, + "step": 15175 + }, + { + "epoch": 5.457809694793537, + "grad_norm": 5.538968086242676, + "learning_rate": 2.506773298514771e-05, + "loss": 6.3469, + "step": 15200 + }, + { + "epoch": 5.466786355475763, + "grad_norm": 5.721219539642334, + "learning_rate": 2.505413198411403e-05, + "loss": 6.3591, + "step": 15225 + }, + { + "epoch": 5.475763016157989, + "grad_norm": 5.89268159866333, + "learning_rate": 2.5040530983080354e-05, + "loss": 6.3421, + "step": 15250 + }, + { + "epoch": 5.484739676840215, + "grad_norm": 5.549192428588867, + "learning_rate": 2.502692998204668e-05, + "loss": 6.2005, + "step": 15275 + }, + { + "epoch": 5.493716337522442, + "grad_norm": 5.588681221008301, + "learning_rate": 2.5013328981013e-05, + "loss": 6.2821, + "step": 15300 + }, + { + "epoch": 5.502692998204668, + "grad_norm": 5.735651016235352, + "learning_rate": 2.4999727979979326e-05, + "loss": 6.3793, + "step": 15325 + }, + { + "epoch": 5.511669658886894, + "grad_norm": 5.585961818695068, + "learning_rate": 2.4986126978945653e-05, + "loss": 6.2269, + "step": 15350 + }, + { + "epoch": 5.520646319569121, + "grad_norm": 6.181479454040527, + "learning_rate": 2.4972525977911976e-05, + "loss": 6.2209, + "step": 15375 + }, + { + "epoch": 5.529622980251347, + "grad_norm": 5.356082916259766, + "learning_rate": 2.49589249768783e-05, + "loss": 6.224, + "step": 15400 + }, + { + "epoch": 5.5385996409335725, + "grad_norm": 5.908627033233643, + "learning_rate": 2.494532397584462e-05, + "loss": 6.2517, + "step": 15425 + }, + { + "epoch": 5.547576301615799, + "grad_norm": 4.9644927978515625, + "learning_rate": 2.4931722974810948e-05, + "loss": 6.3259, + "step": 15450 + }, + { + "epoch": 5.556552962298025, + "grad_norm": 5.580685138702393, + "learning_rate": 2.491812197377727e-05, + "loss": 6.326, + "step": 15475 + }, + { + "epoch": 5.565529622980251, + "grad_norm": 5.565394878387451, + "learning_rate": 2.4904520972743594e-05, + "loss": 6.3379, + "step": 15500 + }, + { + "epoch": 5.574506283662478, + "grad_norm": 5.581964492797852, + "learning_rate": 2.489091997170992e-05, + "loss": 6.1481, + "step": 15525 + }, + { + "epoch": 5.583482944344704, + "grad_norm": 5.441143035888672, + "learning_rate": 2.4877318970676243e-05, + "loss": 6.1909, + "step": 15550 + }, + { + "epoch": 5.59245960502693, + "grad_norm": 5.888316631317139, + "learning_rate": 2.4863717969642566e-05, + "loss": 6.3157, + "step": 15575 + }, + { + "epoch": 5.6014362657091565, + "grad_norm": 5.89539909362793, + "learning_rate": 2.485011696860889e-05, + "loss": 6.1538, + "step": 15600 + }, + { + "epoch": 5.6104129263913824, + "grad_norm": 5.8886399269104, + "learning_rate": 2.4836515967575215e-05, + "loss": 6.2123, + "step": 15625 + }, + { + "epoch": 5.619389587073608, + "grad_norm": 5.497063636779785, + "learning_rate": 2.4822914966541538e-05, + "loss": 6.3159, + "step": 15650 + }, + { + "epoch": 5.628366247755835, + "grad_norm": 5.9336323738098145, + "learning_rate": 2.480931396550786e-05, + "loss": 6.1855, + "step": 15675 + }, + { + "epoch": 5.637342908438061, + "grad_norm": 6.318659782409668, + "learning_rate": 2.4795712964474187e-05, + "loss": 6.2227, + "step": 15700 + }, + { + "epoch": 5.646319569120287, + "grad_norm": 6.2130327224731445, + "learning_rate": 2.478211196344051e-05, + "loss": 6.2743, + "step": 15725 + }, + { + "epoch": 5.655296229802514, + "grad_norm": 5.517823219299316, + "learning_rate": 2.4768510962406833e-05, + "loss": 6.2002, + "step": 15750 + }, + { + "epoch": 5.66427289048474, + "grad_norm": 6.00836181640625, + "learning_rate": 2.4754909961373156e-05, + "loss": 6.2853, + "step": 15775 + }, + { + "epoch": 5.673249551166966, + "grad_norm": 6.160185813903809, + "learning_rate": 2.4741308960339482e-05, + "loss": 6.2896, + "step": 15800 + }, + { + "epoch": 5.682226211849192, + "grad_norm": 5.46562385559082, + "learning_rate": 2.4727707959305805e-05, + "loss": 6.2434, + "step": 15825 + }, + { + "epoch": 5.691202872531418, + "grad_norm": 5.78077507019043, + "learning_rate": 2.4714106958272128e-05, + "loss": 6.2761, + "step": 15850 + }, + { + "epoch": 5.700179533213644, + "grad_norm": 6.718456268310547, + "learning_rate": 2.4700505957238455e-05, + "loss": 6.1318, + "step": 15875 + }, + { + "epoch": 5.709156193895871, + "grad_norm": 5.776496410369873, + "learning_rate": 2.4686904956204778e-05, + "loss": 6.237, + "step": 15900 + }, + { + "epoch": 5.718132854578097, + "grad_norm": 6.068448543548584, + "learning_rate": 2.46733039551711e-05, + "loss": 6.2444, + "step": 15925 + }, + { + "epoch": 5.727109515260323, + "grad_norm": 5.443953037261963, + "learning_rate": 2.4659702954137427e-05, + "loss": 6.2708, + "step": 15950 + }, + { + "epoch": 5.73608617594255, + "grad_norm": 5.471415996551514, + "learning_rate": 2.464610195310375e-05, + "loss": 6.2917, + "step": 15975 + }, + { + "epoch": 5.745062836624776, + "grad_norm": 5.76008415222168, + "learning_rate": 2.4632500952070073e-05, + "loss": 6.223, + "step": 16000 + }, + { + "epoch": 5.7540394973070015, + "grad_norm": 6.015965461730957, + "learning_rate": 2.4618899951036396e-05, + "loss": 6.2477, + "step": 16025 + }, + { + "epoch": 5.763016157989228, + "grad_norm": 6.855495929718018, + "learning_rate": 2.4605298950002722e-05, + "loss": 6.2995, + "step": 16050 + }, + { + "epoch": 5.771992818671454, + "grad_norm": 6.23684549331665, + "learning_rate": 2.4591697948969045e-05, + "loss": 6.1955, + "step": 16075 + }, + { + "epoch": 5.78096947935368, + "grad_norm": 6.927788257598877, + "learning_rate": 2.4578096947935368e-05, + "loss": 6.2508, + "step": 16100 + }, + { + "epoch": 5.789946140035907, + "grad_norm": 5.425616264343262, + "learning_rate": 2.4564495946901694e-05, + "loss": 6.2155, + "step": 16125 + }, + { + "epoch": 5.798922800718133, + "grad_norm": 5.135979175567627, + "learning_rate": 2.4550894945868017e-05, + "loss": 6.2943, + "step": 16150 + }, + { + "epoch": 5.807899461400359, + "grad_norm": 6.088143348693848, + "learning_rate": 2.453729394483434e-05, + "loss": 6.3113, + "step": 16175 + }, + { + "epoch": 5.8168761220825855, + "grad_norm": 5.345020294189453, + "learning_rate": 2.4523692943800663e-05, + "loss": 6.2453, + "step": 16200 + }, + { + "epoch": 5.825852782764811, + "grad_norm": 5.7146830558776855, + "learning_rate": 2.451009194276699e-05, + "loss": 6.3495, + "step": 16225 + }, + { + "epoch": 5.834829443447037, + "grad_norm": 6.323094844818115, + "learning_rate": 2.4496490941733315e-05, + "loss": 6.2242, + "step": 16250 + }, + { + "epoch": 5.843806104129264, + "grad_norm": 5.723015785217285, + "learning_rate": 2.4482889940699635e-05, + "loss": 6.3133, + "step": 16275 + }, + { + "epoch": 5.85278276481149, + "grad_norm": 5.654751777648926, + "learning_rate": 2.446928893966596e-05, + "loss": 6.2004, + "step": 16300 + }, + { + "epoch": 5.861759425493716, + "grad_norm": 6.224421501159668, + "learning_rate": 2.4455687938632284e-05, + "loss": 6.2426, + "step": 16325 + }, + { + "epoch": 5.870736086175943, + "grad_norm": 6.2358880043029785, + "learning_rate": 2.4442086937598607e-05, + "loss": 6.2076, + "step": 16350 + }, + { + "epoch": 5.879712746858169, + "grad_norm": 5.774993419647217, + "learning_rate": 2.442848593656493e-05, + "loss": 6.211, + "step": 16375 + }, + { + "epoch": 5.888689407540395, + "grad_norm": 5.289491176605225, + "learning_rate": 2.4414884935531256e-05, + "loss": 6.1686, + "step": 16400 + }, + { + "epoch": 5.897666068222621, + "grad_norm": 5.795011520385742, + "learning_rate": 2.440128393449758e-05, + "loss": 6.2364, + "step": 16425 + }, + { + "epoch": 5.906642728904847, + "grad_norm": 5.870504379272461, + "learning_rate": 2.4387682933463902e-05, + "loss": 6.174, + "step": 16450 + }, + { + "epoch": 5.915619389587073, + "grad_norm": 6.114706516265869, + "learning_rate": 2.437408193243023e-05, + "loss": 6.2479, + "step": 16475 + }, + { + "epoch": 5.9245960502693, + "grad_norm": 6.081083297729492, + "learning_rate": 2.436048093139655e-05, + "loss": 6.2723, + "step": 16500 + }, + { + "epoch": 5.933572710951526, + "grad_norm": 5.542026996612549, + "learning_rate": 2.4346879930362874e-05, + "loss": 6.1785, + "step": 16525 + }, + { + "epoch": 5.942549371633753, + "grad_norm": 5.569460868835449, + "learning_rate": 2.4333822969370547e-05, + "loss": 6.2639, + "step": 16550 + }, + { + "epoch": 5.951526032315979, + "grad_norm": 5.733129024505615, + "learning_rate": 2.432022196833687e-05, + "loss": 6.2204, + "step": 16575 + }, + { + "epoch": 5.960502692998205, + "grad_norm": 6.0980916023254395, + "learning_rate": 2.4306620967303193e-05, + "loss": 6.221, + "step": 16600 + }, + { + "epoch": 5.9694793536804305, + "grad_norm": 6.039352893829346, + "learning_rate": 2.4293019966269516e-05, + "loss": 6.2628, + "step": 16625 + }, + { + "epoch": 5.978456014362657, + "grad_norm": 7.105417251586914, + "learning_rate": 2.4279418965235842e-05, + "loss": 6.1462, + "step": 16650 + }, + { + "epoch": 5.987432675044883, + "grad_norm": 5.547228813171387, + "learning_rate": 2.426581796420217e-05, + "loss": 6.2557, + "step": 16675 + }, + { + "epoch": 5.99640933572711, + "grad_norm": 5.769441604614258, + "learning_rate": 2.4252216963168488e-05, + "loss": 6.1922, + "step": 16700 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.07032249441506119, + "eval_f1_macro": 0.0026891349378826736, + "eval_f1_micro": 0.07032249441506119, + "eval_f1_weighted": 0.029982431848483405, + "eval_loss": 6.9314866065979, + "eval_precision_macro": 0.0024956283473523497, + "eval_precision_micro": 0.07032249441506119, + "eval_precision_weighted": 0.024441655701922964, + "eval_recall_macro": 0.006120353258283244, + "eval_recall_micro": 0.07032249441506119, + "eval_recall_weighted": 0.07032249441506119, + "eval_runtime": 85.1401, + "eval_samples_per_second": 615.139, + "eval_steps_per_second": 9.619, + "step": 16710 + }, + { + "epoch": 6.005385996409336, + "grad_norm": 6.2654218673706055, + "learning_rate": 2.4238615962134814e-05, + "loss": 6.0267, + "step": 16725 + }, + { + "epoch": 6.014362657091562, + "grad_norm": 6.292488098144531, + "learning_rate": 2.4225014961101137e-05, + "loss": 5.9813, + "step": 16750 + }, + { + "epoch": 6.023339317773788, + "grad_norm": 5.38007116317749, + "learning_rate": 2.421141396006746e-05, + "loss": 5.8201, + "step": 16775 + }, + { + "epoch": 6.0323159784560145, + "grad_norm": 5.788445472717285, + "learning_rate": 2.4197812959033786e-05, + "loss": 5.8281, + "step": 16800 + }, + { + "epoch": 6.04129263913824, + "grad_norm": 5.553605079650879, + "learning_rate": 2.418421195800011e-05, + "loss": 5.9868, + "step": 16825 + }, + { + "epoch": 6.050269299820466, + "grad_norm": 6.312150478363037, + "learning_rate": 2.4170610956966436e-05, + "loss": 5.9261, + "step": 16850 + }, + { + "epoch": 6.059245960502693, + "grad_norm": 5.773979187011719, + "learning_rate": 2.4157009955932755e-05, + "loss": 5.9544, + "step": 16875 + }, + { + "epoch": 6.068222621184919, + "grad_norm": 5.81102991104126, + "learning_rate": 2.414340895489908e-05, + "loss": 5.8732, + "step": 16900 + }, + { + "epoch": 6.077199281867145, + "grad_norm": 5.83776330947876, + "learning_rate": 2.4129807953865404e-05, + "loss": 6.1245, + "step": 16925 + }, + { + "epoch": 6.086175942549372, + "grad_norm": 6.065564155578613, + "learning_rate": 2.4116206952831727e-05, + "loss": 6.0469, + "step": 16950 + }, + { + "epoch": 6.095152603231598, + "grad_norm": 6.709592342376709, + "learning_rate": 2.4102605951798054e-05, + "loss": 5.8605, + "step": 16975 + }, + { + "epoch": 6.1041292639138245, + "grad_norm": 6.311851978302002, + "learning_rate": 2.4089004950764377e-05, + "loss": 5.9424, + "step": 17000 + }, + { + "epoch": 6.11310592459605, + "grad_norm": 5.825013637542725, + "learning_rate": 2.4075403949730703e-05, + "loss": 5.9604, + "step": 17025 + }, + { + "epoch": 6.122082585278276, + "grad_norm": 6.239871501922607, + "learning_rate": 2.4061802948697022e-05, + "loss": 5.9286, + "step": 17050 + }, + { + "epoch": 6.131059245960503, + "grad_norm": 5.891648292541504, + "learning_rate": 2.404820194766335e-05, + "loss": 6.0732, + "step": 17075 + }, + { + "epoch": 6.140035906642729, + "grad_norm": 6.239706039428711, + "learning_rate": 2.4034600946629675e-05, + "loss": 5.8824, + "step": 17100 + }, + { + "epoch": 6.149012567324955, + "grad_norm": 6.323997497558594, + "learning_rate": 2.4020999945595995e-05, + "loss": 5.8209, + "step": 17125 + }, + { + "epoch": 6.157989228007182, + "grad_norm": 6.753236293792725, + "learning_rate": 2.400739894456232e-05, + "loss": 5.8846, + "step": 17150 + }, + { + "epoch": 6.166965888689408, + "grad_norm": 6.048013210296631, + "learning_rate": 2.3993797943528644e-05, + "loss": 5.9736, + "step": 17175 + }, + { + "epoch": 6.175942549371634, + "grad_norm": 5.691511154174805, + "learning_rate": 2.398019694249497e-05, + "loss": 5.9151, + "step": 17200 + }, + { + "epoch": 6.18491921005386, + "grad_norm": 5.416627883911133, + "learning_rate": 2.396659594146129e-05, + "loss": 5.9826, + "step": 17225 + }, + { + "epoch": 6.193895870736086, + "grad_norm": 6.720509052276611, + "learning_rate": 2.3952994940427616e-05, + "loss": 5.952, + "step": 17250 + }, + { + "epoch": 6.202872531418312, + "grad_norm": 6.703721523284912, + "learning_rate": 2.3939393939393942e-05, + "loss": 5.9134, + "step": 17275 + }, + { + "epoch": 6.211849192100539, + "grad_norm": 5.694454193115234, + "learning_rate": 2.3925792938360262e-05, + "loss": 5.8913, + "step": 17300 + }, + { + "epoch": 6.220825852782765, + "grad_norm": 6.364493370056152, + "learning_rate": 2.3912191937326588e-05, + "loss": 5.978, + "step": 17325 + }, + { + "epoch": 6.229802513464991, + "grad_norm": 5.665753364562988, + "learning_rate": 2.389859093629291e-05, + "loss": 5.965, + "step": 17350 + }, + { + "epoch": 6.238779174147218, + "grad_norm": 5.858243942260742, + "learning_rate": 2.3884989935259237e-05, + "loss": 6.0104, + "step": 17375 + }, + { + "epoch": 6.2477558348294435, + "grad_norm": 6.382242679595947, + "learning_rate": 2.387138893422556e-05, + "loss": 5.9541, + "step": 17400 + }, + { + "epoch": 6.256732495511669, + "grad_norm": 6.411990165710449, + "learning_rate": 2.3857787933191883e-05, + "loss": 5.9669, + "step": 17425 + }, + { + "epoch": 6.265709156193896, + "grad_norm": 6.114449501037598, + "learning_rate": 2.384418693215821e-05, + "loss": 5.8677, + "step": 17450 + }, + { + "epoch": 6.274685816876122, + "grad_norm": 6.533391952514648, + "learning_rate": 2.383058593112453e-05, + "loss": 5.938, + "step": 17475 + }, + { + "epoch": 6.283662477558348, + "grad_norm": 8.52158260345459, + "learning_rate": 2.3816984930090855e-05, + "loss": 5.9483, + "step": 17500 + }, + { + "epoch": 6.292639138240575, + "grad_norm": 6.580171585083008, + "learning_rate": 2.380338392905718e-05, + "loss": 5.9841, + "step": 17525 + }, + { + "epoch": 6.301615798922801, + "grad_norm": 5.69027853012085, + "learning_rate": 2.3789782928023505e-05, + "loss": 5.925, + "step": 17550 + }, + { + "epoch": 6.310592459605027, + "grad_norm": 5.6233601570129395, + "learning_rate": 2.3776181926989828e-05, + "loss": 5.9669, + "step": 17575 + }, + { + "epoch": 6.3195691202872535, + "grad_norm": 6.2371745109558105, + "learning_rate": 2.376258092595615e-05, + "loss": 5.8904, + "step": 17600 + }, + { + "epoch": 6.328545780969479, + "grad_norm": 5.969923496246338, + "learning_rate": 2.3748979924922477e-05, + "loss": 5.8635, + "step": 17625 + }, + { + "epoch": 6.337522441651705, + "grad_norm": 6.581711769104004, + "learning_rate": 2.3735378923888796e-05, + "loss": 5.8705, + "step": 17650 + }, + { + "epoch": 6.346499102333932, + "grad_norm": 7.706914901733398, + "learning_rate": 2.3721777922855123e-05, + "loss": 5.8823, + "step": 17675 + }, + { + "epoch": 6.355475763016158, + "grad_norm": 5.880135536193848, + "learning_rate": 2.3708176921821446e-05, + "loss": 6.025, + "step": 17700 + }, + { + "epoch": 6.364452423698384, + "grad_norm": 6.610002517700195, + "learning_rate": 2.3694575920787772e-05, + "loss": 5.8736, + "step": 17725 + }, + { + "epoch": 6.373429084380611, + "grad_norm": 6.272444725036621, + "learning_rate": 2.3680974919754095e-05, + "loss": 5.9971, + "step": 17750 + }, + { + "epoch": 6.382405745062837, + "grad_norm": 5.924382209777832, + "learning_rate": 2.3667373918720418e-05, + "loss": 6.0, + "step": 17775 + }, + { + "epoch": 6.391382405745063, + "grad_norm": 6.688093185424805, + "learning_rate": 2.3653772917686744e-05, + "loss": 6.016, + "step": 17800 + }, + { + "epoch": 6.400359066427289, + "grad_norm": 6.366824626922607, + "learning_rate": 2.3640171916653064e-05, + "loss": 5.9389, + "step": 17825 + }, + { + "epoch": 6.409335727109515, + "grad_norm": 7.547670841217041, + "learning_rate": 2.362657091561939e-05, + "loss": 5.9739, + "step": 17850 + }, + { + "epoch": 6.418312387791741, + "grad_norm": 6.187005519866943, + "learning_rate": 2.3612969914585716e-05, + "loss": 5.9683, + "step": 17875 + }, + { + "epoch": 6.427289048473968, + "grad_norm": 6.719017028808594, + "learning_rate": 2.359936891355204e-05, + "loss": 6.0247, + "step": 17900 + }, + { + "epoch": 6.436265709156194, + "grad_norm": 5.512848377227783, + "learning_rate": 2.3585767912518362e-05, + "loss": 5.9771, + "step": 17925 + }, + { + "epoch": 6.44524236983842, + "grad_norm": 6.735719203948975, + "learning_rate": 2.3572166911484685e-05, + "loss": 5.9676, + "step": 17950 + }, + { + "epoch": 6.454219030520647, + "grad_norm": 6.028948783874512, + "learning_rate": 2.355856591045101e-05, + "loss": 5.9454, + "step": 17975 + }, + { + "epoch": 6.4631956912028725, + "grad_norm": 6.55318021774292, + "learning_rate": 2.354496490941733e-05, + "loss": 5.953, + "step": 18000 + }, + { + "epoch": 6.472172351885098, + "grad_norm": 6.120448112487793, + "learning_rate": 2.3531363908383657e-05, + "loss": 6.0133, + "step": 18025 + }, + { + "epoch": 6.481149012567325, + "grad_norm": 5.909543514251709, + "learning_rate": 2.3517762907349984e-05, + "loss": 5.8594, + "step": 18050 + }, + { + "epoch": 6.490125673249551, + "grad_norm": 6.231213092803955, + "learning_rate": 2.3504161906316307e-05, + "loss": 6.0328, + "step": 18075 + }, + { + "epoch": 6.499102333931777, + "grad_norm": 6.281095504760742, + "learning_rate": 2.349056090528263e-05, + "loss": 6.034, + "step": 18100 + }, + { + "epoch": 6.508078994614004, + "grad_norm": 6.382711410522461, + "learning_rate": 2.3476959904248952e-05, + "loss": 5.969, + "step": 18125 + }, + { + "epoch": 6.51705565529623, + "grad_norm": 7.322602272033691, + "learning_rate": 2.346335890321528e-05, + "loss": 5.9423, + "step": 18150 + }, + { + "epoch": 6.526032315978456, + "grad_norm": 6.3949055671691895, + "learning_rate": 2.34497579021816e-05, + "loss": 5.8968, + "step": 18175 + }, + { + "epoch": 6.5350089766606825, + "grad_norm": 6.037936210632324, + "learning_rate": 2.3436156901147925e-05, + "loss": 5.9819, + "step": 18200 + }, + { + "epoch": 6.543985637342908, + "grad_norm": 7.0256876945495605, + "learning_rate": 2.342255590011425e-05, + "loss": 5.993, + "step": 18225 + }, + { + "epoch": 6.552962298025134, + "grad_norm": 6.5712175369262695, + "learning_rate": 2.340895489908057e-05, + "loss": 5.9455, + "step": 18250 + }, + { + "epoch": 6.561938958707361, + "grad_norm": 6.159936428070068, + "learning_rate": 2.3395353898046897e-05, + "loss": 5.9531, + "step": 18275 + }, + { + "epoch": 6.570915619389587, + "grad_norm": 6.35716438293457, + "learning_rate": 2.338175289701322e-05, + "loss": 5.8989, + "step": 18300 + }, + { + "epoch": 6.579892280071813, + "grad_norm": 6.238170146942139, + "learning_rate": 2.3368151895979546e-05, + "loss": 5.8734, + "step": 18325 + }, + { + "epoch": 6.58886894075404, + "grad_norm": 6.341493606567383, + "learning_rate": 2.335455089494587e-05, + "loss": 5.9352, + "step": 18350 + }, + { + "epoch": 6.597845601436266, + "grad_norm": 6.389400959014893, + "learning_rate": 2.3340949893912192e-05, + "loss": 5.9594, + "step": 18375 + }, + { + "epoch": 6.6068222621184916, + "grad_norm": 5.954926490783691, + "learning_rate": 2.3327348892878518e-05, + "loss": 5.9776, + "step": 18400 + }, + { + "epoch": 6.615798922800718, + "grad_norm": 6.335061073303223, + "learning_rate": 2.3313747891844838e-05, + "loss": 5.8925, + "step": 18425 + }, + { + "epoch": 6.624775583482944, + "grad_norm": 6.639955043792725, + "learning_rate": 2.3300146890811164e-05, + "loss": 5.9666, + "step": 18450 + }, + { + "epoch": 6.63375224416517, + "grad_norm": 5.547654151916504, + "learning_rate": 2.3286545889777487e-05, + "loss": 5.9422, + "step": 18475 + }, + { + "epoch": 6.642728904847397, + "grad_norm": 5.840072154998779, + "learning_rate": 2.3272944888743813e-05, + "loss": 5.9001, + "step": 18500 + }, + { + "epoch": 6.651705565529623, + "grad_norm": 5.738095283508301, + "learning_rate": 2.3259343887710136e-05, + "loss": 5.7659, + "step": 18525 + }, + { + "epoch": 6.660682226211849, + "grad_norm": 6.199224472045898, + "learning_rate": 2.324574288667646e-05, + "loss": 5.9353, + "step": 18550 + }, + { + "epoch": 6.669658886894076, + "grad_norm": 6.0351948738098145, + "learning_rate": 2.3232141885642785e-05, + "loss": 5.9293, + "step": 18575 + }, + { + "epoch": 6.6786355475763015, + "grad_norm": 6.220561981201172, + "learning_rate": 2.3218540884609105e-05, + "loss": 5.9053, + "step": 18600 + }, + { + "epoch": 6.687612208258528, + "grad_norm": 6.428813934326172, + "learning_rate": 2.320493988357543e-05, + "loss": 5.9716, + "step": 18625 + }, + { + "epoch": 6.696588868940754, + "grad_norm": 5.739636421203613, + "learning_rate": 2.3191338882541758e-05, + "loss": 5.8615, + "step": 18650 + }, + { + "epoch": 6.70556552962298, + "grad_norm": 6.180948257446289, + "learning_rate": 2.317773788150808e-05, + "loss": 5.9577, + "step": 18675 + }, + { + "epoch": 6.714542190305206, + "grad_norm": 7.102181434631348, + "learning_rate": 2.3164136880474403e-05, + "loss": 6.0308, + "step": 18700 + }, + { + "epoch": 6.723518850987433, + "grad_norm": 6.791645526885986, + "learning_rate": 2.3150535879440726e-05, + "loss": 5.9459, + "step": 18725 + }, + { + "epoch": 6.732495511669659, + "grad_norm": 5.84605073928833, + "learning_rate": 2.3136934878407053e-05, + "loss": 6.0125, + "step": 18750 + }, + { + "epoch": 6.741472172351886, + "grad_norm": 6.937188148498535, + "learning_rate": 2.3123333877373372e-05, + "loss": 5.9464, + "step": 18775 + }, + { + "epoch": 6.7504488330341115, + "grad_norm": 6.010704040527344, + "learning_rate": 2.31097328763397e-05, + "loss": 5.9046, + "step": 18800 + }, + { + "epoch": 6.759425493716337, + "grad_norm": 6.645330905914307, + "learning_rate": 2.3096131875306025e-05, + "loss": 5.9337, + "step": 18825 + }, + { + "epoch": 6.768402154398563, + "grad_norm": 6.296532154083252, + "learning_rate": 2.3082530874272348e-05, + "loss": 5.9277, + "step": 18850 + }, + { + "epoch": 6.77737881508079, + "grad_norm": 6.628443717956543, + "learning_rate": 2.306892987323867e-05, + "loss": 5.9995, + "step": 18875 + }, + { + "epoch": 6.786355475763016, + "grad_norm": 6.0027008056640625, + "learning_rate": 2.3055328872204994e-05, + "loss": 5.9324, + "step": 18900 + }, + { + "epoch": 6.795332136445243, + "grad_norm": 6.6854963302612305, + "learning_rate": 2.304172787117132e-05, + "loss": 5.9699, + "step": 18925 + }, + { + "epoch": 6.804308797127469, + "grad_norm": 6.620862007141113, + "learning_rate": 2.302867091017899e-05, + "loss": 5.9726, + "step": 18950 + }, + { + "epoch": 6.813285457809695, + "grad_norm": 6.0174970626831055, + "learning_rate": 2.3015069909145312e-05, + "loss": 5.9114, + "step": 18975 + }, + { + "epoch": 6.8222621184919205, + "grad_norm": 6.668771743774414, + "learning_rate": 2.300146890811164e-05, + "loss": 5.9291, + "step": 19000 + }, + { + "epoch": 6.831238779174147, + "grad_norm": 6.235230922698975, + "learning_rate": 2.2987867907077965e-05, + "loss": 5.9018, + "step": 19025 + }, + { + "epoch": 6.840215439856373, + "grad_norm": 6.5324530601501465, + "learning_rate": 2.2974266906044284e-05, + "loss": 5.952, + "step": 19050 + }, + { + "epoch": 6.8491921005386, + "grad_norm": 6.1475348472595215, + "learning_rate": 2.296066590501061e-05, + "loss": 5.871, + "step": 19075 + }, + { + "epoch": 6.858168761220826, + "grad_norm": 7.301822662353516, + "learning_rate": 2.2947064903976933e-05, + "loss": 5.9669, + "step": 19100 + }, + { + "epoch": 6.867145421903052, + "grad_norm": 6.059082508087158, + "learning_rate": 2.2933463902943256e-05, + "loss": 5.9621, + "step": 19125 + }, + { + "epoch": 6.876122082585278, + "grad_norm": 6.890921592712402, + "learning_rate": 2.291986290190958e-05, + "loss": 5.8912, + "step": 19150 + }, + { + "epoch": 6.885098743267505, + "grad_norm": 6.411311149597168, + "learning_rate": 2.2906261900875906e-05, + "loss": 5.8301, + "step": 19175 + }, + { + "epoch": 6.8940754039497305, + "grad_norm": 7.005683898925781, + "learning_rate": 2.2892660899842232e-05, + "loss": 5.8938, + "step": 19200 + }, + { + "epoch": 6.903052064631957, + "grad_norm": 6.082488536834717, + "learning_rate": 2.287905989880855e-05, + "loss": 5.9425, + "step": 19225 + }, + { + "epoch": 6.912028725314183, + "grad_norm": 6.545422077178955, + "learning_rate": 2.2865458897774878e-05, + "loss": 5.9055, + "step": 19250 + }, + { + "epoch": 6.921005385996409, + "grad_norm": 5.953825950622559, + "learning_rate": 2.28518578967412e-05, + "loss": 5.9544, + "step": 19275 + }, + { + "epoch": 6.929982046678636, + "grad_norm": 6.4229326248168945, + "learning_rate": 2.2838256895707524e-05, + "loss": 5.9816, + "step": 19300 + }, + { + "epoch": 6.938958707360862, + "grad_norm": 6.387946128845215, + "learning_rate": 2.2824655894673847e-05, + "loss": 5.9237, + "step": 19325 + }, + { + "epoch": 6.947935368043088, + "grad_norm": 6.794723033905029, + "learning_rate": 2.2811054893640173e-05, + "loss": 5.9716, + "step": 19350 + }, + { + "epoch": 6.956912028725315, + "grad_norm": 6.4051032066345215, + "learning_rate": 2.27974538926065e-05, + "loss": 5.9151, + "step": 19375 + }, + { + "epoch": 6.9658886894075405, + "grad_norm": 6.211987495422363, + "learning_rate": 2.278385289157282e-05, + "loss": 5.7965, + "step": 19400 + }, + { + "epoch": 6.974865350089766, + "grad_norm": 6.4058451652526855, + "learning_rate": 2.2770251890539145e-05, + "loss": 5.9717, + "step": 19425 + }, + { + "epoch": 6.983842010771993, + "grad_norm": 6.61019229888916, + "learning_rate": 2.2756650889505468e-05, + "loss": 5.9086, + "step": 19450 + }, + { + "epoch": 6.992818671454219, + "grad_norm": 5.987739562988281, + "learning_rate": 2.274304988847179e-05, + "loss": 5.8631, + "step": 19475 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.0744467569167319, + "eval_f1_macro": 0.0036631679497793816, + "eval_f1_micro": 0.0744467569167319, + "eval_f1_weighted": 0.034595568215339476, + "eval_loss": 6.776797771453857, + "eval_precision_macro": 0.0034985469514193625, + "eval_precision_micro": 0.0744467569167319, + "eval_precision_weighted": 0.028360246141284295, + "eval_recall_macro": 0.00753598195095026, + "eval_recall_micro": 0.0744467569167319, + "eval_recall_weighted": 0.0744467569167319, + "eval_runtime": 84.8596, + "eval_samples_per_second": 617.173, + "eval_steps_per_second": 9.651, + "step": 19495 + }, + { + "epoch": 7.001795332136445, + "grad_norm": 5.556822776794434, + "learning_rate": 2.2729448887438117e-05, + "loss": 5.8233, + "step": 19500 + }, + { + "epoch": 7.010771992818672, + "grad_norm": 5.829295635223389, + "learning_rate": 2.271584788640444e-05, + "loss": 5.609, + "step": 19525 + }, + { + "epoch": 7.019748653500898, + "grad_norm": 6.4555206298828125, + "learning_rate": 2.2702246885370766e-05, + "loss": 5.579, + "step": 19550 + }, + { + "epoch": 7.028725314183124, + "grad_norm": 6.140953540802002, + "learning_rate": 2.2688645884337086e-05, + "loss": 5.6672, + "step": 19575 + }, + { + "epoch": 7.03770197486535, + "grad_norm": 6.755456924438477, + "learning_rate": 2.2675044883303412e-05, + "loss": 5.7174, + "step": 19600 + }, + { + "epoch": 7.046678635547576, + "grad_norm": 6.674192905426025, + "learning_rate": 2.2661443882269735e-05, + "loss": 5.6422, + "step": 19625 + }, + { + "epoch": 7.055655296229802, + "grad_norm": 6.046741962432861, + "learning_rate": 2.2647842881236058e-05, + "loss": 5.7104, + "step": 19650 + }, + { + "epoch": 7.064631956912029, + "grad_norm": 6.7078776359558105, + "learning_rate": 2.2634241880202384e-05, + "loss": 5.6039, + "step": 19675 + }, + { + "epoch": 7.073608617594255, + "grad_norm": 6.653388023376465, + "learning_rate": 2.2620640879168707e-05, + "loss": 5.7359, + "step": 19700 + }, + { + "epoch": 7.082585278276481, + "grad_norm": 5.977482318878174, + "learning_rate": 2.2607039878135034e-05, + "loss": 5.6459, + "step": 19725 + }, + { + "epoch": 7.091561938958708, + "grad_norm": 6.353503227233887, + "learning_rate": 2.2593438877101353e-05, + "loss": 5.6278, + "step": 19750 + }, + { + "epoch": 7.100538599640934, + "grad_norm": 6.304482460021973, + "learning_rate": 2.257983787606768e-05, + "loss": 5.6226, + "step": 19775 + }, + { + "epoch": 7.1095152603231595, + "grad_norm": 6.658697128295898, + "learning_rate": 2.2566236875034006e-05, + "loss": 5.6457, + "step": 19800 + }, + { + "epoch": 7.118491921005386, + "grad_norm": 6.028088569641113, + "learning_rate": 2.2552635874000325e-05, + "loss": 5.7036, + "step": 19825 + }, + { + "epoch": 7.127468581687612, + "grad_norm": 6.678377151489258, + "learning_rate": 2.2539034872966652e-05, + "loss": 5.6367, + "step": 19850 + }, + { + "epoch": 7.136445242369838, + "grad_norm": 6.422173976898193, + "learning_rate": 2.2525433871932975e-05, + "loss": 5.7023, + "step": 19875 + }, + { + "epoch": 7.145421903052065, + "grad_norm": 6.91300106048584, + "learning_rate": 2.25118328708993e-05, + "loss": 5.679, + "step": 19900 + }, + { + "epoch": 7.154398563734291, + "grad_norm": 6.7222185134887695, + "learning_rate": 2.249823186986562e-05, + "loss": 5.6775, + "step": 19925 + }, + { + "epoch": 7.163375224416517, + "grad_norm": 6.931186676025391, + "learning_rate": 2.2484630868831947e-05, + "loss": 5.7135, + "step": 19950 + }, + { + "epoch": 7.1723518850987436, + "grad_norm": 6.536242961883545, + "learning_rate": 2.2471029867798273e-05, + "loss": 5.6971, + "step": 19975 + }, + { + "epoch": 7.1813285457809695, + "grad_norm": 7.188952922821045, + "learning_rate": 2.2457428866764593e-05, + "loss": 5.642, + "step": 20000 + }, + { + "epoch": 7.190305206463195, + "grad_norm": 6.5444231033325195, + "learning_rate": 2.244382786573092e-05, + "loss": 5.5429, + "step": 20025 + }, + { + "epoch": 7.199281867145422, + "grad_norm": 6.928316116333008, + "learning_rate": 2.2430226864697242e-05, + "loss": 5.6052, + "step": 20050 + }, + { + "epoch": 7.208258527827648, + "grad_norm": 5.921298980712891, + "learning_rate": 2.2416625863663565e-05, + "loss": 5.7054, + "step": 20075 + }, + { + "epoch": 7.217235188509874, + "grad_norm": 7.2444539070129395, + "learning_rate": 2.2403024862629888e-05, + "loss": 5.5717, + "step": 20100 + }, + { + "epoch": 7.226211849192101, + "grad_norm": 6.392436504364014, + "learning_rate": 2.2389423861596214e-05, + "loss": 5.5921, + "step": 20125 + }, + { + "epoch": 7.235188509874327, + "grad_norm": 5.97305965423584, + "learning_rate": 2.237582286056254e-05, + "loss": 5.6225, + "step": 20150 + }, + { + "epoch": 7.244165170556553, + "grad_norm": 7.177309989929199, + "learning_rate": 2.236222185952886e-05, + "loss": 5.5967, + "step": 20175 + }, + { + "epoch": 7.253141831238779, + "grad_norm": 6.433371543884277, + "learning_rate": 2.2348620858495186e-05, + "loss": 5.6549, + "step": 20200 + }, + { + "epoch": 7.262118491921005, + "grad_norm": 6.7610931396484375, + "learning_rate": 2.233501985746151e-05, + "loss": 5.6062, + "step": 20225 + }, + { + "epoch": 7.271095152603231, + "grad_norm": 6.779208183288574, + "learning_rate": 2.2321418856427832e-05, + "loss": 5.7143, + "step": 20250 + }, + { + "epoch": 7.280071813285458, + "grad_norm": 6.562967777252197, + "learning_rate": 2.230781785539416e-05, + "loss": 5.7285, + "step": 20275 + }, + { + "epoch": 7.289048473967684, + "grad_norm": 6.089322090148926, + "learning_rate": 2.229421685436048e-05, + "loss": 5.5517, + "step": 20300 + }, + { + "epoch": 7.29802513464991, + "grad_norm": 6.653488636016846, + "learning_rate": 2.2280615853326808e-05, + "loss": 5.5935, + "step": 20325 + }, + { + "epoch": 7.307001795332137, + "grad_norm": 6.642482280731201, + "learning_rate": 2.2267014852293127e-05, + "loss": 5.617, + "step": 20350 + }, + { + "epoch": 7.315978456014363, + "grad_norm": 6.1333746910095215, + "learning_rate": 2.2253413851259454e-05, + "loss": 5.7305, + "step": 20375 + }, + { + "epoch": 7.3249551166965885, + "grad_norm": 7.009139537811279, + "learning_rate": 2.2239812850225777e-05, + "loss": 5.6923, + "step": 20400 + }, + { + "epoch": 7.333931777378815, + "grad_norm": 6.850212574005127, + "learning_rate": 2.22262118491921e-05, + "loss": 5.7117, + "step": 20425 + }, + { + "epoch": 7.342908438061041, + "grad_norm": 6.620204925537109, + "learning_rate": 2.2212610848158426e-05, + "loss": 5.7273, + "step": 20450 + }, + { + "epoch": 7.351885098743267, + "grad_norm": 6.509439468383789, + "learning_rate": 2.219900984712475e-05, + "loss": 5.6961, + "step": 20475 + }, + { + "epoch": 7.360861759425494, + "grad_norm": 6.4462175369262695, + "learning_rate": 2.2185408846091075e-05, + "loss": 5.6606, + "step": 20500 + }, + { + "epoch": 7.36983842010772, + "grad_norm": 6.805684566497803, + "learning_rate": 2.2171807845057395e-05, + "loss": 5.6661, + "step": 20525 + }, + { + "epoch": 7.378815080789946, + "grad_norm": 7.137880325317383, + "learning_rate": 2.215820684402372e-05, + "loss": 5.661, + "step": 20550 + }, + { + "epoch": 7.3877917414721725, + "grad_norm": 6.145432472229004, + "learning_rate": 2.2144605842990047e-05, + "loss": 5.6423, + "step": 20575 + }, + { + "epoch": 7.3967684021543985, + "grad_norm": 5.8184428215026855, + "learning_rate": 2.2131004841956367e-05, + "loss": 5.6215, + "step": 20600 + }, + { + "epoch": 7.405745062836624, + "grad_norm": 7.280550479888916, + "learning_rate": 2.2117403840922693e-05, + "loss": 5.6036, + "step": 20625 + }, + { + "epoch": 7.414721723518851, + "grad_norm": 6.6761579513549805, + "learning_rate": 2.2103802839889016e-05, + "loss": 5.6328, + "step": 20650 + }, + { + "epoch": 7.423698384201077, + "grad_norm": 7.014335632324219, + "learning_rate": 2.2090201838855342e-05, + "loss": 5.6899, + "step": 20675 + }, + { + "epoch": 7.432675044883303, + "grad_norm": 6.469388484954834, + "learning_rate": 2.2076600837821662e-05, + "loss": 5.7752, + "step": 20700 + }, + { + "epoch": 7.44165170556553, + "grad_norm": 6.872607707977295, + "learning_rate": 2.2062999836787988e-05, + "loss": 5.6329, + "step": 20725 + }, + { + "epoch": 7.450628366247756, + "grad_norm": 6.775146961212158, + "learning_rate": 2.2049398835754314e-05, + "loss": 5.7192, + "step": 20750 + }, + { + "epoch": 7.459605026929982, + "grad_norm": 6.907915115356445, + "learning_rate": 2.2035797834720634e-05, + "loss": 5.7694, + "step": 20775 + }, + { + "epoch": 7.468581687612208, + "grad_norm": 7.009703159332275, + "learning_rate": 2.202219683368696e-05, + "loss": 5.6102, + "step": 20800 + }, + { + "epoch": 7.477558348294434, + "grad_norm": 6.614011764526367, + "learning_rate": 2.2008595832653283e-05, + "loss": 5.7138, + "step": 20825 + }, + { + "epoch": 7.486535008976661, + "grad_norm": 6.360806941986084, + "learning_rate": 2.199499483161961e-05, + "loss": 5.5825, + "step": 20850 + }, + { + "epoch": 7.495511669658887, + "grad_norm": 6.215551853179932, + "learning_rate": 2.198139383058593e-05, + "loss": 5.7086, + "step": 20875 + }, + { + "epoch": 7.504488330341113, + "grad_norm": 6.4077558517456055, + "learning_rate": 2.1967792829552255e-05, + "loss": 5.6438, + "step": 20900 + }, + { + "epoch": 7.513464991023339, + "grad_norm": 6.877986431121826, + "learning_rate": 2.1954191828518582e-05, + "loss": 5.7822, + "step": 20925 + }, + { + "epoch": 7.522441651705566, + "grad_norm": 6.4825615882873535, + "learning_rate": 2.19405908274849e-05, + "loss": 5.5948, + "step": 20950 + }, + { + "epoch": 7.531418312387792, + "grad_norm": 7.496140956878662, + "learning_rate": 2.1926989826451228e-05, + "loss": 5.6567, + "step": 20975 + }, + { + "epoch": 7.540394973070018, + "grad_norm": 7.285343170166016, + "learning_rate": 2.191338882541755e-05, + "loss": 5.5912, + "step": 21000 + }, + { + "epoch": 7.549371633752244, + "grad_norm": 6.460578918457031, + "learning_rate": 2.1899787824383877e-05, + "loss": 5.629, + "step": 21025 + }, + { + "epoch": 7.55834829443447, + "grad_norm": 5.92124080657959, + "learning_rate": 2.18861868233502e-05, + "loss": 5.685, + "step": 21050 + }, + { + "epoch": 7.567324955116696, + "grad_norm": 6.797677040100098, + "learning_rate": 2.1872585822316523e-05, + "loss": 5.6664, + "step": 21075 + }, + { + "epoch": 7.576301615798923, + "grad_norm": 6.591789722442627, + "learning_rate": 2.185898482128285e-05, + "loss": 5.5537, + "step": 21100 + }, + { + "epoch": 7.585278276481149, + "grad_norm": 6.387152671813965, + "learning_rate": 2.184538382024917e-05, + "loss": 5.6443, + "step": 21125 + }, + { + "epoch": 7.594254937163376, + "grad_norm": 7.262204647064209, + "learning_rate": 2.1831782819215495e-05, + "loss": 5.616, + "step": 21150 + }, + { + "epoch": 7.6032315978456015, + "grad_norm": 6.380317211151123, + "learning_rate": 2.1818181818181818e-05, + "loss": 5.6214, + "step": 21175 + }, + { + "epoch": 7.6122082585278275, + "grad_norm": 7.110475063323975, + "learning_rate": 2.1804580817148144e-05, + "loss": 5.6917, + "step": 21200 + }, + { + "epoch": 7.621184919210053, + "grad_norm": 6.102097511291504, + "learning_rate": 2.1790979816114467e-05, + "loss": 5.5971, + "step": 21225 + }, + { + "epoch": 7.63016157989228, + "grad_norm": 6.3271074295043945, + "learning_rate": 2.177737881508079e-05, + "loss": 5.7113, + "step": 21250 + }, + { + "epoch": 7.639138240574506, + "grad_norm": 7.220957279205322, + "learning_rate": 2.1763777814047116e-05, + "loss": 5.6334, + "step": 21275 + }, + { + "epoch": 7.648114901256733, + "grad_norm": 6.601646900177002, + "learning_rate": 2.1750176813013436e-05, + "loss": 5.5745, + "step": 21300 + }, + { + "epoch": 7.657091561938959, + "grad_norm": 6.524709224700928, + "learning_rate": 2.1736575811979762e-05, + "loss": 5.5333, + "step": 21325 + }, + { + "epoch": 7.666068222621185, + "grad_norm": 6.756369113922119, + "learning_rate": 2.172297481094609e-05, + "loss": 5.6472, + "step": 21350 + }, + { + "epoch": 7.6750448833034115, + "grad_norm": 6.383217811584473, + "learning_rate": 2.170937380991241e-05, + "loss": 5.692, + "step": 21375 + }, + { + "epoch": 7.684021543985637, + "grad_norm": 7.318099021911621, + "learning_rate": 2.1695772808878734e-05, + "loss": 5.6915, + "step": 21400 + }, + { + "epoch": 7.692998204667863, + "grad_norm": 7.586529731750488, + "learning_rate": 2.1682171807845057e-05, + "loss": 5.7309, + "step": 21425 + }, + { + "epoch": 7.70197486535009, + "grad_norm": 6.5823211669921875, + "learning_rate": 2.1668570806811384e-05, + "loss": 5.559, + "step": 21450 + }, + { + "epoch": 7.710951526032316, + "grad_norm": 6.923408031463623, + "learning_rate": 2.1654969805777703e-05, + "loss": 5.68, + "step": 21475 + }, + { + "epoch": 7.719928186714542, + "grad_norm": 7.580366134643555, + "learning_rate": 2.164136880474403e-05, + "loss": 5.7561, + "step": 21500 + }, + { + "epoch": 7.728904847396769, + "grad_norm": 7.2258620262146, + "learning_rate": 2.1627767803710356e-05, + "loss": 5.5981, + "step": 21525 + }, + { + "epoch": 7.737881508078995, + "grad_norm": 7.518596172332764, + "learning_rate": 2.1614166802676675e-05, + "loss": 5.7574, + "step": 21550 + }, + { + "epoch": 7.746858168761221, + "grad_norm": 6.3187103271484375, + "learning_rate": 2.1600565801643e-05, + "loss": 5.6277, + "step": 21575 + }, + { + "epoch": 7.755834829443447, + "grad_norm": 7.41030740737915, + "learning_rate": 2.1586964800609325e-05, + "loss": 5.6742, + "step": 21600 + }, + { + "epoch": 7.764811490125673, + "grad_norm": 6.551275253295898, + "learning_rate": 2.157336379957565e-05, + "loss": 5.6329, + "step": 21625 + }, + { + "epoch": 7.773788150807899, + "grad_norm": 6.8663225173950195, + "learning_rate": 2.155976279854197e-05, + "loss": 5.6547, + "step": 21650 + }, + { + "epoch": 7.782764811490126, + "grad_norm": 6.90118932723999, + "learning_rate": 2.1546161797508297e-05, + "loss": 5.5683, + "step": 21675 + }, + { + "epoch": 7.791741472172352, + "grad_norm": 6.164306163787842, + "learning_rate": 2.1532560796474623e-05, + "loss": 5.6104, + "step": 21700 + }, + { + "epoch": 7.800718132854578, + "grad_norm": 7.06691837310791, + "learning_rate": 2.1518959795440943e-05, + "loss": 5.7071, + "step": 21725 + }, + { + "epoch": 7.809694793536805, + "grad_norm": 7.226975440979004, + "learning_rate": 2.150535879440727e-05, + "loss": 5.7383, + "step": 21750 + }, + { + "epoch": 7.8186714542190305, + "grad_norm": 6.844762802124023, + "learning_rate": 2.1491757793373592e-05, + "loss": 5.6647, + "step": 21775 + }, + { + "epoch": 7.8276481149012564, + "grad_norm": 6.8959879875183105, + "learning_rate": 2.1478156792339918e-05, + "loss": 5.623, + "step": 21800 + }, + { + "epoch": 7.836624775583483, + "grad_norm": 6.767614841461182, + "learning_rate": 2.146455579130624e-05, + "loss": 5.6971, + "step": 21825 + }, + { + "epoch": 7.845601436265709, + "grad_norm": 6.315961837768555, + "learning_rate": 2.1450954790272564e-05, + "loss": 5.6678, + "step": 21850 + }, + { + "epoch": 7.854578096947935, + "grad_norm": 6.509133815765381, + "learning_rate": 2.143735378923889e-05, + "loss": 5.6412, + "step": 21875 + }, + { + "epoch": 7.863554757630162, + "grad_norm": 6.633342266082764, + "learning_rate": 2.142375278820521e-05, + "loss": 5.6523, + "step": 21900 + }, + { + "epoch": 7.872531418312388, + "grad_norm": 6.822045803070068, + "learning_rate": 2.1410151787171536e-05, + "loss": 5.5978, + "step": 21925 + }, + { + "epoch": 7.881508078994614, + "grad_norm": 6.395514488220215, + "learning_rate": 2.139655078613786e-05, + "loss": 5.7021, + "step": 21950 + }, + { + "epoch": 7.8904847396768405, + "grad_norm": 6.564002990722656, + "learning_rate": 2.1382949785104185e-05, + "loss": 5.7583, + "step": 21975 + }, + { + "epoch": 7.899461400359066, + "grad_norm": 6.867312908172607, + "learning_rate": 2.136934878407051e-05, + "loss": 5.5786, + "step": 22000 + }, + { + "epoch": 7.908438061041292, + "grad_norm": 6.734884738922119, + "learning_rate": 2.135574778303683e-05, + "loss": 5.7288, + "step": 22025 + }, + { + "epoch": 7.917414721723519, + "grad_norm": 7.559576511383057, + "learning_rate": 2.1342146782003158e-05, + "loss": 5.7189, + "step": 22050 + }, + { + "epoch": 7.926391382405745, + "grad_norm": 6.911487102508545, + "learning_rate": 2.1328545780969477e-05, + "loss": 5.6497, + "step": 22075 + }, + { + "epoch": 7.935368043087971, + "grad_norm": 6.54379415512085, + "learning_rate": 2.1314944779935803e-05, + "loss": 5.6518, + "step": 22100 + }, + { + "epoch": 7.944344703770198, + "grad_norm": 7.149918079376221, + "learning_rate": 2.130134377890213e-05, + "loss": 5.7975, + "step": 22125 + }, + { + "epoch": 7.953321364452424, + "grad_norm": 7.397727966308594, + "learning_rate": 2.1287742777868453e-05, + "loss": 5.7447, + "step": 22150 + }, + { + "epoch": 7.96229802513465, + "grad_norm": 6.870881080627441, + "learning_rate": 2.1274141776834776e-05, + "loss": 5.6213, + "step": 22175 + }, + { + "epoch": 7.971274685816876, + "grad_norm": 7.491244316101074, + "learning_rate": 2.12605407758011e-05, + "loss": 5.5786, + "step": 22200 + }, + { + "epoch": 7.980251346499102, + "grad_norm": 6.856171131134033, + "learning_rate": 2.1246939774767425e-05, + "loss": 5.4664, + "step": 22225 + }, + { + "epoch": 7.989228007181328, + "grad_norm": 7.966314792633057, + "learning_rate": 2.1233338773733744e-05, + "loss": 5.7444, + "step": 22250 + }, + { + "epoch": 7.998204667863555, + "grad_norm": 6.466161727905273, + "learning_rate": 2.121973777270007e-05, + "loss": 5.6646, + "step": 22275 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.07719626525117904, + "eval_f1_macro": 0.004319658541904185, + "eval_f1_micro": 0.07719626525117904, + "eval_f1_weighted": 0.037328101391419254, + "eval_loss": 6.681356430053711, + "eval_precision_macro": 0.003813586666311267, + "eval_precision_micro": 0.07719626525117904, + "eval_precision_weighted": 0.029669233370008683, + "eval_recall_macro": 0.008761114362121286, + "eval_recall_micro": 0.07719626525117904, + "eval_recall_weighted": 0.07719626525117904, + "eval_runtime": 83.8966, + "eval_samples_per_second": 624.256, + "eval_steps_per_second": 9.762, + "step": 22280 + }, + { + "epoch": 8.007181328545782, + "grad_norm": 7.180635452270508, + "learning_rate": 2.1206136771666397e-05, + "loss": 5.3462, + "step": 22300 + }, + { + "epoch": 8.016157989228008, + "grad_norm": 6.882519721984863, + "learning_rate": 2.119253577063272e-05, + "loss": 5.3629, + "step": 22325 + }, + { + "epoch": 8.025134649910234, + "grad_norm": 6.949886322021484, + "learning_rate": 2.1178934769599043e-05, + "loss": 5.2687, + "step": 22350 + }, + { + "epoch": 8.03411131059246, + "grad_norm": 6.296696662902832, + "learning_rate": 2.1165333768565366e-05, + "loss": 5.4242, + "step": 22375 + }, + { + "epoch": 8.043087971274685, + "grad_norm": 6.325628280639648, + "learning_rate": 2.1151732767531692e-05, + "loss": 5.3774, + "step": 22400 + }, + { + "epoch": 8.052064631956911, + "grad_norm": 6.517385959625244, + "learning_rate": 2.1138131766498015e-05, + "loss": 5.4023, + "step": 22425 + }, + { + "epoch": 8.061041292639139, + "grad_norm": 7.241919994354248, + "learning_rate": 2.1124530765464338e-05, + "loss": 5.3725, + "step": 22450 + }, + { + "epoch": 8.070017953321365, + "grad_norm": 7.089277267456055, + "learning_rate": 2.1110929764430664e-05, + "loss": 5.3862, + "step": 22475 + }, + { + "epoch": 8.07899461400359, + "grad_norm": 7.187988758087158, + "learning_rate": 2.1097328763396987e-05, + "loss": 5.4048, + "step": 22500 + }, + { + "epoch": 8.087971274685817, + "grad_norm": 7.604076862335205, + "learning_rate": 2.108372776236331e-05, + "loss": 5.364, + "step": 22525 + }, + { + "epoch": 8.096947935368043, + "grad_norm": 6.850844383239746, + "learning_rate": 2.1070126761329633e-05, + "loss": 5.4433, + "step": 22550 + }, + { + "epoch": 8.105924596050269, + "grad_norm": 7.3492350578308105, + "learning_rate": 2.105652576029596e-05, + "loss": 5.4749, + "step": 22575 + }, + { + "epoch": 8.114901256732496, + "grad_norm": 7.082913398742676, + "learning_rate": 2.1042924759262282e-05, + "loss": 5.302, + "step": 22600 + }, + { + "epoch": 8.123877917414722, + "grad_norm": 7.320722579956055, + "learning_rate": 2.1029323758228605e-05, + "loss": 5.4154, + "step": 22625 + }, + { + "epoch": 8.132854578096948, + "grad_norm": 6.40757942199707, + "learning_rate": 2.101572275719493e-05, + "loss": 5.3994, + "step": 22650 + }, + { + "epoch": 8.141831238779174, + "grad_norm": 6.264216899871826, + "learning_rate": 2.1002121756161255e-05, + "loss": 5.4337, + "step": 22675 + }, + { + "epoch": 8.1508078994614, + "grad_norm": 6.885189533233643, + "learning_rate": 2.0988520755127577e-05, + "loss": 5.4803, + "step": 22700 + }, + { + "epoch": 8.159784560143626, + "grad_norm": 7.4632887840271, + "learning_rate": 2.09749197540939e-05, + "loss": 5.398, + "step": 22725 + }, + { + "epoch": 8.168761220825854, + "grad_norm": 7.446664810180664, + "learning_rate": 2.0961318753060227e-05, + "loss": 5.4975, + "step": 22750 + }, + { + "epoch": 8.17773788150808, + "grad_norm": 6.5246758460998535, + "learning_rate": 2.094771775202655e-05, + "loss": 5.3759, + "step": 22775 + }, + { + "epoch": 8.186714542190305, + "grad_norm": 7.547301769256592, + "learning_rate": 2.0934116750992873e-05, + "loss": 5.2747, + "step": 22800 + }, + { + "epoch": 8.195691202872531, + "grad_norm": 7.342228412628174, + "learning_rate": 2.09205157499592e-05, + "loss": 5.4452, + "step": 22825 + }, + { + "epoch": 8.204667863554757, + "grad_norm": 6.6894850730896, + "learning_rate": 2.0906914748925522e-05, + "loss": 5.3736, + "step": 22850 + }, + { + "epoch": 8.213644524236983, + "grad_norm": 6.4289021492004395, + "learning_rate": 2.0893313747891845e-05, + "loss": 5.3933, + "step": 22875 + }, + { + "epoch": 8.22262118491921, + "grad_norm": 6.857594013214111, + "learning_rate": 2.087971274685817e-05, + "loss": 5.4153, + "step": 22900 + }, + { + "epoch": 8.231597845601437, + "grad_norm": 6.968907833099365, + "learning_rate": 2.0866111745824494e-05, + "loss": 5.4272, + "step": 22925 + }, + { + "epoch": 8.240574506283663, + "grad_norm": 7.569032192230225, + "learning_rate": 2.0852510744790817e-05, + "loss": 5.4206, + "step": 22950 + }, + { + "epoch": 8.249551166965889, + "grad_norm": 6.4862213134765625, + "learning_rate": 2.083890974375714e-05, + "loss": 5.4838, + "step": 22975 + }, + { + "epoch": 8.258527827648114, + "grad_norm": 6.622523784637451, + "learning_rate": 2.0825308742723466e-05, + "loss": 5.4042, + "step": 23000 + }, + { + "epoch": 8.26750448833034, + "grad_norm": 6.56260347366333, + "learning_rate": 2.081170774168979e-05, + "loss": 5.2686, + "step": 23025 + }, + { + "epoch": 8.276481149012568, + "grad_norm": 6.283070087432861, + "learning_rate": 2.0798106740656112e-05, + "loss": 5.4654, + "step": 23050 + }, + { + "epoch": 8.285457809694794, + "grad_norm": 7.152722358703613, + "learning_rate": 2.078450573962244e-05, + "loss": 5.3554, + "step": 23075 + }, + { + "epoch": 8.29443447037702, + "grad_norm": 7.795180320739746, + "learning_rate": 2.077090473858876e-05, + "loss": 5.5185, + "step": 23100 + }, + { + "epoch": 8.303411131059246, + "grad_norm": 6.521443843841553, + "learning_rate": 2.0757303737555084e-05, + "loss": 5.4819, + "step": 23125 + }, + { + "epoch": 8.312387791741472, + "grad_norm": 6.7371649742126465, + "learning_rate": 2.0743702736521407e-05, + "loss": 5.3504, + "step": 23150 + }, + { + "epoch": 8.321364452423698, + "grad_norm": 6.809395790100098, + "learning_rate": 2.0730101735487733e-05, + "loss": 5.4815, + "step": 23175 + }, + { + "epoch": 8.330341113105925, + "grad_norm": 7.742832660675049, + "learning_rate": 2.0716500734454056e-05, + "loss": 5.3829, + "step": 23200 + }, + { + "epoch": 8.339317773788151, + "grad_norm": 6.835142135620117, + "learning_rate": 2.0703443773461725e-05, + "loss": 5.3554, + "step": 23225 + }, + { + "epoch": 8.348294434470377, + "grad_norm": 7.4149250984191895, + "learning_rate": 2.0689842772428052e-05, + "loss": 5.4279, + "step": 23250 + }, + { + "epoch": 8.357271095152603, + "grad_norm": 7.564754486083984, + "learning_rate": 2.0676241771394375e-05, + "loss": 5.4112, + "step": 23275 + }, + { + "epoch": 8.366247755834829, + "grad_norm": 7.517561912536621, + "learning_rate": 2.0662640770360698e-05, + "loss": 5.3774, + "step": 23300 + }, + { + "epoch": 8.375224416517055, + "grad_norm": 7.345132350921631, + "learning_rate": 2.0649039769327024e-05, + "loss": 5.3385, + "step": 23325 + }, + { + "epoch": 8.384201077199283, + "grad_norm": 7.545169353485107, + "learning_rate": 2.0635438768293347e-05, + "loss": 5.4301, + "step": 23350 + }, + { + "epoch": 8.393177737881508, + "grad_norm": 7.066343784332275, + "learning_rate": 2.062183776725967e-05, + "loss": 5.4561, + "step": 23375 + }, + { + "epoch": 8.402154398563734, + "grad_norm": 7.50927734375, + "learning_rate": 2.0608236766225993e-05, + "loss": 5.4233, + "step": 23400 + }, + { + "epoch": 8.41113105924596, + "grad_norm": 6.844120025634766, + "learning_rate": 2.059463576519232e-05, + "loss": 5.3883, + "step": 23425 + }, + { + "epoch": 8.420107719928186, + "grad_norm": 6.786802768707275, + "learning_rate": 2.0581034764158645e-05, + "loss": 5.3601, + "step": 23450 + }, + { + "epoch": 8.429084380610412, + "grad_norm": 7.06235408782959, + "learning_rate": 2.0567433763124965e-05, + "loss": 5.4243, + "step": 23475 + }, + { + "epoch": 8.43806104129264, + "grad_norm": 7.960052967071533, + "learning_rate": 2.055383276209129e-05, + "loss": 5.3483, + "step": 23500 + }, + { + "epoch": 8.447037701974866, + "grad_norm": 6.72224760055542, + "learning_rate": 2.0540231761057614e-05, + "loss": 5.3682, + "step": 23525 + }, + { + "epoch": 8.456014362657092, + "grad_norm": 6.8185648918151855, + "learning_rate": 2.0526630760023937e-05, + "loss": 5.3229, + "step": 23550 + }, + { + "epoch": 8.464991023339318, + "grad_norm": 7.835126876831055, + "learning_rate": 2.051302975899026e-05, + "loss": 5.3824, + "step": 23575 + }, + { + "epoch": 8.473967684021543, + "grad_norm": 7.367274284362793, + "learning_rate": 2.0499428757956586e-05, + "loss": 5.4727, + "step": 23600 + }, + { + "epoch": 8.48294434470377, + "grad_norm": 7.616043567657471, + "learning_rate": 2.0485827756922913e-05, + "loss": 5.3969, + "step": 23625 + }, + { + "epoch": 8.491921005385997, + "grad_norm": 6.751213073730469, + "learning_rate": 2.0472226755889232e-05, + "loss": 5.4083, + "step": 23650 + }, + { + "epoch": 8.500897666068223, + "grad_norm": 7.792051792144775, + "learning_rate": 2.045862575485556e-05, + "loss": 5.3752, + "step": 23675 + }, + { + "epoch": 8.509874326750449, + "grad_norm": 6.7318854331970215, + "learning_rate": 2.044502475382188e-05, + "loss": 5.5511, + "step": 23700 + }, + { + "epoch": 8.518850987432675, + "grad_norm": 8.695761680603027, + "learning_rate": 2.0431423752788204e-05, + "loss": 5.3765, + "step": 23725 + }, + { + "epoch": 8.5278276481149, + "grad_norm": 6.352445125579834, + "learning_rate": 2.041782275175453e-05, + "loss": 5.4355, + "step": 23750 + }, + { + "epoch": 8.536804308797127, + "grad_norm": 6.985531806945801, + "learning_rate": 2.0404221750720854e-05, + "loss": 5.4591, + "step": 23775 + }, + { + "epoch": 8.545780969479354, + "grad_norm": 8.362007141113281, + "learning_rate": 2.039062074968718e-05, + "loss": 5.4057, + "step": 23800 + }, + { + "epoch": 8.55475763016158, + "grad_norm": 7.9292192459106445, + "learning_rate": 2.03770197486535e-05, + "loss": 5.407, + "step": 23825 + }, + { + "epoch": 8.563734290843806, + "grad_norm": 7.74641227722168, + "learning_rate": 2.0363418747619826e-05, + "loss": 5.41, + "step": 23850 + }, + { + "epoch": 8.572710951526032, + "grad_norm": 7.601282596588135, + "learning_rate": 2.034981774658615e-05, + "loss": 5.4782, + "step": 23875 + }, + { + "epoch": 8.581687612208258, + "grad_norm": 6.74360990524292, + "learning_rate": 2.033621674555247e-05, + "loss": 5.4395, + "step": 23900 + }, + { + "epoch": 8.590664272890486, + "grad_norm": 6.1392364501953125, + "learning_rate": 2.0322615744518798e-05, + "loss": 5.4197, + "step": 23925 + }, + { + "epoch": 8.599640933572712, + "grad_norm": 7.156862258911133, + "learning_rate": 2.030901474348512e-05, + "loss": 5.3884, + "step": 23950 + }, + { + "epoch": 8.608617594254937, + "grad_norm": 7.271120071411133, + "learning_rate": 2.0295413742451447e-05, + "loss": 5.4937, + "step": 23975 + }, + { + "epoch": 8.617594254937163, + "grad_norm": 7.76527214050293, + "learning_rate": 2.0281812741417767e-05, + "loss": 5.3661, + "step": 24000 + }, + { + "epoch": 8.62657091561939, + "grad_norm": 6.986609935760498, + "learning_rate": 2.0268211740384093e-05, + "loss": 5.3995, + "step": 24025 + }, + { + "epoch": 8.635547576301615, + "grad_norm": 7.355526447296143, + "learning_rate": 2.0254610739350416e-05, + "loss": 5.3815, + "step": 24050 + }, + { + "epoch": 8.644524236983841, + "grad_norm": 7.273435115814209, + "learning_rate": 2.024100973831674e-05, + "loss": 5.4607, + "step": 24075 + }, + { + "epoch": 8.653500897666069, + "grad_norm": 7.41044282913208, + "learning_rate": 2.0227408737283065e-05, + "loss": 5.463, + "step": 24100 + }, + { + "epoch": 8.662477558348295, + "grad_norm": 8.142014503479004, + "learning_rate": 2.0213807736249388e-05, + "loss": 5.3004, + "step": 24125 + }, + { + "epoch": 8.67145421903052, + "grad_norm": 7.633316516876221, + "learning_rate": 2.0200206735215714e-05, + "loss": 5.4715, + "step": 24150 + }, + { + "epoch": 8.680430879712747, + "grad_norm": 7.225667953491211, + "learning_rate": 2.0186605734182034e-05, + "loss": 5.4314, + "step": 24175 + }, + { + "epoch": 8.689407540394972, + "grad_norm": 7.206058025360107, + "learning_rate": 2.017300473314836e-05, + "loss": 5.4719, + "step": 24200 + }, + { + "epoch": 8.6983842010772, + "grad_norm": 7.614555835723877, + "learning_rate": 2.0159403732114687e-05, + "loss": 5.451, + "step": 24225 + }, + { + "epoch": 8.707360861759426, + "grad_norm": 6.842143535614014, + "learning_rate": 2.0145802731081006e-05, + "loss": 5.4271, + "step": 24250 + }, + { + "epoch": 8.716337522441652, + "grad_norm": 8.072797775268555, + "learning_rate": 2.0132201730047332e-05, + "loss": 5.506, + "step": 24275 + }, + { + "epoch": 8.725314183123878, + "grad_norm": 7.208042144775391, + "learning_rate": 2.0118600729013655e-05, + "loss": 5.4739, + "step": 24300 + }, + { + "epoch": 8.734290843806104, + "grad_norm": 8.013249397277832, + "learning_rate": 2.0104999727979982e-05, + "loss": 5.4103, + "step": 24325 + }, + { + "epoch": 8.74326750448833, + "grad_norm": 7.246506690979004, + "learning_rate": 2.00913987269463e-05, + "loss": 5.4811, + "step": 24350 + }, + { + "epoch": 8.752244165170557, + "grad_norm": 8.653006553649902, + "learning_rate": 2.0077797725912628e-05, + "loss": 5.4462, + "step": 24375 + }, + { + "epoch": 8.761220825852783, + "grad_norm": 7.144380569458008, + "learning_rate": 2.0064196724878954e-05, + "loss": 5.4214, + "step": 24400 + }, + { + "epoch": 8.77019748653501, + "grad_norm": 6.843138694763184, + "learning_rate": 2.0050595723845273e-05, + "loss": 5.5229, + "step": 24425 + }, + { + "epoch": 8.779174147217235, + "grad_norm": 7.196107387542725, + "learning_rate": 2.00369947228116e-05, + "loss": 5.3835, + "step": 24450 + }, + { + "epoch": 8.788150807899461, + "grad_norm": 7.0579729080200195, + "learning_rate": 2.0023393721777923e-05, + "loss": 5.4245, + "step": 24475 + }, + { + "epoch": 8.797127468581687, + "grad_norm": 7.30932092666626, + "learning_rate": 2.000979272074425e-05, + "loss": 5.4639, + "step": 24500 + }, + { + "epoch": 8.806104129263915, + "grad_norm": 7.310079574584961, + "learning_rate": 1.9996191719710572e-05, + "loss": 5.452, + "step": 24525 + }, + { + "epoch": 8.81508078994614, + "grad_norm": 7.113363742828369, + "learning_rate": 1.9982590718676895e-05, + "loss": 5.3968, + "step": 24550 + }, + { + "epoch": 8.824057450628366, + "grad_norm": 7.0524749755859375, + "learning_rate": 1.996898971764322e-05, + "loss": 5.3912, + "step": 24575 + }, + { + "epoch": 8.833034111310592, + "grad_norm": 6.8501152992248535, + "learning_rate": 1.995538871660954e-05, + "loss": 5.4377, + "step": 24600 + }, + { + "epoch": 8.842010771992818, + "grad_norm": 7.207777976989746, + "learning_rate": 1.9941787715575867e-05, + "loss": 5.3794, + "step": 24625 + }, + { + "epoch": 8.850987432675044, + "grad_norm": 6.799304962158203, + "learning_rate": 1.992818671454219e-05, + "loss": 5.43, + "step": 24650 + }, + { + "epoch": 8.859964093357272, + "grad_norm": 7.337046146392822, + "learning_rate": 1.9914585713508516e-05, + "loss": 5.473, + "step": 24675 + }, + { + "epoch": 8.868940754039498, + "grad_norm": 7.812777042388916, + "learning_rate": 1.990098471247484e-05, + "loss": 5.4018, + "step": 24700 + }, + { + "epoch": 8.877917414721724, + "grad_norm": 7.4313764572143555, + "learning_rate": 1.9887383711441162e-05, + "loss": 5.4209, + "step": 24725 + }, + { + "epoch": 8.88689407540395, + "grad_norm": 7.536494255065918, + "learning_rate": 1.987378271040749e-05, + "loss": 5.4653, + "step": 24750 + }, + { + "epoch": 8.895870736086176, + "grad_norm": 7.593858242034912, + "learning_rate": 1.9860181709373808e-05, + "loss": 5.3517, + "step": 24775 + }, + { + "epoch": 8.904847396768401, + "grad_norm": 7.144144058227539, + "learning_rate": 1.9846580708340134e-05, + "loss": 5.3847, + "step": 24800 + }, + { + "epoch": 8.91382405745063, + "grad_norm": 6.925213813781738, + "learning_rate": 1.983297970730646e-05, + "loss": 5.3739, + "step": 24825 + }, + { + "epoch": 8.922800718132855, + "grad_norm": 6.93921422958374, + "learning_rate": 1.9819378706272784e-05, + "loss": 5.4187, + "step": 24850 + }, + { + "epoch": 8.931777378815081, + "grad_norm": 6.627919673919678, + "learning_rate": 1.9805777705239106e-05, + "loss": 5.4294, + "step": 24875 + }, + { + "epoch": 8.940754039497307, + "grad_norm": 7.075310707092285, + "learning_rate": 1.979217670420543e-05, + "loss": 5.4296, + "step": 24900 + }, + { + "epoch": 8.949730700179533, + "grad_norm": 7.26831579208374, + "learning_rate": 1.9778575703171756e-05, + "loss": 5.4272, + "step": 24925 + }, + { + "epoch": 8.958707360861759, + "grad_norm": 7.179161548614502, + "learning_rate": 1.9764974702138075e-05, + "loss": 5.4301, + "step": 24950 + }, + { + "epoch": 8.967684021543986, + "grad_norm": 7.256234169006348, + "learning_rate": 1.97513737011044e-05, + "loss": 5.4351, + "step": 24975 + }, + { + "epoch": 8.976660682226212, + "grad_norm": 7.236993312835693, + "learning_rate": 1.9737772700070728e-05, + "loss": 5.5183, + "step": 25000 + }, + { + "epoch": 8.985637342908438, + "grad_norm": 7.030118465423584, + "learning_rate": 1.9724171699037047e-05, + "loss": 5.41, + "step": 25025 + }, + { + "epoch": 8.994614003590664, + "grad_norm": 7.291460037231445, + "learning_rate": 1.9710570698003374e-05, + "loss": 5.4999, + "step": 25050 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.07738720332996009, + "eval_f1_macro": 0.005236317583075619, + "eval_f1_micro": 0.07738720332996009, + "eval_f1_weighted": 0.03965353841811621, + "eval_loss": 6.5938239097595215, + "eval_precision_macro": 0.005113655418440018, + "eval_precision_micro": 0.07738720332996009, + "eval_precision_weighted": 0.03312598089365064, + "eval_recall_macro": 0.009709605694008465, + "eval_recall_micro": 0.07738720332996009, + "eval_recall_weighted": 0.07738720332996009, + "eval_runtime": 84.3339, + "eval_samples_per_second": 621.02, + "eval_steps_per_second": 9.711, + "step": 25065 + }, + { + "epoch": 9.00359066427289, + "grad_norm": 7.011375904083252, + "learning_rate": 1.9696969696969697e-05, + "loss": 5.3236, + "step": 25075 + }, + { + "epoch": 9.012567324955116, + "grad_norm": 6.84488582611084, + "learning_rate": 1.9683368695936023e-05, + "loss": 5.2113, + "step": 25100 + }, + { + "epoch": 9.021543985637344, + "grad_norm": 7.331111431121826, + "learning_rate": 1.9669767694902343e-05, + "loss": 5.2443, + "step": 25125 + }, + { + "epoch": 9.03052064631957, + "grad_norm": 8.149158477783203, + "learning_rate": 1.965616669386867e-05, + "loss": 5.2389, + "step": 25150 + }, + { + "epoch": 9.039497307001795, + "grad_norm": 7.813643455505371, + "learning_rate": 1.9642565692834995e-05, + "loss": 5.1353, + "step": 25175 + }, + { + "epoch": 9.048473967684021, + "grad_norm": 6.49088716506958, + "learning_rate": 1.9628964691801315e-05, + "loss": 5.1247, + "step": 25200 + }, + { + "epoch": 9.057450628366247, + "grad_norm": 6.7675981521606445, + "learning_rate": 1.961536369076764e-05, + "loss": 5.2335, + "step": 25225 + }, + { + "epoch": 9.066427289048473, + "grad_norm": 7.7379350662231445, + "learning_rate": 1.9601762689733964e-05, + "loss": 5.1624, + "step": 25250 + }, + { + "epoch": 9.0754039497307, + "grad_norm": 7.139725685119629, + "learning_rate": 1.958816168870029e-05, + "loss": 5.15, + "step": 25275 + }, + { + "epoch": 9.084380610412927, + "grad_norm": 6.810644626617432, + "learning_rate": 1.9574560687666613e-05, + "loss": 5.131, + "step": 25300 + }, + { + "epoch": 9.093357271095153, + "grad_norm": 7.494154930114746, + "learning_rate": 1.9560959686632936e-05, + "loss": 5.159, + "step": 25325 + }, + { + "epoch": 9.102333931777379, + "grad_norm": 6.910736560821533, + "learning_rate": 1.9547358685599262e-05, + "loss": 5.1558, + "step": 25350 + }, + { + "epoch": 9.111310592459605, + "grad_norm": 6.4962158203125, + "learning_rate": 1.9533757684565582e-05, + "loss": 5.1714, + "step": 25375 + }, + { + "epoch": 9.12028725314183, + "grad_norm": 7.835927963256836, + "learning_rate": 1.952015668353191e-05, + "loss": 5.1718, + "step": 25400 + }, + { + "epoch": 9.129263913824058, + "grad_norm": 7.196147441864014, + "learning_rate": 1.950655568249823e-05, + "loss": 5.1143, + "step": 25425 + }, + { + "epoch": 9.138240574506284, + "grad_norm": 6.941309928894043, + "learning_rate": 1.9492954681464558e-05, + "loss": 5.143, + "step": 25450 + }, + { + "epoch": 9.14721723518851, + "grad_norm": 7.579328536987305, + "learning_rate": 1.947935368043088e-05, + "loss": 5.2287, + "step": 25475 + }, + { + "epoch": 9.156193895870736, + "grad_norm": 6.534302711486816, + "learning_rate": 1.9465752679397203e-05, + "loss": 5.2155, + "step": 25500 + }, + { + "epoch": 9.165170556552962, + "grad_norm": 6.851712703704834, + "learning_rate": 1.945215167836353e-05, + "loss": 5.0785, + "step": 25525 + }, + { + "epoch": 9.174147217235188, + "grad_norm": 8.27303409576416, + "learning_rate": 1.943855067732985e-05, + "loss": 5.2194, + "step": 25550 + }, + { + "epoch": 9.183123877917415, + "grad_norm": 8.029818534851074, + "learning_rate": 1.9424949676296176e-05, + "loss": 5.0778, + "step": 25575 + }, + { + "epoch": 9.192100538599641, + "grad_norm": 7.751465797424316, + "learning_rate": 1.9411348675262502e-05, + "loss": 5.2603, + "step": 25600 + }, + { + "epoch": 9.201077199281867, + "grad_norm": 7.175156593322754, + "learning_rate": 1.9397747674228825e-05, + "loss": 5.185, + "step": 25625 + }, + { + "epoch": 9.210053859964093, + "grad_norm": 7.4228997230529785, + "learning_rate": 1.9384146673195148e-05, + "loss": 5.2682, + "step": 25650 + }, + { + "epoch": 9.219030520646319, + "grad_norm": 6.863728046417236, + "learning_rate": 1.937054567216147e-05, + "loss": 5.2153, + "step": 25675 + }, + { + "epoch": 9.228007181328545, + "grad_norm": 8.26294994354248, + "learning_rate": 1.9356944671127797e-05, + "loss": 5.1219, + "step": 25700 + }, + { + "epoch": 9.236983842010773, + "grad_norm": 6.902092456817627, + "learning_rate": 1.9343343670094117e-05, + "loss": 5.2046, + "step": 25725 + }, + { + "epoch": 9.245960502692999, + "grad_norm": 7.2408833503723145, + "learning_rate": 1.9329742669060443e-05, + "loss": 5.226, + "step": 25750 + }, + { + "epoch": 9.254937163375224, + "grad_norm": 7.5615553855896, + "learning_rate": 1.931614166802677e-05, + "loss": 5.0717, + "step": 25775 + }, + { + "epoch": 9.26391382405745, + "grad_norm": 7.026217937469482, + "learning_rate": 1.9302540666993092e-05, + "loss": 5.2749, + "step": 25800 + }, + { + "epoch": 9.272890484739676, + "grad_norm": 6.913119316101074, + "learning_rate": 1.9288939665959415e-05, + "loss": 5.1648, + "step": 25825 + }, + { + "epoch": 9.281867145421902, + "grad_norm": 7.743864059448242, + "learning_rate": 1.9275338664925738e-05, + "loss": 5.1905, + "step": 25850 + }, + { + "epoch": 9.29084380610413, + "grad_norm": 7.068607807159424, + "learning_rate": 1.9261737663892064e-05, + "loss": 5.2855, + "step": 25875 + }, + { + "epoch": 9.299820466786356, + "grad_norm": 6.978683948516846, + "learning_rate": 1.9248136662858384e-05, + "loss": 5.2852, + "step": 25900 + }, + { + "epoch": 9.308797127468582, + "grad_norm": 6.531146049499512, + "learning_rate": 1.923453566182471e-05, + "loss": 5.205, + "step": 25925 + }, + { + "epoch": 9.317773788150808, + "grad_norm": 7.648620128631592, + "learning_rate": 1.9220934660791036e-05, + "loss": 5.2636, + "step": 25950 + }, + { + "epoch": 9.326750448833034, + "grad_norm": 7.748025894165039, + "learning_rate": 1.920733365975736e-05, + "loss": 5.1639, + "step": 25975 + }, + { + "epoch": 9.335727109515261, + "grad_norm": 6.796446323394775, + "learning_rate": 1.9193732658723682e-05, + "loss": 5.1376, + "step": 26000 + }, + { + "epoch": 9.344703770197487, + "grad_norm": 8.004226684570312, + "learning_rate": 1.9180131657690005e-05, + "loss": 5.2103, + "step": 26025 + }, + { + "epoch": 9.353680430879713, + "grad_norm": 7.240316390991211, + "learning_rate": 1.916653065665633e-05, + "loss": 5.2661, + "step": 26050 + }, + { + "epoch": 9.362657091561939, + "grad_norm": 7.607624530792236, + "learning_rate": 1.9152929655622654e-05, + "loss": 5.0907, + "step": 26075 + }, + { + "epoch": 9.371633752244165, + "grad_norm": 7.949883460998535, + "learning_rate": 1.9139328654588977e-05, + "loss": 5.1911, + "step": 26100 + }, + { + "epoch": 9.38061041292639, + "grad_norm": 7.459380149841309, + "learning_rate": 1.9125727653555304e-05, + "loss": 5.0948, + "step": 26125 + }, + { + "epoch": 9.389587073608618, + "grad_norm": 7.279307842254639, + "learning_rate": 1.9112126652521627e-05, + "loss": 5.1737, + "step": 26150 + }, + { + "epoch": 9.398563734290844, + "grad_norm": 6.836437225341797, + "learning_rate": 1.909852565148795e-05, + "loss": 5.2354, + "step": 26175 + }, + { + "epoch": 9.40754039497307, + "grad_norm": 7.26010274887085, + "learning_rate": 1.9084924650454273e-05, + "loss": 5.2619, + "step": 26200 + }, + { + "epoch": 9.416517055655296, + "grad_norm": 6.666034698486328, + "learning_rate": 1.90713236494206e-05, + "loss": 5.2121, + "step": 26225 + }, + { + "epoch": 9.425493716337522, + "grad_norm": 7.646721363067627, + "learning_rate": 1.9057722648386922e-05, + "loss": 5.2058, + "step": 26250 + }, + { + "epoch": 9.434470377019748, + "grad_norm": 7.3150835037231445, + "learning_rate": 1.9044121647353245e-05, + "loss": 5.2455, + "step": 26275 + }, + { + "epoch": 9.443447037701976, + "grad_norm": 8.005369186401367, + "learning_rate": 1.903052064631957e-05, + "loss": 5.1829, + "step": 26300 + }, + { + "epoch": 9.452423698384202, + "grad_norm": 7.337682723999023, + "learning_rate": 1.9016919645285894e-05, + "loss": 5.2141, + "step": 26325 + }, + { + "epoch": 9.461400359066428, + "grad_norm": 8.386185646057129, + "learning_rate": 1.9003318644252217e-05, + "loss": 5.1522, + "step": 26350 + }, + { + "epoch": 9.470377019748653, + "grad_norm": 7.949404716491699, + "learning_rate": 1.899026168325989e-05, + "loss": 5.1706, + "step": 26375 + }, + { + "epoch": 9.47935368043088, + "grad_norm": 7.1753058433532715, + "learning_rate": 1.8976660682226212e-05, + "loss": 5.126, + "step": 26400 + }, + { + "epoch": 9.488330341113105, + "grad_norm": 6.726853847503662, + "learning_rate": 1.8963059681192535e-05, + "loss": 5.1393, + "step": 26425 + }, + { + "epoch": 9.497307001795333, + "grad_norm": 7.278531074523926, + "learning_rate": 1.894945868015886e-05, + "loss": 5.1984, + "step": 26450 + }, + { + "epoch": 9.506283662477559, + "grad_norm": 8.56510066986084, + "learning_rate": 1.8935857679125184e-05, + "loss": 5.1939, + "step": 26475 + }, + { + "epoch": 9.515260323159785, + "grad_norm": 8.112578392028809, + "learning_rate": 1.892225667809151e-05, + "loss": 5.0645, + "step": 26500 + }, + { + "epoch": 9.52423698384201, + "grad_norm": 7.4344611167907715, + "learning_rate": 1.890865567705783e-05, + "loss": 5.3067, + "step": 26525 + }, + { + "epoch": 9.533213644524237, + "grad_norm": 8.008060455322266, + "learning_rate": 1.8895054676024157e-05, + "loss": 5.2895, + "step": 26550 + }, + { + "epoch": 9.542190305206462, + "grad_norm": 7.888736248016357, + "learning_rate": 1.888145367499048e-05, + "loss": 5.2383, + "step": 26575 + }, + { + "epoch": 9.55116696588869, + "grad_norm": 7.0138702392578125, + "learning_rate": 1.8867852673956802e-05, + "loss": 5.2683, + "step": 26600 + }, + { + "epoch": 9.560143626570916, + "grad_norm": 6.673938274383545, + "learning_rate": 1.885425167292313e-05, + "loss": 5.1264, + "step": 26625 + }, + { + "epoch": 9.569120287253142, + "grad_norm": 7.610721111297607, + "learning_rate": 1.8840650671889452e-05, + "loss": 5.1658, + "step": 26650 + }, + { + "epoch": 9.578096947935368, + "grad_norm": 7.488562107086182, + "learning_rate": 1.8827049670855778e-05, + "loss": 5.2203, + "step": 26675 + }, + { + "epoch": 9.587073608617594, + "grad_norm": 7.385153770446777, + "learning_rate": 1.8813448669822098e-05, + "loss": 5.196, + "step": 26700 + }, + { + "epoch": 9.59605026929982, + "grad_norm": 8.227118492126465, + "learning_rate": 1.8799847668788424e-05, + "loss": 5.1682, + "step": 26725 + }, + { + "epoch": 9.605026929982047, + "grad_norm": 7.123591423034668, + "learning_rate": 1.8786246667754747e-05, + "loss": 5.29, + "step": 26750 + }, + { + "epoch": 9.614003590664273, + "grad_norm": 7.516079902648926, + "learning_rate": 1.877264566672107e-05, + "loss": 5.2344, + "step": 26775 + }, + { + "epoch": 9.6229802513465, + "grad_norm": 7.574032783508301, + "learning_rate": 1.8759044665687396e-05, + "loss": 5.1696, + "step": 26800 + }, + { + "epoch": 9.631956912028725, + "grad_norm": 7.421360492706299, + "learning_rate": 1.874544366465372e-05, + "loss": 5.2244, + "step": 26825 + }, + { + "epoch": 9.640933572710951, + "grad_norm": 7.0887227058410645, + "learning_rate": 1.8731842663620042e-05, + "loss": 5.2377, + "step": 26850 + }, + { + "epoch": 9.649910233393177, + "grad_norm": 7.3065619468688965, + "learning_rate": 1.8718241662586365e-05, + "loss": 5.2594, + "step": 26875 + }, + { + "epoch": 9.658886894075405, + "grad_norm": 7.54917573928833, + "learning_rate": 1.870464066155269e-05, + "loss": 5.2942, + "step": 26900 + }, + { + "epoch": 9.66786355475763, + "grad_norm": 8.556507110595703, + "learning_rate": 1.8691039660519017e-05, + "loss": 5.2188, + "step": 26925 + }, + { + "epoch": 9.676840215439857, + "grad_norm": 7.3606462478637695, + "learning_rate": 1.8677438659485337e-05, + "loss": 5.1708, + "step": 26950 + }, + { + "epoch": 9.685816876122082, + "grad_norm": 7.604927062988281, + "learning_rate": 1.8663837658451663e-05, + "loss": 5.1979, + "step": 26975 + }, + { + "epoch": 9.694793536804308, + "grad_norm": 7.403592109680176, + "learning_rate": 1.8650236657417986e-05, + "loss": 5.3023, + "step": 27000 + }, + { + "epoch": 9.703770197486534, + "grad_norm": 7.249283313751221, + "learning_rate": 1.863663565638431e-05, + "loss": 5.3216, + "step": 27025 + }, + { + "epoch": 9.712746858168762, + "grad_norm": 7.660834312438965, + "learning_rate": 1.8623034655350632e-05, + "loss": 5.2699, + "step": 27050 + }, + { + "epoch": 9.721723518850988, + "grad_norm": 8.050627708435059, + "learning_rate": 1.860943365431696e-05, + "loss": 5.1371, + "step": 27075 + }, + { + "epoch": 9.730700179533214, + "grad_norm": 7.3413920402526855, + "learning_rate": 1.8595832653283285e-05, + "loss": 5.2026, + "step": 27100 + }, + { + "epoch": 9.73967684021544, + "grad_norm": 7.131037712097168, + "learning_rate": 1.8582231652249604e-05, + "loss": 5.1842, + "step": 27125 + }, + { + "epoch": 9.748653500897666, + "grad_norm": 8.408368110656738, + "learning_rate": 1.856863065121593e-05, + "loss": 5.2942, + "step": 27150 + }, + { + "epoch": 9.757630161579891, + "grad_norm": 6.732389450073242, + "learning_rate": 1.8555029650182254e-05, + "loss": 5.2096, + "step": 27175 + }, + { + "epoch": 9.76660682226212, + "grad_norm": 7.879339218139648, + "learning_rate": 1.8541428649148576e-05, + "loss": 5.2321, + "step": 27200 + }, + { + "epoch": 9.775583482944345, + "grad_norm": 8.148640632629395, + "learning_rate": 1.8527827648114903e-05, + "loss": 5.2583, + "step": 27225 + }, + { + "epoch": 9.784560143626571, + "grad_norm": 8.037172317504883, + "learning_rate": 1.8514226647081226e-05, + "loss": 5.2621, + "step": 27250 + }, + { + "epoch": 9.793536804308797, + "grad_norm": 7.659771919250488, + "learning_rate": 1.8500625646047552e-05, + "loss": 5.1544, + "step": 27275 + }, + { + "epoch": 9.802513464991023, + "grad_norm": 8.79022216796875, + "learning_rate": 1.848702464501387e-05, + "loss": 5.1111, + "step": 27300 + }, + { + "epoch": 9.811490125673249, + "grad_norm": 7.5442795753479, + "learning_rate": 1.8473423643980198e-05, + "loss": 5.1735, + "step": 27325 + }, + { + "epoch": 9.820466786355476, + "grad_norm": 8.588237762451172, + "learning_rate": 1.845982264294652e-05, + "loss": 5.2858, + "step": 27350 + }, + { + "epoch": 9.829443447037702, + "grad_norm": 7.225432872772217, + "learning_rate": 1.8446221641912844e-05, + "loss": 5.3278, + "step": 27375 + }, + { + "epoch": 9.838420107719928, + "grad_norm": 7.17155122756958, + "learning_rate": 1.843262064087917e-05, + "loss": 5.1761, + "step": 27400 + }, + { + "epoch": 9.847396768402154, + "grad_norm": 7.498125076293945, + "learning_rate": 1.8419019639845493e-05, + "loss": 5.2549, + "step": 27425 + }, + { + "epoch": 9.85637342908438, + "grad_norm": 8.352531433105469, + "learning_rate": 1.840541863881182e-05, + "loss": 5.2768, + "step": 27450 + }, + { + "epoch": 9.865350089766606, + "grad_norm": 7.20223331451416, + "learning_rate": 1.839181763777814e-05, + "loss": 5.2025, + "step": 27475 + }, + { + "epoch": 9.874326750448834, + "grad_norm": 9.017054557800293, + "learning_rate": 1.8378216636744465e-05, + "loss": 5.2389, + "step": 27500 + }, + { + "epoch": 9.88330341113106, + "grad_norm": 7.526697635650635, + "learning_rate": 1.8364615635710788e-05, + "loss": 5.2008, + "step": 27525 + }, + { + "epoch": 9.892280071813286, + "grad_norm": 7.77203893661499, + "learning_rate": 1.835101463467711e-05, + "loss": 5.1467, + "step": 27550 + }, + { + "epoch": 9.901256732495511, + "grad_norm": 7.317615032196045, + "learning_rate": 1.8337413633643437e-05, + "loss": 5.1913, + "step": 27575 + }, + { + "epoch": 9.910233393177737, + "grad_norm": 6.964707851409912, + "learning_rate": 1.832381263260976e-05, + "loss": 5.2664, + "step": 27600 + }, + { + "epoch": 9.919210053859963, + "grad_norm": 7.517217636108398, + "learning_rate": 1.8310211631576087e-05, + "loss": 5.1626, + "step": 27625 + }, + { + "epoch": 9.928186714542191, + "grad_norm": 8.242012023925781, + "learning_rate": 1.8296610630542406e-05, + "loss": 5.1806, + "step": 27650 + }, + { + "epoch": 9.937163375224417, + "grad_norm": 7.217047691345215, + "learning_rate": 1.8283009629508732e-05, + "loss": 5.3437, + "step": 27675 + }, + { + "epoch": 9.946140035906643, + "grad_norm": 6.790250778198242, + "learning_rate": 1.826940862847506e-05, + "loss": 5.2131, + "step": 27700 + }, + { + "epoch": 9.955116696588869, + "grad_norm": 7.725668430328369, + "learning_rate": 1.8255807627441378e-05, + "loss": 5.3043, + "step": 27725 + }, + { + "epoch": 9.964093357271095, + "grad_norm": 7.143831729888916, + "learning_rate": 1.8242206626407705e-05, + "loss": 5.1882, + "step": 27750 + }, + { + "epoch": 9.973070017953322, + "grad_norm": 7.224526882171631, + "learning_rate": 1.8228605625374028e-05, + "loss": 5.2941, + "step": 27775 + }, + { + "epoch": 9.982046678635548, + "grad_norm": 7.74325704574585, + "learning_rate": 1.8215004624340354e-05, + "loss": 5.2012, + "step": 27800 + }, + { + "epoch": 9.991023339317774, + "grad_norm": 7.4681010246276855, + "learning_rate": 1.8201403623306673e-05, + "loss": 5.1957, + "step": 27825 + }, + { + "epoch": 10.0, + "grad_norm": 8.227639198303223, + "learning_rate": 1.8187802622273e-05, + "loss": 5.2138, + "step": 27850 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.0766807324384702, + "eval_f1_macro": 0.005881966648615532, + "eval_f1_micro": 0.0766807324384702, + "eval_f1_weighted": 0.04105882210035551, + "eval_loss": 6.54189920425415, + "eval_precision_macro": 0.005382876379314217, + "eval_precision_micro": 0.0766807324384702, + "eval_precision_weighted": 0.03344441463652321, + "eval_recall_macro": 0.010776707049385745, + "eval_recall_micro": 0.0766807324384702, + "eval_recall_weighted": 0.0766807324384702, + "eval_runtime": 83.9161, + "eval_samples_per_second": 624.112, + "eval_steps_per_second": 9.76, + "step": 27850 + }, + { + "epoch": 10.008976660682226, + "grad_norm": 7.276993274688721, + "learning_rate": 1.8174201621239326e-05, + "loss": 4.939, + "step": 27875 + }, + { + "epoch": 10.017953321364452, + "grad_norm": 7.236202716827393, + "learning_rate": 1.8160600620205646e-05, + "loss": 4.8823, + "step": 27900 + }, + { + "epoch": 10.02692998204668, + "grad_norm": 7.216892242431641, + "learning_rate": 1.8146999619171972e-05, + "loss": 4.9623, + "step": 27925 + }, + { + "epoch": 10.035906642728905, + "grad_norm": 7.813600063323975, + "learning_rate": 1.8133398618138295e-05, + "loss": 4.9538, + "step": 27950 + }, + { + "epoch": 10.044883303411131, + "grad_norm": 7.846309185028076, + "learning_rate": 1.811979761710462e-05, + "loss": 5.0605, + "step": 27975 + }, + { + "epoch": 10.053859964093357, + "grad_norm": 6.994050979614258, + "learning_rate": 1.8106196616070944e-05, + "loss": 4.9402, + "step": 28000 + }, + { + "epoch": 10.062836624775583, + "grad_norm": 7.907347679138184, + "learning_rate": 1.8092595615037267e-05, + "loss": 5.0726, + "step": 28025 + }, + { + "epoch": 10.071813285457809, + "grad_norm": 7.17254114151001, + "learning_rate": 1.8078994614003593e-05, + "loss": 4.9497, + "step": 28050 + }, + { + "epoch": 10.080789946140037, + "grad_norm": 8.17119312286377, + "learning_rate": 1.8065393612969913e-05, + "loss": 4.9316, + "step": 28075 + }, + { + "epoch": 10.089766606822263, + "grad_norm": 7.243988990783691, + "learning_rate": 1.805179261193624e-05, + "loss": 4.9989, + "step": 28100 + }, + { + "epoch": 10.098743267504489, + "grad_norm": 8.219581604003906, + "learning_rate": 1.8038191610902562e-05, + "loss": 5.0118, + "step": 28125 + }, + { + "epoch": 10.107719928186714, + "grad_norm": 7.549900531768799, + "learning_rate": 1.802459060986889e-05, + "loss": 4.9528, + "step": 28150 + }, + { + "epoch": 10.11669658886894, + "grad_norm": 7.046352386474609, + "learning_rate": 1.801098960883521e-05, + "loss": 4.934, + "step": 28175 + }, + { + "epoch": 10.125673249551166, + "grad_norm": 7.900087356567383, + "learning_rate": 1.7997388607801534e-05, + "loss": 5.0368, + "step": 28200 + }, + { + "epoch": 10.134649910233394, + "grad_norm": 7.704578399658203, + "learning_rate": 1.798378760676786e-05, + "loss": 4.9989, + "step": 28225 + }, + { + "epoch": 10.14362657091562, + "grad_norm": 6.6125898361206055, + "learning_rate": 1.797018660573418e-05, + "loss": 5.0164, + "step": 28250 + }, + { + "epoch": 10.152603231597846, + "grad_norm": 8.289909362792969, + "learning_rate": 1.7956585604700506e-05, + "loss": 4.9847, + "step": 28275 + }, + { + "epoch": 10.161579892280072, + "grad_norm": 8.305643081665039, + "learning_rate": 1.794298460366683e-05, + "loss": 5.0201, + "step": 28300 + }, + { + "epoch": 10.170556552962298, + "grad_norm": 8.265864372253418, + "learning_rate": 1.7929383602633156e-05, + "loss": 4.946, + "step": 28325 + }, + { + "epoch": 10.179533213644524, + "grad_norm": 8.122498512268066, + "learning_rate": 1.791578260159948e-05, + "loss": 4.9684, + "step": 28350 + }, + { + "epoch": 10.188509874326751, + "grad_norm": 8.273490905761719, + "learning_rate": 1.79021816005658e-05, + "loss": 5.0407, + "step": 28375 + }, + { + "epoch": 10.197486535008977, + "grad_norm": 6.460000514984131, + "learning_rate": 1.7888580599532128e-05, + "loss": 4.9598, + "step": 28400 + }, + { + "epoch": 10.206463195691203, + "grad_norm": 7.623206615447998, + "learning_rate": 1.7874979598498447e-05, + "loss": 4.9417, + "step": 28425 + }, + { + "epoch": 10.215439856373429, + "grad_norm": 6.9796953201293945, + "learning_rate": 1.7861378597464774e-05, + "loss": 4.9872, + "step": 28450 + }, + { + "epoch": 10.224416517055655, + "grad_norm": 7.899072170257568, + "learning_rate": 1.78477775964311e-05, + "loss": 4.9633, + "step": 28475 + }, + { + "epoch": 10.23339317773788, + "grad_norm": 7.155921459197998, + "learning_rate": 1.783417659539742e-05, + "loss": 5.0636, + "step": 28500 + }, + { + "epoch": 10.242369838420109, + "grad_norm": 7.616751194000244, + "learning_rate": 1.7820575594363746e-05, + "loss": 5.0256, + "step": 28525 + }, + { + "epoch": 10.251346499102334, + "grad_norm": 7.987672328948975, + "learning_rate": 1.780697459333007e-05, + "loss": 5.0413, + "step": 28550 + }, + { + "epoch": 10.26032315978456, + "grad_norm": 7.827592849731445, + "learning_rate": 1.7793373592296395e-05, + "loss": 4.8632, + "step": 28575 + }, + { + "epoch": 10.269299820466786, + "grad_norm": 8.682778358459473, + "learning_rate": 1.7779772591262715e-05, + "loss": 5.0168, + "step": 28600 + }, + { + "epoch": 10.278276481149012, + "grad_norm": 7.714150905609131, + "learning_rate": 1.776617159022904e-05, + "loss": 5.0575, + "step": 28625 + }, + { + "epoch": 10.287253141831238, + "grad_norm": 7.102622032165527, + "learning_rate": 1.7752570589195367e-05, + "loss": 4.9852, + "step": 28650 + }, + { + "epoch": 10.296229802513466, + "grad_norm": 8.30379581451416, + "learning_rate": 1.7738969588161687e-05, + "loss": 5.0695, + "step": 28675 + }, + { + "epoch": 10.305206463195692, + "grad_norm": 7.784700393676758, + "learning_rate": 1.7725368587128013e-05, + "loss": 5.0777, + "step": 28700 + }, + { + "epoch": 10.314183123877918, + "grad_norm": 7.938167572021484, + "learning_rate": 1.7711767586094336e-05, + "loss": 5.0443, + "step": 28725 + }, + { + "epoch": 10.323159784560143, + "grad_norm": 7.890762805938721, + "learning_rate": 1.7698166585060662e-05, + "loss": 5.114, + "step": 28750 + }, + { + "epoch": 10.33213644524237, + "grad_norm": 7.677299499511719, + "learning_rate": 1.7684565584026985e-05, + "loss": 4.9514, + "step": 28775 + }, + { + "epoch": 10.341113105924595, + "grad_norm": 8.3126220703125, + "learning_rate": 1.7671508623034654e-05, + "loss": 4.9445, + "step": 28800 + }, + { + "epoch": 10.350089766606823, + "grad_norm": 8.443814277648926, + "learning_rate": 1.765790762200098e-05, + "loss": 4.9176, + "step": 28825 + }, + { + "epoch": 10.359066427289049, + "grad_norm": 8.087874412536621, + "learning_rate": 1.7644306620967304e-05, + "loss": 4.9607, + "step": 28850 + }, + { + "epoch": 10.368043087971275, + "grad_norm": 8.240647315979004, + "learning_rate": 1.7630705619933627e-05, + "loss": 4.9359, + "step": 28875 + }, + { + "epoch": 10.3770197486535, + "grad_norm": 7.977756977081299, + "learning_rate": 1.7617104618899953e-05, + "loss": 5.065, + "step": 28900 + }, + { + "epoch": 10.385996409335727, + "grad_norm": 8.636222839355469, + "learning_rate": 1.7603503617866276e-05, + "loss": 5.0336, + "step": 28925 + }, + { + "epoch": 10.394973070017953, + "grad_norm": 8.585232734680176, + "learning_rate": 1.75899026168326e-05, + "loss": 5.0054, + "step": 28950 + }, + { + "epoch": 10.40394973070018, + "grad_norm": 7.9171929359436035, + "learning_rate": 1.757630161579892e-05, + "loss": 5.0456, + "step": 28975 + }, + { + "epoch": 10.412926391382406, + "grad_norm": 7.4322829246521, + "learning_rate": 1.7562700614765248e-05, + "loss": 5.0695, + "step": 29000 + }, + { + "epoch": 10.421903052064632, + "grad_norm": 8.274045944213867, + "learning_rate": 1.754909961373157e-05, + "loss": 5.0174, + "step": 29025 + }, + { + "epoch": 10.430879712746858, + "grad_norm": 7.109821796417236, + "learning_rate": 1.7535498612697894e-05, + "loss": 5.0281, + "step": 29050 + }, + { + "epoch": 10.439856373429084, + "grad_norm": 6.920289516448975, + "learning_rate": 1.752189761166422e-05, + "loss": 5.0364, + "step": 29075 + }, + { + "epoch": 10.44883303411131, + "grad_norm": 7.733560562133789, + "learning_rate": 1.7508296610630543e-05, + "loss": 5.0145, + "step": 29100 + }, + { + "epoch": 10.457809694793538, + "grad_norm": 8.246260643005371, + "learning_rate": 1.7494695609596866e-05, + "loss": 5.1375, + "step": 29125 + }, + { + "epoch": 10.466786355475763, + "grad_norm": 8.865415573120117, + "learning_rate": 1.748109460856319e-05, + "loss": 5.0873, + "step": 29150 + }, + { + "epoch": 10.47576301615799, + "grad_norm": 8.248238563537598, + "learning_rate": 1.7467493607529515e-05, + "loss": 5.1112, + "step": 29175 + }, + { + "epoch": 10.484739676840215, + "grad_norm": 7.686662197113037, + "learning_rate": 1.7453892606495838e-05, + "loss": 4.9456, + "step": 29200 + }, + { + "epoch": 10.493716337522441, + "grad_norm": 7.759557247161865, + "learning_rate": 1.744029160546216e-05, + "loss": 4.9779, + "step": 29225 + }, + { + "epoch": 10.502692998204667, + "grad_norm": 7.4357008934021, + "learning_rate": 1.7426690604428487e-05, + "loss": 5.0908, + "step": 29250 + }, + { + "epoch": 10.511669658886895, + "grad_norm": 7.915367126464844, + "learning_rate": 1.741308960339481e-05, + "loss": 4.9314, + "step": 29275 + }, + { + "epoch": 10.52064631956912, + "grad_norm": 7.916555404663086, + "learning_rate": 1.7399488602361133e-05, + "loss": 5.0861, + "step": 29300 + }, + { + "epoch": 10.529622980251347, + "grad_norm": 7.079159259796143, + "learning_rate": 1.738588760132746e-05, + "loss": 5.0478, + "step": 29325 + }, + { + "epoch": 10.538599640933572, + "grad_norm": 7.84434175491333, + "learning_rate": 1.7372286600293783e-05, + "loss": 5.0682, + "step": 29350 + }, + { + "epoch": 10.547576301615798, + "grad_norm": 8.23287582397461, + "learning_rate": 1.7358685599260105e-05, + "loss": 5.0337, + "step": 29375 + }, + { + "epoch": 10.556552962298024, + "grad_norm": 8.18203353881836, + "learning_rate": 1.734508459822643e-05, + "loss": 4.902, + "step": 29400 + }, + { + "epoch": 10.565529622980252, + "grad_norm": 8.588555335998535, + "learning_rate": 1.7331483597192755e-05, + "loss": 5.0445, + "step": 29425 + }, + { + "epoch": 10.574506283662478, + "grad_norm": 7.672823429107666, + "learning_rate": 1.7317882596159078e-05, + "loss": 5.0024, + "step": 29450 + }, + { + "epoch": 10.583482944344704, + "grad_norm": 8.764299392700195, + "learning_rate": 1.73042815951254e-05, + "loss": 5.0329, + "step": 29475 + }, + { + "epoch": 10.59245960502693, + "grad_norm": 7.455743312835693, + "learning_rate": 1.7290680594091727e-05, + "loss": 5.0907, + "step": 29500 + }, + { + "epoch": 10.601436265709156, + "grad_norm": 7.188393592834473, + "learning_rate": 1.727707959305805e-05, + "loss": 5.0454, + "step": 29525 + }, + { + "epoch": 10.610412926391383, + "grad_norm": 8.448654174804688, + "learning_rate": 1.7263478592024373e-05, + "loss": 5.1182, + "step": 29550 + }, + { + "epoch": 10.61938958707361, + "grad_norm": 7.6640801429748535, + "learning_rate": 1.7249877590990696e-05, + "loss": 5.1112, + "step": 29575 + }, + { + "epoch": 10.628366247755835, + "grad_norm": 8.304560661315918, + "learning_rate": 1.7236276589957022e-05, + "loss": 5.0413, + "step": 29600 + }, + { + "epoch": 10.637342908438061, + "grad_norm": 9.096949577331543, + "learning_rate": 1.722267558892335e-05, + "loss": 5.0088, + "step": 29625 + }, + { + "epoch": 10.646319569120287, + "grad_norm": 8.072813034057617, + "learning_rate": 1.7209074587889668e-05, + "loss": 4.9998, + "step": 29650 + }, + { + "epoch": 10.655296229802513, + "grad_norm": 8.35558795928955, + "learning_rate": 1.7195473586855994e-05, + "loss": 4.9262, + "step": 29675 + }, + { + "epoch": 10.664272890484739, + "grad_norm": 8.07185173034668, + "learning_rate": 1.7181872585822317e-05, + "loss": 4.982, + "step": 29700 + }, + { + "epoch": 10.673249551166966, + "grad_norm": 9.148063659667969, + "learning_rate": 1.716827158478864e-05, + "loss": 5.0598, + "step": 29725 + }, + { + "epoch": 10.682226211849192, + "grad_norm": 7.212332725524902, + "learning_rate": 1.7154670583754963e-05, + "loss": 5.0303, + "step": 29750 + }, + { + "epoch": 10.691202872531418, + "grad_norm": 7.822390079498291, + "learning_rate": 1.714106958272129e-05, + "loss": 5.0627, + "step": 29775 + }, + { + "epoch": 10.700179533213644, + "grad_norm": 7.582313060760498, + "learning_rate": 1.7127468581687616e-05, + "loss": 5.086, + "step": 29800 + }, + { + "epoch": 10.70915619389587, + "grad_norm": 7.593709945678711, + "learning_rate": 1.7113867580653935e-05, + "loss": 5.0031, + "step": 29825 + }, + { + "epoch": 10.718132854578098, + "grad_norm": 8.269501686096191, + "learning_rate": 1.710026657962026e-05, + "loss": 5.0322, + "step": 29850 + }, + { + "epoch": 10.727109515260324, + "grad_norm": 7.703639984130859, + "learning_rate": 1.7086665578586584e-05, + "loss": 5.0514, + "step": 29875 + }, + { + "epoch": 10.73608617594255, + "grad_norm": 9.719590187072754, + "learning_rate": 1.7073064577552907e-05, + "loss": 4.9697, + "step": 29900 + }, + { + "epoch": 10.745062836624776, + "grad_norm": 7.571653366088867, + "learning_rate": 1.705946357651923e-05, + "loss": 4.8785, + "step": 29925 + }, + { + "epoch": 10.754039497307001, + "grad_norm": 8.096991539001465, + "learning_rate": 1.7045862575485557e-05, + "loss": 4.9724, + "step": 29950 + }, + { + "epoch": 10.763016157989227, + "grad_norm": 7.918676853179932, + "learning_rate": 1.7032261574451883e-05, + "loss": 5.0121, + "step": 29975 + }, + { + "epoch": 10.771992818671453, + "grad_norm": 8.695755958557129, + "learning_rate": 1.7018660573418202e-05, + "loss": 5.0289, + "step": 30000 + }, + { + "epoch": 10.780969479353681, + "grad_norm": 8.728045463562012, + "learning_rate": 1.700505957238453e-05, + "loss": 4.993, + "step": 30025 + }, + { + "epoch": 10.789946140035907, + "grad_norm": 7.1352362632751465, + "learning_rate": 1.699145857135085e-05, + "loss": 5.1395, + "step": 30050 + }, + { + "epoch": 10.798922800718133, + "grad_norm": 7.462558269500732, + "learning_rate": 1.6977857570317175e-05, + "loss": 5.1038, + "step": 30075 + }, + { + "epoch": 10.807899461400359, + "grad_norm": 7.070529937744141, + "learning_rate": 1.69642565692835e-05, + "loss": 5.0628, + "step": 30100 + }, + { + "epoch": 10.816876122082585, + "grad_norm": 7.522061824798584, + "learning_rate": 1.6950655568249824e-05, + "loss": 5.036, + "step": 30125 + }, + { + "epoch": 10.825852782764812, + "grad_norm": 8.275242805480957, + "learning_rate": 1.693705456721615e-05, + "loss": 5.0593, + "step": 30150 + }, + { + "epoch": 10.834829443447038, + "grad_norm": 7.643046855926514, + "learning_rate": 1.692345356618247e-05, + "loss": 5.1702, + "step": 30175 + }, + { + "epoch": 10.843806104129264, + "grad_norm": 7.2250142097473145, + "learning_rate": 1.6909852565148796e-05, + "loss": 4.9769, + "step": 30200 + }, + { + "epoch": 10.85278276481149, + "grad_norm": 7.88740348815918, + "learning_rate": 1.689625156411512e-05, + "loss": 5.0206, + "step": 30225 + }, + { + "epoch": 10.861759425493716, + "grad_norm": 8.478012084960938, + "learning_rate": 1.6882650563081442e-05, + "loss": 5.0604, + "step": 30250 + }, + { + "epoch": 10.870736086175942, + "grad_norm": 9.1868314743042, + "learning_rate": 1.6869049562047768e-05, + "loss": 5.0639, + "step": 30275 + }, + { + "epoch": 10.87971274685817, + "grad_norm": 7.54857063293457, + "learning_rate": 1.685544856101409e-05, + "loss": 4.9713, + "step": 30300 + }, + { + "epoch": 10.888689407540395, + "grad_norm": 8.36911392211914, + "learning_rate": 1.6841847559980414e-05, + "loss": 5.0068, + "step": 30325 + }, + { + "epoch": 10.897666068222621, + "grad_norm": 7.702461242675781, + "learning_rate": 1.6828246558946737e-05, + "loss": 5.1215, + "step": 30350 + }, + { + "epoch": 10.906642728904847, + "grad_norm": 8.160609245300293, + "learning_rate": 1.6814645557913063e-05, + "loss": 5.0604, + "step": 30375 + }, + { + "epoch": 10.915619389587073, + "grad_norm": 7.832348823547363, + "learning_rate": 1.680104455687939e-05, + "loss": 4.9942, + "step": 30400 + }, + { + "epoch": 10.9245960502693, + "grad_norm": 7.940217971801758, + "learning_rate": 1.678744355584571e-05, + "loss": 5.1235, + "step": 30425 + }, + { + "epoch": 10.933572710951527, + "grad_norm": 7.310400009155273, + "learning_rate": 1.6773842554812035e-05, + "loss": 5.0183, + "step": 30450 + }, + { + "epoch": 10.942549371633753, + "grad_norm": 7.305665969848633, + "learning_rate": 1.676024155377836e-05, + "loss": 5.1309, + "step": 30475 + }, + { + "epoch": 10.951526032315979, + "grad_norm": 7.888072967529297, + "learning_rate": 1.674664055274468e-05, + "loss": 5.0796, + "step": 30500 + }, + { + "epoch": 10.960502692998205, + "grad_norm": 8.254156112670898, + "learning_rate": 1.6733039551711004e-05, + "loss": 5.0336, + "step": 30525 + }, + { + "epoch": 10.96947935368043, + "grad_norm": 7.874939441680908, + "learning_rate": 1.671943855067733e-05, + "loss": 4.9987, + "step": 30550 + }, + { + "epoch": 10.978456014362656, + "grad_norm": 8.482148170471191, + "learning_rate": 1.6705837549643657e-05, + "loss": 5.0803, + "step": 30575 + }, + { + "epoch": 10.987432675044884, + "grad_norm": 8.02660083770752, + "learning_rate": 1.6692236548609976e-05, + "loss": 4.9727, + "step": 30600 + }, + { + "epoch": 10.99640933572711, + "grad_norm": 7.497283458709717, + "learning_rate": 1.6678635547576303e-05, + "loss": 5.0532, + "step": 30625 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.07314837798102075, + "eval_f1_macro": 0.006421033459222461, + "eval_f1_micro": 0.07314837798102075, + "eval_f1_weighted": 0.040269860452882486, + "eval_loss": 6.5203537940979, + "eval_precision_macro": 0.006061229613266478, + "eval_precision_micro": 0.07314837798102075, + "eval_precision_weighted": 0.033259023981108034, + "eval_recall_macro": 0.011246273078839693, + "eval_recall_micro": 0.07314837798102075, + "eval_recall_weighted": 0.07314837798102075, + "eval_runtime": 84.1294, + "eval_samples_per_second": 622.529, + "eval_steps_per_second": 9.735, + "step": 30635 + }, + { + "epoch": 11.005385996409336, + "grad_norm": 8.898094177246094, + "learning_rate": 1.6665034546542626e-05, + "loss": 4.8768, + "step": 30650 + }, + { + "epoch": 11.014362657091562, + "grad_norm": 8.0846586227417, + "learning_rate": 1.665143354550895e-05, + "loss": 4.8092, + "step": 30675 + }, + { + "epoch": 11.023339317773788, + "grad_norm": 7.904754161834717, + "learning_rate": 1.6637832544475275e-05, + "loss": 4.7553, + "step": 30700 + }, + { + "epoch": 11.032315978456014, + "grad_norm": 7.309874057769775, + "learning_rate": 1.6624231543441598e-05, + "loss": 4.7558, + "step": 30725 + }, + { + "epoch": 11.041292639138241, + "grad_norm": 7.7641825675964355, + "learning_rate": 1.6610630542407924e-05, + "loss": 4.7054, + "step": 30750 + }, + { + "epoch": 11.050269299820467, + "grad_norm": 8.40953254699707, + "learning_rate": 1.6597029541374244e-05, + "loss": 4.784, + "step": 30775 + }, + { + "epoch": 11.059245960502693, + "grad_norm": 7.054280757904053, + "learning_rate": 1.658342854034057e-05, + "loss": 4.7936, + "step": 30800 + }, + { + "epoch": 11.068222621184919, + "grad_norm": 8.159821510314941, + "learning_rate": 1.6569827539306893e-05, + "loss": 4.8276, + "step": 30825 + }, + { + "epoch": 11.077199281867145, + "grad_norm": 7.990320205688477, + "learning_rate": 1.6556226538273216e-05, + "loss": 4.8266, + "step": 30850 + }, + { + "epoch": 11.08617594254937, + "grad_norm": 7.418392658233643, + "learning_rate": 1.6542625537239542e-05, + "loss": 4.8449, + "step": 30875 + }, + { + "epoch": 11.095152603231599, + "grad_norm": 7.260545253753662, + "learning_rate": 1.6529024536205865e-05, + "loss": 4.8607, + "step": 30900 + }, + { + "epoch": 11.104129263913824, + "grad_norm": 8.180327415466309, + "learning_rate": 1.651542353517219e-05, + "loss": 4.7826, + "step": 30925 + }, + { + "epoch": 11.11310592459605, + "grad_norm": 8.261492729187012, + "learning_rate": 1.650182253413851e-05, + "loss": 4.8081, + "step": 30950 + }, + { + "epoch": 11.122082585278276, + "grad_norm": 8.875771522521973, + "learning_rate": 1.6488221533104837e-05, + "loss": 4.7243, + "step": 30975 + }, + { + "epoch": 11.131059245960502, + "grad_norm": 7.999340534210205, + "learning_rate": 1.647462053207116e-05, + "loss": 4.7089, + "step": 31000 + }, + { + "epoch": 11.140035906642728, + "grad_norm": 9.002677917480469, + "learning_rate": 1.6461019531037483e-05, + "loss": 4.755, + "step": 31025 + }, + { + "epoch": 11.149012567324956, + "grad_norm": 9.103066444396973, + "learning_rate": 1.644741853000381e-05, + "loss": 4.8387, + "step": 31050 + }, + { + "epoch": 11.157989228007182, + "grad_norm": 7.989136695861816, + "learning_rate": 1.6433817528970132e-05, + "loss": 4.7813, + "step": 31075 + }, + { + "epoch": 11.166965888689408, + "grad_norm": 8.806028366088867, + "learning_rate": 1.6420760567977805e-05, + "loss": 4.8914, + "step": 31100 + }, + { + "epoch": 11.175942549371634, + "grad_norm": 7.491745948791504, + "learning_rate": 1.6407159566944128e-05, + "loss": 4.7561, + "step": 31125 + }, + { + "epoch": 11.18491921005386, + "grad_norm": 8.100341796875, + "learning_rate": 1.639355856591045e-05, + "loss": 4.8512, + "step": 31150 + }, + { + "epoch": 11.193895870736085, + "grad_norm": 7.613574981689453, + "learning_rate": 1.6379957564876777e-05, + "loss": 4.8608, + "step": 31175 + }, + { + "epoch": 11.202872531418313, + "grad_norm": 8.9463529586792, + "learning_rate": 1.6366356563843097e-05, + "loss": 4.791, + "step": 31200 + }, + { + "epoch": 11.211849192100539, + "grad_norm": 8.524858474731445, + "learning_rate": 1.6352755562809423e-05, + "loss": 4.8849, + "step": 31225 + }, + { + "epoch": 11.220825852782765, + "grad_norm": 8.140938758850098, + "learning_rate": 1.633915456177575e-05, + "loss": 4.8177, + "step": 31250 + }, + { + "epoch": 11.22980251346499, + "grad_norm": 8.40640640258789, + "learning_rate": 1.6325553560742072e-05, + "loss": 4.8879, + "step": 31275 + }, + { + "epoch": 11.238779174147217, + "grad_norm": 8.89106559753418, + "learning_rate": 1.6311952559708395e-05, + "loss": 4.887, + "step": 31300 + }, + { + "epoch": 11.247755834829443, + "grad_norm": 7.773661136627197, + "learning_rate": 1.6298351558674718e-05, + "loss": 4.7402, + "step": 31325 + }, + { + "epoch": 11.25673249551167, + "grad_norm": 8.786142349243164, + "learning_rate": 1.6284750557641044e-05, + "loss": 4.8224, + "step": 31350 + }, + { + "epoch": 11.265709156193896, + "grad_norm": 7.951003551483154, + "learning_rate": 1.6271149556607364e-05, + "loss": 4.9239, + "step": 31375 + }, + { + "epoch": 11.274685816876122, + "grad_norm": 8.316591262817383, + "learning_rate": 1.625754855557369e-05, + "loss": 4.7942, + "step": 31400 + }, + { + "epoch": 11.283662477558348, + "grad_norm": 8.03123950958252, + "learning_rate": 1.6243947554540016e-05, + "loss": 4.8127, + "step": 31425 + }, + { + "epoch": 11.292639138240574, + "grad_norm": 8.406017303466797, + "learning_rate": 1.623034655350634e-05, + "loss": 4.7697, + "step": 31450 + }, + { + "epoch": 11.3016157989228, + "grad_norm": 7.728913307189941, + "learning_rate": 1.6216745552472662e-05, + "loss": 4.8782, + "step": 31475 + }, + { + "epoch": 11.310592459605028, + "grad_norm": 8.47448444366455, + "learning_rate": 1.6203144551438985e-05, + "loss": 4.8343, + "step": 31500 + }, + { + "epoch": 11.319569120287253, + "grad_norm": 8.40689468383789, + "learning_rate": 1.618954355040531e-05, + "loss": 4.7931, + "step": 31525 + }, + { + "epoch": 11.32854578096948, + "grad_norm": 7.780486106872559, + "learning_rate": 1.6175942549371635e-05, + "loss": 4.8747, + "step": 31550 + }, + { + "epoch": 11.337522441651705, + "grad_norm": 8.036104202270508, + "learning_rate": 1.6162341548337957e-05, + "loss": 4.8387, + "step": 31575 + }, + { + "epoch": 11.346499102333931, + "grad_norm": 7.894997596740723, + "learning_rate": 1.6148740547304284e-05, + "loss": 4.9925, + "step": 31600 + }, + { + "epoch": 11.355475763016157, + "grad_norm": 8.173030853271484, + "learning_rate": 1.6135139546270607e-05, + "loss": 4.8427, + "step": 31625 + }, + { + "epoch": 11.364452423698385, + "grad_norm": 7.987740516662598, + "learning_rate": 1.612153854523693e-05, + "loss": 4.7836, + "step": 31650 + }, + { + "epoch": 11.37342908438061, + "grad_norm": 7.833039283752441, + "learning_rate": 1.6107937544203253e-05, + "loss": 4.8766, + "step": 31675 + }, + { + "epoch": 11.382405745062837, + "grad_norm": 8.374659538269043, + "learning_rate": 1.609433654316958e-05, + "loss": 4.8383, + "step": 31700 + }, + { + "epoch": 11.391382405745063, + "grad_norm": 7.795012474060059, + "learning_rate": 1.6080735542135902e-05, + "loss": 4.9061, + "step": 31725 + }, + { + "epoch": 11.400359066427288, + "grad_norm": 8.98693561553955, + "learning_rate": 1.6067134541102225e-05, + "loss": 4.7357, + "step": 31750 + }, + { + "epoch": 11.409335727109514, + "grad_norm": 8.513385772705078, + "learning_rate": 1.605353354006855e-05, + "loss": 4.8678, + "step": 31775 + }, + { + "epoch": 11.418312387791742, + "grad_norm": 8.177802085876465, + "learning_rate": 1.6039932539034874e-05, + "loss": 4.8053, + "step": 31800 + }, + { + "epoch": 11.427289048473968, + "grad_norm": 7.711091995239258, + "learning_rate": 1.6026331538001197e-05, + "loss": 4.8752, + "step": 31825 + }, + { + "epoch": 11.436265709156194, + "grad_norm": 8.212480545043945, + "learning_rate": 1.601273053696752e-05, + "loss": 4.8509, + "step": 31850 + }, + { + "epoch": 11.44524236983842, + "grad_norm": 7.493651866912842, + "learning_rate": 1.5999129535933846e-05, + "loss": 4.9481, + "step": 31875 + }, + { + "epoch": 11.454219030520646, + "grad_norm": 7.9831461906433105, + "learning_rate": 1.598552853490017e-05, + "loss": 4.8449, + "step": 31900 + }, + { + "epoch": 11.463195691202873, + "grad_norm": 7.775557041168213, + "learning_rate": 1.5971927533866492e-05, + "loss": 4.8082, + "step": 31925 + }, + { + "epoch": 11.4721723518851, + "grad_norm": 7.9648356437683105, + "learning_rate": 1.595832653283282e-05, + "loss": 4.9417, + "step": 31950 + }, + { + "epoch": 11.481149012567325, + "grad_norm": 8.533836364746094, + "learning_rate": 1.5944725531799138e-05, + "loss": 4.9428, + "step": 31975 + }, + { + "epoch": 11.490125673249551, + "grad_norm": 8.788683891296387, + "learning_rate": 1.5931124530765464e-05, + "loss": 4.8762, + "step": 32000 + }, + { + "epoch": 11.499102333931777, + "grad_norm": 7.544762134552002, + "learning_rate": 1.591752352973179e-05, + "loss": 4.8514, + "step": 32025 + }, + { + "epoch": 11.508078994614003, + "grad_norm": 8.294242858886719, + "learning_rate": 1.5903922528698113e-05, + "loss": 4.8517, + "step": 32050 + }, + { + "epoch": 11.517055655296229, + "grad_norm": 8.281182289123535, + "learning_rate": 1.5890321527664436e-05, + "loss": 4.7636, + "step": 32075 + }, + { + "epoch": 11.526032315978457, + "grad_norm": 8.654154777526855, + "learning_rate": 1.587672052663076e-05, + "loss": 4.8507, + "step": 32100 + }, + { + "epoch": 11.535008976660682, + "grad_norm": 8.767759323120117, + "learning_rate": 1.5863119525597086e-05, + "loss": 4.8539, + "step": 32125 + }, + { + "epoch": 11.543985637342908, + "grad_norm": 8.36111831665039, + "learning_rate": 1.5849518524563405e-05, + "loss": 4.9447, + "step": 32150 + }, + { + "epoch": 11.552962298025134, + "grad_norm": 7.766678810119629, + "learning_rate": 1.583591752352973e-05, + "loss": 4.8802, + "step": 32175 + }, + { + "epoch": 11.56193895870736, + "grad_norm": 8.54783821105957, + "learning_rate": 1.5822316522496058e-05, + "loss": 4.8536, + "step": 32200 + }, + { + "epoch": 11.570915619389588, + "grad_norm": 8.367302894592285, + "learning_rate": 1.580871552146238e-05, + "loss": 4.9505, + "step": 32225 + }, + { + "epoch": 11.579892280071814, + "grad_norm": 7.6094160079956055, + "learning_rate": 1.5795114520428704e-05, + "loss": 4.8756, + "step": 32250 + }, + { + "epoch": 11.58886894075404, + "grad_norm": 8.87820816040039, + "learning_rate": 1.5781513519395027e-05, + "loss": 4.8263, + "step": 32275 + }, + { + "epoch": 11.597845601436266, + "grad_norm": 7.780447006225586, + "learning_rate": 1.57684565584027e-05, + "loss": 4.8342, + "step": 32300 + }, + { + "epoch": 11.606822262118492, + "grad_norm": 8.519152641296387, + "learning_rate": 1.5754855557369022e-05, + "loss": 4.8254, + "step": 32325 + }, + { + "epoch": 11.615798922800717, + "grad_norm": 8.022672653198242, + "learning_rate": 1.5741254556335345e-05, + "loss": 4.8677, + "step": 32350 + }, + { + "epoch": 11.624775583482945, + "grad_norm": 8.010677337646484, + "learning_rate": 1.572765355530167e-05, + "loss": 4.8563, + "step": 32375 + }, + { + "epoch": 11.633752244165171, + "grad_norm": 8.195467948913574, + "learning_rate": 1.5714052554267997e-05, + "loss": 4.7971, + "step": 32400 + }, + { + "epoch": 11.642728904847397, + "grad_norm": 8.235554695129395, + "learning_rate": 1.5700451553234317e-05, + "loss": 4.8436, + "step": 32425 + }, + { + "epoch": 11.651705565529623, + "grad_norm": 9.141916275024414, + "learning_rate": 1.5686850552200643e-05, + "loss": 4.956, + "step": 32450 + }, + { + "epoch": 11.660682226211849, + "grad_norm": 8.497591018676758, + "learning_rate": 1.5673249551166966e-05, + "loss": 4.8374, + "step": 32475 + }, + { + "epoch": 11.669658886894075, + "grad_norm": 8.142252922058105, + "learning_rate": 1.565964855013329e-05, + "loss": 4.9598, + "step": 32500 + }, + { + "epoch": 11.678635547576302, + "grad_norm": 8.242039680480957, + "learning_rate": 1.5646047549099612e-05, + "loss": 4.8601, + "step": 32525 + }, + { + "epoch": 11.687612208258528, + "grad_norm": 7.958847999572754, + "learning_rate": 1.563244654806594e-05, + "loss": 4.8613, + "step": 32550 + }, + { + "epoch": 11.696588868940754, + "grad_norm": 8.301851272583008, + "learning_rate": 1.5618845547032265e-05, + "loss": 4.868, + "step": 32575 + }, + { + "epoch": 11.70556552962298, + "grad_norm": 9.009356498718262, + "learning_rate": 1.5605244545998584e-05, + "loss": 4.9444, + "step": 32600 + }, + { + "epoch": 11.714542190305206, + "grad_norm": 7.792937278747559, + "learning_rate": 1.559164354496491e-05, + "loss": 4.8718, + "step": 32625 + }, + { + "epoch": 11.723518850987432, + "grad_norm": 8.687966346740723, + "learning_rate": 1.5578042543931234e-05, + "loss": 4.8907, + "step": 32650 + }, + { + "epoch": 11.73249551166966, + "grad_norm": 7.966274261474609, + "learning_rate": 1.5564441542897556e-05, + "loss": 4.8476, + "step": 32675 + }, + { + "epoch": 11.741472172351886, + "grad_norm": 9.449064254760742, + "learning_rate": 1.555084054186388e-05, + "loss": 4.9233, + "step": 32700 + }, + { + "epoch": 11.750448833034111, + "grad_norm": 7.465609550476074, + "learning_rate": 1.5537239540830206e-05, + "loss": 4.802, + "step": 32725 + }, + { + "epoch": 11.759425493716337, + "grad_norm": 8.09386920928955, + "learning_rate": 1.5523638539796532e-05, + "loss": 4.8298, + "step": 32750 + }, + { + "epoch": 11.768402154398563, + "grad_norm": 8.106112480163574, + "learning_rate": 1.551003753876285e-05, + "loss": 4.8012, + "step": 32775 + }, + { + "epoch": 11.77737881508079, + "grad_norm": 8.228652954101562, + "learning_rate": 1.5496436537729178e-05, + "loss": 4.8801, + "step": 32800 + }, + { + "epoch": 11.786355475763017, + "grad_norm": 7.642451286315918, + "learning_rate": 1.54828355366955e-05, + "loss": 4.8919, + "step": 32825 + }, + { + "epoch": 11.795332136445243, + "grad_norm": 8.064610481262207, + "learning_rate": 1.5469234535661824e-05, + "loss": 4.8428, + "step": 32850 + }, + { + "epoch": 11.804308797127469, + "grad_norm": 8.48971176147461, + "learning_rate": 1.545563353462815e-05, + "loss": 4.9439, + "step": 32875 + }, + { + "epoch": 11.813285457809695, + "grad_norm": 7.698769569396973, + "learning_rate": 1.5442032533594473e-05, + "loss": 4.8217, + "step": 32900 + }, + { + "epoch": 11.82226211849192, + "grad_norm": 8.456120491027832, + "learning_rate": 1.54284315325608e-05, + "loss": 4.8259, + "step": 32925 + }, + { + "epoch": 11.831238779174146, + "grad_norm": 8.672999382019043, + "learning_rate": 1.541483053152712e-05, + "loss": 4.8166, + "step": 32950 + }, + { + "epoch": 11.840215439856374, + "grad_norm": 7.601571559906006, + "learning_rate": 1.5401229530493445e-05, + "loss": 4.9438, + "step": 32975 + }, + { + "epoch": 11.8491921005386, + "grad_norm": 8.868193626403809, + "learning_rate": 1.5387628529459768e-05, + "loss": 4.7487, + "step": 33000 + }, + { + "epoch": 11.858168761220826, + "grad_norm": 9.143499374389648, + "learning_rate": 1.537402752842609e-05, + "loss": 4.8602, + "step": 33025 + }, + { + "epoch": 11.867145421903052, + "grad_norm": 7.738324165344238, + "learning_rate": 1.5360426527392417e-05, + "loss": 4.9124, + "step": 33050 + }, + { + "epoch": 11.876122082585278, + "grad_norm": 7.978902339935303, + "learning_rate": 1.534682552635874e-05, + "loss": 4.9049, + "step": 33075 + }, + { + "epoch": 11.885098743267504, + "grad_norm": 8.905729293823242, + "learning_rate": 1.5333224525325067e-05, + "loss": 4.8811, + "step": 33100 + }, + { + "epoch": 11.894075403949731, + "grad_norm": 8.805572509765625, + "learning_rate": 1.5319623524291386e-05, + "loss": 4.9235, + "step": 33125 + }, + { + "epoch": 11.903052064631957, + "grad_norm": 8.164290428161621, + "learning_rate": 1.5306022523257712e-05, + "loss": 4.9654, + "step": 33150 + }, + { + "epoch": 11.912028725314183, + "grad_norm": 8.080095291137695, + "learning_rate": 1.529242152222404e-05, + "loss": 4.9264, + "step": 33175 + }, + { + "epoch": 11.92100538599641, + "grad_norm": 8.312941551208496, + "learning_rate": 1.527882052119036e-05, + "loss": 4.8658, + "step": 33200 + }, + { + "epoch": 11.929982046678635, + "grad_norm": 8.584061622619629, + "learning_rate": 1.5265219520156685e-05, + "loss": 4.8807, + "step": 33225 + }, + { + "epoch": 11.938958707360861, + "grad_norm": 8.36880874633789, + "learning_rate": 1.525161851912301e-05, + "loss": 4.9241, + "step": 33250 + }, + { + "epoch": 11.947935368043089, + "grad_norm": 8.83968734741211, + "learning_rate": 1.5238017518089334e-05, + "loss": 4.909, + "step": 33275 + }, + { + "epoch": 11.956912028725315, + "grad_norm": 8.05284309387207, + "learning_rate": 1.5224416517055655e-05, + "loss": 4.92, + "step": 33300 + }, + { + "epoch": 11.96588868940754, + "grad_norm": 8.612296104431152, + "learning_rate": 1.521081551602198e-05, + "loss": 4.9934, + "step": 33325 + }, + { + "epoch": 11.974865350089766, + "grad_norm": 8.416522979736328, + "learning_rate": 1.5197214514988304e-05, + "loss": 4.9173, + "step": 33350 + }, + { + "epoch": 11.983842010771992, + "grad_norm": 8.248833656311035, + "learning_rate": 1.5183613513954626e-05, + "loss": 4.8937, + "step": 33375 + }, + { + "epoch": 11.992818671454218, + "grad_norm": 8.20731258392334, + "learning_rate": 1.517001251292095e-05, + "loss": 4.8932, + "step": 33400 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.07209821854772497, + "eval_f1_macro": 0.007216699606688768, + "eval_f1_micro": 0.07209821854772497, + "eval_f1_weighted": 0.041330168217731, + "eval_loss": 6.500277996063232, + "eval_precision_macro": 0.006631849650295026, + "eval_precision_micro": 0.07209821854772497, + "eval_precision_weighted": 0.03408674379657757, + "eval_recall_macro": 0.012460278505332953, + "eval_recall_micro": 0.07209821854772497, + "eval_recall_weighted": 0.07209821854772497, + "eval_runtime": 83.9414, + "eval_samples_per_second": 623.923, + "eval_steps_per_second": 9.757, + "step": 33420 + }, + { + "epoch": 12.001795332136446, + "grad_norm": 8.642326354980469, + "learning_rate": 1.5156411511887277e-05, + "loss": 4.8163, + "step": 33425 + }, + { + "epoch": 12.010771992818672, + "grad_norm": 8.684854507446289, + "learning_rate": 1.5142810510853601e-05, + "loss": 4.7008, + "step": 33450 + }, + { + "epoch": 12.019748653500898, + "grad_norm": 7.741324424743652, + "learning_rate": 1.5129209509819922e-05, + "loss": 4.6123, + "step": 33475 + }, + { + "epoch": 12.028725314183124, + "grad_norm": 7.780877590179443, + "learning_rate": 1.5115608508786247e-05, + "loss": 4.5226, + "step": 33500 + }, + { + "epoch": 12.03770197486535, + "grad_norm": 8.698841094970703, + "learning_rate": 1.5102007507752572e-05, + "loss": 4.6065, + "step": 33525 + }, + { + "epoch": 12.046678635547575, + "grad_norm": 8.49246597290039, + "learning_rate": 1.5088406506718893e-05, + "loss": 4.631, + "step": 33550 + }, + { + "epoch": 12.055655296229803, + "grad_norm": 8.557188987731934, + "learning_rate": 1.507480550568522e-05, + "loss": 4.8242, + "step": 33575 + }, + { + "epoch": 12.064631956912029, + "grad_norm": 8.640410423278809, + "learning_rate": 1.5061204504651544e-05, + "loss": 4.6509, + "step": 33600 + }, + { + "epoch": 12.073608617594255, + "grad_norm": 8.739526748657227, + "learning_rate": 1.5047603503617868e-05, + "loss": 4.6621, + "step": 33625 + }, + { + "epoch": 12.08258527827648, + "grad_norm": 8.510025978088379, + "learning_rate": 1.503400250258419e-05, + "loss": 4.5852, + "step": 33650 + }, + { + "epoch": 12.091561938958707, + "grad_norm": 8.565057754516602, + "learning_rate": 1.5020401501550514e-05, + "loss": 4.6589, + "step": 33675 + }, + { + "epoch": 12.100538599640933, + "grad_norm": 7.9765825271606445, + "learning_rate": 1.5006800500516839e-05, + "loss": 4.7044, + "step": 33700 + }, + { + "epoch": 12.10951526032316, + "grad_norm": 8.809619903564453, + "learning_rate": 1.4993199499483164e-05, + "loss": 4.6858, + "step": 33725 + }, + { + "epoch": 12.118491921005386, + "grad_norm": 8.890826225280762, + "learning_rate": 1.4979598498449486e-05, + "loss": 4.6303, + "step": 33750 + }, + { + "epoch": 12.127468581687612, + "grad_norm": 8.3714599609375, + "learning_rate": 1.4965997497415811e-05, + "loss": 4.6781, + "step": 33775 + }, + { + "epoch": 12.136445242369838, + "grad_norm": 8.766016960144043, + "learning_rate": 1.4952396496382134e-05, + "loss": 4.7083, + "step": 33800 + }, + { + "epoch": 12.145421903052064, + "grad_norm": 9.24152946472168, + "learning_rate": 1.4938795495348457e-05, + "loss": 4.7265, + "step": 33825 + }, + { + "epoch": 12.15439856373429, + "grad_norm": 9.245137214660645, + "learning_rate": 1.4925194494314782e-05, + "loss": 4.5839, + "step": 33850 + }, + { + "epoch": 12.163375224416518, + "grad_norm": 9.182629585266113, + "learning_rate": 1.4911593493281106e-05, + "loss": 4.735, + "step": 33875 + }, + { + "epoch": 12.172351885098744, + "grad_norm": 8.134419441223145, + "learning_rate": 1.489799249224743e-05, + "loss": 4.7339, + "step": 33900 + }, + { + "epoch": 12.18132854578097, + "grad_norm": 8.665135383605957, + "learning_rate": 1.4884391491213754e-05, + "loss": 4.606, + "step": 33925 + }, + { + "epoch": 12.190305206463195, + "grad_norm": 7.821691036224365, + "learning_rate": 1.4870790490180078e-05, + "loss": 4.5459, + "step": 33950 + }, + { + "epoch": 12.199281867145421, + "grad_norm": 7.441476345062256, + "learning_rate": 1.4857189489146401e-05, + "loss": 4.646, + "step": 33975 + }, + { + "epoch": 12.208258527827649, + "grad_norm": 8.2256498336792, + "learning_rate": 1.4843588488112724e-05, + "loss": 4.6312, + "step": 34000 + }, + { + "epoch": 12.217235188509875, + "grad_norm": 9.038784980773926, + "learning_rate": 1.482998748707905e-05, + "loss": 4.8181, + "step": 34025 + }, + { + "epoch": 12.2262118491921, + "grad_norm": 7.884214401245117, + "learning_rate": 1.4816386486045373e-05, + "loss": 4.6976, + "step": 34050 + }, + { + "epoch": 12.235188509874327, + "grad_norm": 8.428956031799316, + "learning_rate": 1.4802785485011698e-05, + "loss": 4.7047, + "step": 34075 + }, + { + "epoch": 12.244165170556553, + "grad_norm": 8.247714042663574, + "learning_rate": 1.4789184483978021e-05, + "loss": 4.666, + "step": 34100 + }, + { + "epoch": 12.253141831238779, + "grad_norm": 8.428367614746094, + "learning_rate": 1.4775583482944344e-05, + "loss": 4.7415, + "step": 34125 + }, + { + "epoch": 12.262118491921006, + "grad_norm": 8.919046401977539, + "learning_rate": 1.4761982481910669e-05, + "loss": 4.7935, + "step": 34150 + }, + { + "epoch": 12.271095152603232, + "grad_norm": 8.127045631408691, + "learning_rate": 1.4748381480876992e-05, + "loss": 4.6196, + "step": 34175 + }, + { + "epoch": 12.280071813285458, + "grad_norm": 8.258234977722168, + "learning_rate": 1.4734780479843318e-05, + "loss": 4.6884, + "step": 34200 + }, + { + "epoch": 12.289048473967684, + "grad_norm": 7.902827262878418, + "learning_rate": 1.472117947880964e-05, + "loss": 4.6345, + "step": 34225 + }, + { + "epoch": 12.29802513464991, + "grad_norm": 7.8772501945495605, + "learning_rate": 1.4707578477775965e-05, + "loss": 4.6473, + "step": 34250 + }, + { + "epoch": 12.307001795332136, + "grad_norm": 8.022130012512207, + "learning_rate": 1.4693977476742288e-05, + "loss": 4.576, + "step": 34275 + }, + { + "epoch": 12.315978456014363, + "grad_norm": 8.064008712768555, + "learning_rate": 1.4680376475708611e-05, + "loss": 4.6654, + "step": 34300 + }, + { + "epoch": 12.32495511669659, + "grad_norm": 9.221813201904297, + "learning_rate": 1.4666775474674936e-05, + "loss": 4.6492, + "step": 34325 + }, + { + "epoch": 12.333931777378815, + "grad_norm": 7.877941131591797, + "learning_rate": 1.465317447364126e-05, + "loss": 4.751, + "step": 34350 + }, + { + "epoch": 12.342908438061041, + "grad_norm": 8.346281051635742, + "learning_rate": 1.4639573472607585e-05, + "loss": 4.7046, + "step": 34375 + }, + { + "epoch": 12.351885098743267, + "grad_norm": 8.351161003112793, + "learning_rate": 1.4625972471573908e-05, + "loss": 4.6569, + "step": 34400 + }, + { + "epoch": 12.360861759425493, + "grad_norm": 9.28866958618164, + "learning_rate": 1.4612371470540233e-05, + "loss": 4.6847, + "step": 34425 + }, + { + "epoch": 12.36983842010772, + "grad_norm": 9.501315116882324, + "learning_rate": 1.4598770469506556e-05, + "loss": 4.7445, + "step": 34450 + }, + { + "epoch": 12.378815080789947, + "grad_norm": 8.60990047454834, + "learning_rate": 1.4585169468472879e-05, + "loss": 4.8007, + "step": 34475 + }, + { + "epoch": 12.387791741472173, + "grad_norm": 8.14642333984375, + "learning_rate": 1.4571568467439205e-05, + "loss": 4.6577, + "step": 34500 + }, + { + "epoch": 12.396768402154398, + "grad_norm": 8.0878324508667, + "learning_rate": 1.4557967466405528e-05, + "loss": 4.6818, + "step": 34525 + }, + { + "epoch": 12.405745062836624, + "grad_norm": 8.100197792053223, + "learning_rate": 1.4544366465371852e-05, + "loss": 4.7214, + "step": 34550 + }, + { + "epoch": 12.41472172351885, + "grad_norm": 8.402783393859863, + "learning_rate": 1.4530765464338175e-05, + "loss": 4.7036, + "step": 34575 + }, + { + "epoch": 12.423698384201078, + "grad_norm": 7.980328559875488, + "learning_rate": 1.45171644633045e-05, + "loss": 4.5793, + "step": 34600 + }, + { + "epoch": 12.432675044883304, + "grad_norm": 8.462631225585938, + "learning_rate": 1.4503563462270823e-05, + "loss": 4.7704, + "step": 34625 + }, + { + "epoch": 12.44165170556553, + "grad_norm": 9.38621711730957, + "learning_rate": 1.4489962461237147e-05, + "loss": 4.6966, + "step": 34650 + }, + { + "epoch": 12.450628366247756, + "grad_norm": 10.412318229675293, + "learning_rate": 1.4476361460203472e-05, + "loss": 4.772, + "step": 34675 + }, + { + "epoch": 12.459605026929982, + "grad_norm": 7.9969072341918945, + "learning_rate": 1.4462760459169795e-05, + "loss": 4.7298, + "step": 34700 + }, + { + "epoch": 12.468581687612208, + "grad_norm": 8.923830032348633, + "learning_rate": 1.444915945813612e-05, + "loss": 4.7761, + "step": 34725 + }, + { + "epoch": 12.477558348294435, + "grad_norm": 7.824152946472168, + "learning_rate": 1.4435558457102443e-05, + "loss": 4.7039, + "step": 34750 + }, + { + "epoch": 12.486535008976661, + "grad_norm": 7.9384942054748535, + "learning_rate": 1.4421957456068767e-05, + "loss": 4.7317, + "step": 34775 + }, + { + "epoch": 12.495511669658887, + "grad_norm": 8.450387001037598, + "learning_rate": 1.4408356455035092e-05, + "loss": 4.7323, + "step": 34800 + }, + { + "epoch": 12.504488330341113, + "grad_norm": 7.7471113204956055, + "learning_rate": 1.4394755454001415e-05, + "loss": 4.6987, + "step": 34825 + }, + { + "epoch": 12.513464991023339, + "grad_norm": 8.074797630310059, + "learning_rate": 1.438115445296774e-05, + "loss": 4.7648, + "step": 34850 + }, + { + "epoch": 12.522441651705565, + "grad_norm": 9.347793579101562, + "learning_rate": 1.4367553451934062e-05, + "loss": 4.776, + "step": 34875 + }, + { + "epoch": 12.531418312387792, + "grad_norm": 7.997150897979736, + "learning_rate": 1.4353952450900387e-05, + "loss": 4.6506, + "step": 34900 + }, + { + "epoch": 12.540394973070018, + "grad_norm": 8.90993881225586, + "learning_rate": 1.434035144986671e-05, + "loss": 4.5933, + "step": 34925 + }, + { + "epoch": 12.549371633752244, + "grad_norm": 8.780791282653809, + "learning_rate": 1.4326750448833034e-05, + "loss": 4.7952, + "step": 34950 + }, + { + "epoch": 12.55834829443447, + "grad_norm": 7.791647911071777, + "learning_rate": 1.4313149447799359e-05, + "loss": 4.8505, + "step": 34975 + }, + { + "epoch": 12.567324955116696, + "grad_norm": 8.633427619934082, + "learning_rate": 1.4299548446765682e-05, + "loss": 4.7511, + "step": 35000 + }, + { + "epoch": 12.576301615798922, + "grad_norm": 8.751585960388184, + "learning_rate": 1.4285947445732007e-05, + "loss": 4.7811, + "step": 35025 + }, + { + "epoch": 12.58527827648115, + "grad_norm": 8.686301231384277, + "learning_rate": 1.427234644469833e-05, + "loss": 4.7341, + "step": 35050 + }, + { + "epoch": 12.594254937163376, + "grad_norm": 8.417878150939941, + "learning_rate": 1.4258745443664654e-05, + "loss": 4.6998, + "step": 35075 + }, + { + "epoch": 12.603231597845602, + "grad_norm": 9.090867042541504, + "learning_rate": 1.4245144442630977e-05, + "loss": 4.7294, + "step": 35100 + }, + { + "epoch": 12.612208258527827, + "grad_norm": 7.993725776672363, + "learning_rate": 1.4231543441597302e-05, + "loss": 4.7463, + "step": 35125 + }, + { + "epoch": 12.621184919210053, + "grad_norm": 7.698258876800537, + "learning_rate": 1.4217942440563626e-05, + "loss": 4.7491, + "step": 35150 + }, + { + "epoch": 12.63016157989228, + "grad_norm": 9.09363079071045, + "learning_rate": 1.420434143952995e-05, + "loss": 4.7705, + "step": 35175 + }, + { + "epoch": 12.639138240574507, + "grad_norm": 8.536531448364258, + "learning_rate": 1.4190740438496274e-05, + "loss": 4.6576, + "step": 35200 + }, + { + "epoch": 12.648114901256733, + "grad_norm": 8.457048416137695, + "learning_rate": 1.4177139437462597e-05, + "loss": 4.7441, + "step": 35225 + }, + { + "epoch": 12.657091561938959, + "grad_norm": 8.237467765808105, + "learning_rate": 1.4163538436428921e-05, + "loss": 4.7563, + "step": 35250 + }, + { + "epoch": 12.666068222621185, + "grad_norm": 8.298392295837402, + "learning_rate": 1.4149937435395246e-05, + "loss": 4.7755, + "step": 35275 + }, + { + "epoch": 12.67504488330341, + "grad_norm": 8.675517082214355, + "learning_rate": 1.4136336434361569e-05, + "loss": 4.756, + "step": 35300 + }, + { + "epoch": 12.684021543985637, + "grad_norm": 9.810907363891602, + "learning_rate": 1.4122735433327894e-05, + "loss": 4.7385, + "step": 35325 + }, + { + "epoch": 12.692998204667864, + "grad_norm": 8.97633171081543, + "learning_rate": 1.4109134432294217e-05, + "loss": 4.6796, + "step": 35350 + }, + { + "epoch": 12.70197486535009, + "grad_norm": 8.664902687072754, + "learning_rate": 1.4095533431260541e-05, + "loss": 4.7459, + "step": 35375 + }, + { + "epoch": 12.710951526032316, + "grad_norm": 9.40893268585205, + "learning_rate": 1.4081932430226864e-05, + "loss": 4.7951, + "step": 35400 + }, + { + "epoch": 12.719928186714542, + "grad_norm": 8.80351448059082, + "learning_rate": 1.406833142919319e-05, + "loss": 4.72, + "step": 35425 + }, + { + "epoch": 12.728904847396768, + "grad_norm": 9.225421905517578, + "learning_rate": 1.4054730428159513e-05, + "loss": 4.795, + "step": 35450 + }, + { + "epoch": 12.737881508078996, + "grad_norm": 8.377726554870605, + "learning_rate": 1.4041129427125836e-05, + "loss": 4.6235, + "step": 35475 + }, + { + "epoch": 12.746858168761221, + "grad_norm": 8.286016464233398, + "learning_rate": 1.4027528426092161e-05, + "loss": 4.7279, + "step": 35500 + }, + { + "epoch": 12.755834829443447, + "grad_norm": 8.480469703674316, + "learning_rate": 1.4013927425058484e-05, + "loss": 4.7166, + "step": 35525 + }, + { + "epoch": 12.764811490125673, + "grad_norm": 8.793001174926758, + "learning_rate": 1.4000326424024808e-05, + "loss": 4.7182, + "step": 35550 + }, + { + "epoch": 12.7737881508079, + "grad_norm": 8.343633651733398, + "learning_rate": 1.3986725422991133e-05, + "loss": 4.7926, + "step": 35575 + }, + { + "epoch": 12.782764811490125, + "grad_norm": 9.458440780639648, + "learning_rate": 1.3973124421957458e-05, + "loss": 4.7571, + "step": 35600 + }, + { + "epoch": 12.791741472172351, + "grad_norm": 8.578448295593262, + "learning_rate": 1.395952342092378e-05, + "loss": 4.7207, + "step": 35625 + }, + { + "epoch": 12.800718132854579, + "grad_norm": 7.723859786987305, + "learning_rate": 1.3945922419890104e-05, + "loss": 4.7603, + "step": 35650 + }, + { + "epoch": 12.809694793536805, + "grad_norm": 8.46157169342041, + "learning_rate": 1.3932321418856428e-05, + "loss": 4.8012, + "step": 35675 + }, + { + "epoch": 12.81867145421903, + "grad_norm": 9.227755546569824, + "learning_rate": 1.3918720417822751e-05, + "loss": 4.5773, + "step": 35700 + }, + { + "epoch": 12.827648114901256, + "grad_norm": 9.501360893249512, + "learning_rate": 1.3905119416789076e-05, + "loss": 4.7102, + "step": 35725 + }, + { + "epoch": 12.836624775583482, + "grad_norm": 8.756332397460938, + "learning_rate": 1.38915184157554e-05, + "loss": 4.6886, + "step": 35750 + }, + { + "epoch": 12.84560143626571, + "grad_norm": 8.747485160827637, + "learning_rate": 1.3877917414721723e-05, + "loss": 4.7554, + "step": 35775 + }, + { + "epoch": 12.854578096947936, + "grad_norm": 8.358922004699707, + "learning_rate": 1.3864316413688048e-05, + "loss": 4.6958, + "step": 35800 + }, + { + "epoch": 12.863554757630162, + "grad_norm": 9.115659713745117, + "learning_rate": 1.3850715412654371e-05, + "loss": 4.6392, + "step": 35825 + }, + { + "epoch": 12.872531418312388, + "grad_norm": 8.072962760925293, + "learning_rate": 1.3837114411620695e-05, + "loss": 4.66, + "step": 35850 + }, + { + "epoch": 12.881508078994614, + "grad_norm": 8.361124038696289, + "learning_rate": 1.3823513410587018e-05, + "loss": 4.7277, + "step": 35875 + }, + { + "epoch": 12.89048473967684, + "grad_norm": 9.09039306640625, + "learning_rate": 1.3809912409553345e-05, + "loss": 4.7094, + "step": 35900 + }, + { + "epoch": 12.899461400359066, + "grad_norm": 8.404677391052246, + "learning_rate": 1.3796311408519668e-05, + "loss": 4.7873, + "step": 35925 + }, + { + "epoch": 12.908438061041293, + "grad_norm": 7.930601596832275, + "learning_rate": 1.378271040748599e-05, + "loss": 4.7198, + "step": 35950 + }, + { + "epoch": 12.91741472172352, + "grad_norm": 8.490427017211914, + "learning_rate": 1.3769109406452315e-05, + "loss": 4.7817, + "step": 35975 + }, + { + "epoch": 12.926391382405745, + "grad_norm": 8.246726036071777, + "learning_rate": 1.3755508405418638e-05, + "loss": 4.8131, + "step": 36000 + }, + { + "epoch": 12.935368043087971, + "grad_norm": 8.320513725280762, + "learning_rate": 1.3741907404384963e-05, + "loss": 4.6997, + "step": 36025 + }, + { + "epoch": 12.944344703770197, + "grad_norm": 8.516702651977539, + "learning_rate": 1.3728306403351287e-05, + "loss": 4.799, + "step": 36050 + }, + { + "epoch": 12.953321364452425, + "grad_norm": 8.674857139587402, + "learning_rate": 1.3714705402317612e-05, + "loss": 4.6671, + "step": 36075 + }, + { + "epoch": 12.96229802513465, + "grad_norm": 8.49460506439209, + "learning_rate": 1.3701104401283935e-05, + "loss": 4.7574, + "step": 36100 + }, + { + "epoch": 12.971274685816876, + "grad_norm": 8.891939163208008, + "learning_rate": 1.3687503400250258e-05, + "loss": 4.7409, + "step": 36125 + }, + { + "epoch": 12.980251346499102, + "grad_norm": 8.170284271240234, + "learning_rate": 1.3673902399216582e-05, + "loss": 4.7468, + "step": 36150 + }, + { + "epoch": 12.989228007181328, + "grad_norm": 8.349102020263672, + "learning_rate": 1.3660301398182905e-05, + "loss": 4.7184, + "step": 36175 + }, + { + "epoch": 12.998204667863554, + "grad_norm": 9.01333236694336, + "learning_rate": 1.3646700397149232e-05, + "loss": 4.8404, + "step": 36200 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.07013155633628014, + "eval_f1_macro": 0.007423226920121419, + "eval_f1_micro": 0.07013155633628014, + "eval_f1_weighted": 0.04079314115107041, + "eval_loss": 6.497941493988037, + "eval_precision_macro": 0.00678828814248053, + "eval_precision_micro": 0.07013155633628014, + "eval_precision_weighted": 0.03338266501241234, + "eval_recall_macro": 0.012496507863710996, + "eval_recall_micro": 0.07013155633628014, + "eval_recall_weighted": 0.07013155633628014, + "eval_runtime": 84.1978, + "eval_samples_per_second": 622.023, + "eval_steps_per_second": 9.727, + "step": 36205 + }, + { + "epoch": 13.007181328545782, + "grad_norm": 8.079082489013672, + "learning_rate": 1.3633099396115555e-05, + "loss": 4.5537, + "step": 36225 + }, + { + "epoch": 13.016157989228008, + "grad_norm": 8.781037330627441, + "learning_rate": 1.361949839508188e-05, + "loss": 4.4396, + "step": 36250 + }, + { + "epoch": 13.025134649910234, + "grad_norm": 8.027755737304688, + "learning_rate": 1.3605897394048202e-05, + "loss": 4.4955, + "step": 36275 + }, + { + "epoch": 13.03411131059246, + "grad_norm": 9.31299114227295, + "learning_rate": 1.3592840433055873e-05, + "loss": 4.5914, + "step": 36300 + }, + { + "epoch": 13.043087971274685, + "grad_norm": 9.019867897033691, + "learning_rate": 1.3579239432022198e-05, + "loss": 4.5681, + "step": 36325 + }, + { + "epoch": 13.052064631956911, + "grad_norm": 8.437155723571777, + "learning_rate": 1.356563843098852e-05, + "loss": 4.5153, + "step": 36350 + }, + { + "epoch": 13.061041292639139, + "grad_norm": 8.990568161010742, + "learning_rate": 1.3552037429954845e-05, + "loss": 4.5329, + "step": 36375 + }, + { + "epoch": 13.070017953321365, + "grad_norm": 9.145463943481445, + "learning_rate": 1.3538436428921168e-05, + "loss": 4.5887, + "step": 36400 + }, + { + "epoch": 13.07899461400359, + "grad_norm": 8.789735794067383, + "learning_rate": 1.3524835427887494e-05, + "loss": 4.4365, + "step": 36425 + }, + { + "epoch": 13.087971274685817, + "grad_norm": 7.869734287261963, + "learning_rate": 1.3511234426853817e-05, + "loss": 4.619, + "step": 36450 + }, + { + "epoch": 13.096947935368043, + "grad_norm": 8.222962379455566, + "learning_rate": 1.349763342582014e-05, + "loss": 4.5291, + "step": 36475 + }, + { + "epoch": 13.105924596050269, + "grad_norm": 7.90725564956665, + "learning_rate": 1.3484032424786465e-05, + "loss": 4.5288, + "step": 36500 + }, + { + "epoch": 13.114901256732496, + "grad_norm": 9.294393539428711, + "learning_rate": 1.3470431423752788e-05, + "loss": 4.4727, + "step": 36525 + }, + { + "epoch": 13.123877917414722, + "grad_norm": 8.67839527130127, + "learning_rate": 1.3456830422719112e-05, + "loss": 4.538, + "step": 36550 + }, + { + "epoch": 13.132854578096948, + "grad_norm": 8.350264549255371, + "learning_rate": 1.3443229421685437e-05, + "loss": 4.4934, + "step": 36575 + }, + { + "epoch": 13.141831238779174, + "grad_norm": 8.605304718017578, + "learning_rate": 1.3429628420651762e-05, + "loss": 4.5316, + "step": 36600 + }, + { + "epoch": 13.1508078994614, + "grad_norm": 8.53140640258789, + "learning_rate": 1.3416027419618085e-05, + "loss": 4.4808, + "step": 36625 + }, + { + "epoch": 13.159784560143626, + "grad_norm": 9.274230003356934, + "learning_rate": 1.3402426418584408e-05, + "loss": 4.3906, + "step": 36650 + }, + { + "epoch": 13.168761220825854, + "grad_norm": 9.448626518249512, + "learning_rate": 1.3388825417550732e-05, + "loss": 4.6181, + "step": 36675 + }, + { + "epoch": 13.17773788150808, + "grad_norm": 8.278011322021484, + "learning_rate": 1.3375224416517055e-05, + "loss": 4.5232, + "step": 36700 + }, + { + "epoch": 13.186714542190305, + "grad_norm": 8.573385238647461, + "learning_rate": 1.336162341548338e-05, + "loss": 4.5553, + "step": 36725 + }, + { + "epoch": 13.195691202872531, + "grad_norm": 8.12607479095459, + "learning_rate": 1.3348022414449704e-05, + "loss": 4.5506, + "step": 36750 + }, + { + "epoch": 13.204667863554757, + "grad_norm": 9.291418075561523, + "learning_rate": 1.3334421413416027e-05, + "loss": 4.6253, + "step": 36775 + }, + { + "epoch": 13.213644524236983, + "grad_norm": 9.165974617004395, + "learning_rate": 1.3320820412382352e-05, + "loss": 4.5758, + "step": 36800 + }, + { + "epoch": 13.22262118491921, + "grad_norm": 8.43138599395752, + "learning_rate": 1.3307219411348675e-05, + "loss": 4.597, + "step": 36825 + }, + { + "epoch": 13.231597845601437, + "grad_norm": 9.016468048095703, + "learning_rate": 1.3293618410315e-05, + "loss": 4.5307, + "step": 36850 + }, + { + "epoch": 13.240574506283663, + "grad_norm": 9.219759941101074, + "learning_rate": 1.3280017409281322e-05, + "loss": 4.6712, + "step": 36875 + }, + { + "epoch": 13.249551166965889, + "grad_norm": 8.513641357421875, + "learning_rate": 1.3266416408247649e-05, + "loss": 4.4395, + "step": 36900 + }, + { + "epoch": 13.258527827648114, + "grad_norm": 8.99570369720459, + "learning_rate": 1.3252815407213972e-05, + "loss": 4.6153, + "step": 36925 + }, + { + "epoch": 13.26750448833034, + "grad_norm": 8.83564567565918, + "learning_rate": 1.3239214406180295e-05, + "loss": 4.534, + "step": 36950 + }, + { + "epoch": 13.276481149012568, + "grad_norm": 8.472003936767578, + "learning_rate": 1.322561340514662e-05, + "loss": 4.5427, + "step": 36975 + }, + { + "epoch": 13.285457809694794, + "grad_norm": 8.60023021697998, + "learning_rate": 1.3212012404112942e-05, + "loss": 4.6282, + "step": 37000 + }, + { + "epoch": 13.29443447037702, + "grad_norm": 8.003705978393555, + "learning_rate": 1.3198955443120615e-05, + "loss": 4.5675, + "step": 37025 + }, + { + "epoch": 13.303411131059246, + "grad_norm": 8.369465827941895, + "learning_rate": 1.3185354442086937e-05, + "loss": 4.553, + "step": 37050 + }, + { + "epoch": 13.312387791741472, + "grad_norm": 9.668798446655273, + "learning_rate": 1.3171753441053262e-05, + "loss": 4.5104, + "step": 37075 + }, + { + "epoch": 13.321364452423698, + "grad_norm": 8.98563289642334, + "learning_rate": 1.3158152440019585e-05, + "loss": 4.5826, + "step": 37100 + }, + { + "epoch": 13.330341113105925, + "grad_norm": 8.2998628616333, + "learning_rate": 1.314455143898591e-05, + "loss": 4.6072, + "step": 37125 + }, + { + "epoch": 13.339317773788151, + "grad_norm": 9.007369995117188, + "learning_rate": 1.3130950437952234e-05, + "loss": 4.6141, + "step": 37150 + }, + { + "epoch": 13.348294434470377, + "grad_norm": 9.122861862182617, + "learning_rate": 1.3117349436918557e-05, + "loss": 4.5293, + "step": 37175 + }, + { + "epoch": 13.357271095152603, + "grad_norm": 8.923768043518066, + "learning_rate": 1.3103748435884882e-05, + "loss": 4.5812, + "step": 37200 + }, + { + "epoch": 13.366247755834829, + "grad_norm": 9.46606159210205, + "learning_rate": 1.3090147434851205e-05, + "loss": 4.5485, + "step": 37225 + }, + { + "epoch": 13.375224416517055, + "grad_norm": 8.991482734680176, + "learning_rate": 1.307654643381753e-05, + "loss": 4.6577, + "step": 37250 + }, + { + "epoch": 13.384201077199283, + "grad_norm": 9.03668212890625, + "learning_rate": 1.3062945432783854e-05, + "loss": 4.5438, + "step": 37275 + }, + { + "epoch": 13.393177737881508, + "grad_norm": 8.112563133239746, + "learning_rate": 1.3049344431750177e-05, + "loss": 4.5462, + "step": 37300 + }, + { + "epoch": 13.402154398563734, + "grad_norm": 8.304248809814453, + "learning_rate": 1.3035743430716502e-05, + "loss": 4.5418, + "step": 37325 + }, + { + "epoch": 13.41113105924596, + "grad_norm": 9.283486366271973, + "learning_rate": 1.3022142429682824e-05, + "loss": 4.6311, + "step": 37350 + }, + { + "epoch": 13.420107719928186, + "grad_norm": 9.362588882446289, + "learning_rate": 1.3008541428649149e-05, + "loss": 4.6242, + "step": 37375 + }, + { + "epoch": 13.429084380610412, + "grad_norm": 8.31550407409668, + "learning_rate": 1.2994940427615472e-05, + "loss": 4.5662, + "step": 37400 + }, + { + "epoch": 13.43806104129264, + "grad_norm": 8.85837459564209, + "learning_rate": 1.2981339426581798e-05, + "loss": 4.667, + "step": 37425 + }, + { + "epoch": 13.447037701974866, + "grad_norm": 8.494302749633789, + "learning_rate": 1.2967738425548121e-05, + "loss": 4.561, + "step": 37450 + }, + { + "epoch": 13.456014362657092, + "grad_norm": 9.329377174377441, + "learning_rate": 1.2954137424514444e-05, + "loss": 4.5644, + "step": 37475 + }, + { + "epoch": 13.464991023339318, + "grad_norm": 9.476177215576172, + "learning_rate": 1.2940536423480769e-05, + "loss": 4.5972, + "step": 37500 + }, + { + "epoch": 13.473967684021543, + "grad_norm": 9.223791122436523, + "learning_rate": 1.2926935422447092e-05, + "loss": 4.5912, + "step": 37525 + }, + { + "epoch": 13.48294434470377, + "grad_norm": 9.954957008361816, + "learning_rate": 1.2913334421413416e-05, + "loss": 4.5479, + "step": 37550 + }, + { + "epoch": 13.491921005385997, + "grad_norm": 8.478102684020996, + "learning_rate": 1.289973342037974e-05, + "loss": 4.6691, + "step": 37575 + }, + { + "epoch": 13.500897666068223, + "grad_norm": 8.475785255432129, + "learning_rate": 1.2886132419346066e-05, + "loss": 4.6316, + "step": 37600 + }, + { + "epoch": 13.509874326750449, + "grad_norm": 7.56096887588501, + "learning_rate": 1.2872531418312389e-05, + "loss": 4.5335, + "step": 37625 + }, + { + "epoch": 13.518850987432675, + "grad_norm": 8.83825969696045, + "learning_rate": 1.2858930417278711e-05, + "loss": 4.683, + "step": 37650 + }, + { + "epoch": 13.5278276481149, + "grad_norm": 9.021821975708008, + "learning_rate": 1.2845329416245036e-05, + "loss": 4.5733, + "step": 37675 + }, + { + "epoch": 13.536804308797127, + "grad_norm": 8.656534194946289, + "learning_rate": 1.2831728415211359e-05, + "loss": 4.6902, + "step": 37700 + }, + { + "epoch": 13.545780969479354, + "grad_norm": 7.87501859664917, + "learning_rate": 1.2818127414177684e-05, + "loss": 4.5146, + "step": 37725 + }, + { + "epoch": 13.55475763016158, + "grad_norm": 8.688074111938477, + "learning_rate": 1.2804526413144008e-05, + "loss": 4.6279, + "step": 37750 + }, + { + "epoch": 13.563734290843806, + "grad_norm": 9.38176155090332, + "learning_rate": 1.2790925412110331e-05, + "loss": 4.6628, + "step": 37775 + }, + { + "epoch": 13.572710951526032, + "grad_norm": 7.962253570556641, + "learning_rate": 1.2777324411076656e-05, + "loss": 4.4794, + "step": 37800 + }, + { + "epoch": 13.581687612208258, + "grad_norm": 8.74090576171875, + "learning_rate": 1.2763723410042979e-05, + "loss": 4.6126, + "step": 37825 + }, + { + "epoch": 13.590664272890486, + "grad_norm": 9.216059684753418, + "learning_rate": 1.2750122409009303e-05, + "loss": 4.6874, + "step": 37850 + }, + { + "epoch": 13.599640933572712, + "grad_norm": 9.242759704589844, + "learning_rate": 1.2736521407975626e-05, + "loss": 4.5625, + "step": 37875 + }, + { + "epoch": 13.608617594254937, + "grad_norm": 8.749385833740234, + "learning_rate": 1.2722920406941953e-05, + "loss": 4.5615, + "step": 37900 + }, + { + "epoch": 13.617594254937163, + "grad_norm": 8.273270606994629, + "learning_rate": 1.2709319405908276e-05, + "loss": 4.599, + "step": 37925 + }, + { + "epoch": 13.62657091561939, + "grad_norm": 8.589452743530273, + "learning_rate": 1.2695718404874598e-05, + "loss": 4.5718, + "step": 37950 + }, + { + "epoch": 13.635547576301615, + "grad_norm": 8.548748970031738, + "learning_rate": 1.2682117403840923e-05, + "loss": 4.5218, + "step": 37975 + }, + { + "epoch": 13.644524236983841, + "grad_norm": 9.635135650634766, + "learning_rate": 1.2668516402807246e-05, + "loss": 4.5523, + "step": 38000 + }, + { + "epoch": 13.653500897666069, + "grad_norm": 7.883180141448975, + "learning_rate": 1.265491540177357e-05, + "loss": 4.689, + "step": 38025 + }, + { + "epoch": 13.662477558348295, + "grad_norm": 8.654051780700684, + "learning_rate": 1.2641314400739895e-05, + "loss": 4.6438, + "step": 38050 + }, + { + "epoch": 13.67145421903052, + "grad_norm": 8.135859489440918, + "learning_rate": 1.262771339970622e-05, + "loss": 4.5589, + "step": 38075 + }, + { + "epoch": 13.680430879712747, + "grad_norm": 8.720865249633789, + "learning_rate": 1.2614112398672543e-05, + "loss": 4.5506, + "step": 38100 + }, + { + "epoch": 13.689407540394972, + "grad_norm": 9.848489761352539, + "learning_rate": 1.2600511397638866e-05, + "loss": 4.5464, + "step": 38125 + }, + { + "epoch": 13.6983842010772, + "grad_norm": 8.547608375549316, + "learning_rate": 1.258691039660519e-05, + "loss": 4.6558, + "step": 38150 + }, + { + "epoch": 13.707360861759426, + "grad_norm": 8.46160888671875, + "learning_rate": 1.2573309395571513e-05, + "loss": 4.5244, + "step": 38175 + }, + { + "epoch": 13.716337522441652, + "grad_norm": 9.56635570526123, + "learning_rate": 1.255970839453784e-05, + "loss": 4.6065, + "step": 38200 + }, + { + "epoch": 13.725314183123878, + "grad_norm": 8.369549751281738, + "learning_rate": 1.2546107393504163e-05, + "loss": 4.6751, + "step": 38225 + }, + { + "epoch": 13.734290843806104, + "grad_norm": 8.65282917022705, + "learning_rate": 1.2532506392470487e-05, + "loss": 4.5543, + "step": 38250 + }, + { + "epoch": 13.74326750448833, + "grad_norm": 8.250898361206055, + "learning_rate": 1.251890539143681e-05, + "loss": 4.6677, + "step": 38275 + }, + { + "epoch": 13.752244165170557, + "grad_norm": 9.037845611572266, + "learning_rate": 1.2505304390403133e-05, + "loss": 4.5988, + "step": 38300 + }, + { + "epoch": 13.761220825852783, + "grad_norm": 9.47874641418457, + "learning_rate": 1.2491703389369458e-05, + "loss": 4.518, + "step": 38325 + }, + { + "epoch": 13.77019748653501, + "grad_norm": 9.029143333435059, + "learning_rate": 1.2478102388335782e-05, + "loss": 4.5942, + "step": 38350 + }, + { + "epoch": 13.779174147217235, + "grad_norm": 9.369366645812988, + "learning_rate": 1.2464501387302107e-05, + "loss": 4.5937, + "step": 38375 + }, + { + "epoch": 13.788150807899461, + "grad_norm": 8.809001922607422, + "learning_rate": 1.245090038626843e-05, + "loss": 4.6101, + "step": 38400 + }, + { + "epoch": 13.797127468581687, + "grad_norm": 8.304353713989258, + "learning_rate": 1.2437299385234754e-05, + "loss": 4.6961, + "step": 38425 + }, + { + "epoch": 13.806104129263915, + "grad_norm": 8.451216697692871, + "learning_rate": 1.2423698384201077e-05, + "loss": 4.6225, + "step": 38450 + }, + { + "epoch": 13.81508078994614, + "grad_norm": 9.672684669494629, + "learning_rate": 1.24100973831674e-05, + "loss": 4.6851, + "step": 38475 + }, + { + "epoch": 13.824057450628366, + "grad_norm": 8.246593475341797, + "learning_rate": 1.2396496382133725e-05, + "loss": 4.5367, + "step": 38500 + }, + { + "epoch": 13.833034111310592, + "grad_norm": 7.920393466949463, + "learning_rate": 1.238289538110005e-05, + "loss": 4.6141, + "step": 38525 + }, + { + "epoch": 13.842010771992818, + "grad_norm": 8.747411727905273, + "learning_rate": 1.2369294380066374e-05, + "loss": 4.5159, + "step": 38550 + }, + { + "epoch": 13.850987432675044, + "grad_norm": 9.214733123779297, + "learning_rate": 1.2355693379032697e-05, + "loss": 4.6744, + "step": 38575 + }, + { + "epoch": 13.859964093357272, + "grad_norm": 8.393248558044434, + "learning_rate": 1.234209237799902e-05, + "loss": 4.6188, + "step": 38600 + }, + { + "epoch": 13.868940754039498, + "grad_norm": 9.319880485534668, + "learning_rate": 1.2328491376965345e-05, + "loss": 4.6003, + "step": 38625 + }, + { + "epoch": 13.877917414721724, + "grad_norm": 8.42130184173584, + "learning_rate": 1.2314890375931668e-05, + "loss": 4.6475, + "step": 38650 + }, + { + "epoch": 13.88689407540395, + "grad_norm": 9.778843879699707, + "learning_rate": 1.2301289374897994e-05, + "loss": 4.6026, + "step": 38675 + }, + { + "epoch": 13.895870736086176, + "grad_norm": 9.009313583374023, + "learning_rate": 1.2287688373864317e-05, + "loss": 4.5933, + "step": 38700 + }, + { + "epoch": 13.904847396768401, + "grad_norm": 9.482985496520996, + "learning_rate": 1.2274087372830641e-05, + "loss": 4.6045, + "step": 38725 + }, + { + "epoch": 13.91382405745063, + "grad_norm": 8.961365699768066, + "learning_rate": 1.2260486371796964e-05, + "loss": 4.6985, + "step": 38750 + }, + { + "epoch": 13.922800718132855, + "grad_norm": 8.35599136352539, + "learning_rate": 1.2246885370763287e-05, + "loss": 4.5267, + "step": 38775 + }, + { + "epoch": 13.931777378815081, + "grad_norm": 8.62751579284668, + "learning_rate": 1.2233284369729612e-05, + "loss": 4.5347, + "step": 38800 + }, + { + "epoch": 13.940754039497307, + "grad_norm": 9.593770027160645, + "learning_rate": 1.2219683368695937e-05, + "loss": 4.5126, + "step": 38825 + }, + { + "epoch": 13.949730700179533, + "grad_norm": 8.730130195617676, + "learning_rate": 1.2206082367662261e-05, + "loss": 4.6229, + "step": 38850 + }, + { + "epoch": 13.958707360861759, + "grad_norm": 9.035167694091797, + "learning_rate": 1.2192481366628584e-05, + "loss": 4.5882, + "step": 38875 + }, + { + "epoch": 13.967684021543986, + "grad_norm": 8.879999160766602, + "learning_rate": 1.2178880365594909e-05, + "loss": 4.6384, + "step": 38900 + }, + { + "epoch": 13.976660682226212, + "grad_norm": 8.937557220458984, + "learning_rate": 1.2165279364561232e-05, + "loss": 4.6028, + "step": 38925 + }, + { + "epoch": 13.985637342908438, + "grad_norm": 9.831676483154297, + "learning_rate": 1.2151678363527555e-05, + "loss": 4.5947, + "step": 38950 + }, + { + "epoch": 13.994614003590664, + "grad_norm": 8.50274658203125, + "learning_rate": 1.2138077362493881e-05, + "loss": 4.6355, + "step": 38975 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.07011246252840204, + "eval_f1_macro": 0.007992604964137721, + "eval_f1_micro": 0.07011246252840204, + "eval_f1_weighted": 0.04183764503985617, + "eval_loss": 6.490649223327637, + "eval_precision_macro": 0.0073942764257469195, + "eval_precision_micro": 0.07011246252840204, + "eval_precision_weighted": 0.03468543814682837, + "eval_recall_macro": 0.013223066940829191, + "eval_recall_micro": 0.07011246252840204, + "eval_recall_weighted": 0.07011246252840204, + "eval_runtime": 83.5793, + "eval_samples_per_second": 626.627, + "eval_steps_per_second": 9.799, + "step": 38990 + }, + { + "epoch": 14.00359066427289, + "grad_norm": 8.373591423034668, + "learning_rate": 1.2124476361460204e-05, + "loss": 4.5587, + "step": 39000 + }, + { + "epoch": 14.012567324955116, + "grad_norm": 8.947854042053223, + "learning_rate": 1.2110875360426528e-05, + "loss": 4.427, + "step": 39025 + }, + { + "epoch": 14.021543985637344, + "grad_norm": 9.161550521850586, + "learning_rate": 1.2097274359392851e-05, + "loss": 4.3102, + "step": 39050 + }, + { + "epoch": 14.03052064631957, + "grad_norm": 9.519538879394531, + "learning_rate": 1.2083673358359176e-05, + "loss": 4.3993, + "step": 39075 + }, + { + "epoch": 14.039497307001795, + "grad_norm": 9.080506324768066, + "learning_rate": 1.2070072357325499e-05, + "loss": 4.3068, + "step": 39100 + }, + { + "epoch": 14.048473967684021, + "grad_norm": 7.933554649353027, + "learning_rate": 1.2056471356291824e-05, + "loss": 4.425, + "step": 39125 + }, + { + "epoch": 14.057450628366247, + "grad_norm": 9.747654914855957, + "learning_rate": 1.2042870355258148e-05, + "loss": 4.4811, + "step": 39150 + }, + { + "epoch": 14.066427289048473, + "grad_norm": 8.539299964904785, + "learning_rate": 1.2029269354224471e-05, + "loss": 4.3853, + "step": 39175 + }, + { + "epoch": 14.0754039497307, + "grad_norm": 8.048534393310547, + "learning_rate": 1.2015668353190796e-05, + "loss": 4.4952, + "step": 39200 + }, + { + "epoch": 14.084380610412927, + "grad_norm": 9.13365364074707, + "learning_rate": 1.2002067352157119e-05, + "loss": 4.3236, + "step": 39225 + }, + { + "epoch": 14.093357271095153, + "grad_norm": 7.705849647521973, + "learning_rate": 1.1988466351123443e-05, + "loss": 4.4454, + "step": 39250 + }, + { + "epoch": 14.102333931777379, + "grad_norm": 8.917954444885254, + "learning_rate": 1.1974865350089766e-05, + "loss": 4.3335, + "step": 39275 + }, + { + "epoch": 14.111310592459605, + "grad_norm": 9.290776252746582, + "learning_rate": 1.196126434905609e-05, + "loss": 4.4383, + "step": 39300 + }, + { + "epoch": 14.12028725314183, + "grad_norm": 8.443875312805176, + "learning_rate": 1.1947663348022415e-05, + "loss": 4.321, + "step": 39325 + }, + { + "epoch": 14.129263913824058, + "grad_norm": 8.886764526367188, + "learning_rate": 1.1934062346988738e-05, + "loss": 4.446, + "step": 39350 + }, + { + "epoch": 14.138240574506284, + "grad_norm": 8.980121612548828, + "learning_rate": 1.1920461345955063e-05, + "loss": 4.4883, + "step": 39375 + }, + { + "epoch": 14.14721723518851, + "grad_norm": 9.789945602416992, + "learning_rate": 1.1906860344921386e-05, + "loss": 4.439, + "step": 39400 + }, + { + "epoch": 14.156193895870736, + "grad_norm": 8.822226524353027, + "learning_rate": 1.1893259343887709e-05, + "loss": 4.539, + "step": 39425 + }, + { + "epoch": 14.165170556552962, + "grad_norm": 8.249542236328125, + "learning_rate": 1.1879658342854035e-05, + "loss": 4.4905, + "step": 39450 + }, + { + "epoch": 14.174147217235188, + "grad_norm": 7.767251014709473, + "learning_rate": 1.1866057341820358e-05, + "loss": 4.3575, + "step": 39475 + }, + { + "epoch": 14.183123877917415, + "grad_norm": 9.260034561157227, + "learning_rate": 1.1852456340786683e-05, + "loss": 4.4469, + "step": 39500 + }, + { + "epoch": 14.192100538599641, + "grad_norm": 9.303133964538574, + "learning_rate": 1.1838855339753006e-05, + "loss": 4.4322, + "step": 39525 + }, + { + "epoch": 14.201077199281867, + "grad_norm": 8.556343078613281, + "learning_rate": 1.182525433871933e-05, + "loss": 4.3789, + "step": 39550 + }, + { + "epoch": 14.210053859964093, + "grad_norm": 8.9046630859375, + "learning_rate": 1.1811653337685653e-05, + "loss": 4.507, + "step": 39575 + }, + { + "epoch": 14.219030520646319, + "grad_norm": 8.076767921447754, + "learning_rate": 1.1798052336651978e-05, + "loss": 4.3465, + "step": 39600 + }, + { + "epoch": 14.228007181328545, + "grad_norm": 8.443426132202148, + "learning_rate": 1.1784451335618302e-05, + "loss": 4.4597, + "step": 39625 + }, + { + "epoch": 14.236983842010773, + "grad_norm": 8.700423240661621, + "learning_rate": 1.1770850334584625e-05, + "loss": 4.3847, + "step": 39650 + }, + { + "epoch": 14.245960502692999, + "grad_norm": 8.439496040344238, + "learning_rate": 1.175724933355095e-05, + "loss": 4.4134, + "step": 39675 + }, + { + "epoch": 14.254937163375224, + "grad_norm": 8.44229507446289, + "learning_rate": 1.1743648332517273e-05, + "loss": 4.4392, + "step": 39700 + }, + { + "epoch": 14.26391382405745, + "grad_norm": 9.566526412963867, + "learning_rate": 1.1730047331483598e-05, + "loss": 4.3758, + "step": 39725 + }, + { + "epoch": 14.272890484739676, + "grad_norm": 8.694412231445312, + "learning_rate": 1.1716446330449922e-05, + "loss": 4.477, + "step": 39750 + }, + { + "epoch": 14.281867145421902, + "grad_norm": 9.04251766204834, + "learning_rate": 1.1702845329416245e-05, + "loss": 4.5301, + "step": 39775 + }, + { + "epoch": 14.29084380610413, + "grad_norm": 9.293773651123047, + "learning_rate": 1.1689788368423916e-05, + "loss": 4.464, + "step": 39800 + }, + { + "epoch": 14.299820466786356, + "grad_norm": 8.759017944335938, + "learning_rate": 1.167618736739024e-05, + "loss": 4.4326, + "step": 39825 + }, + { + "epoch": 14.308797127468582, + "grad_norm": 9.9410982131958, + "learning_rate": 1.1662586366356565e-05, + "loss": 4.3889, + "step": 39850 + }, + { + "epoch": 14.317773788150808, + "grad_norm": 9.382207870483398, + "learning_rate": 1.1648985365322888e-05, + "loss": 4.4472, + "step": 39875 + }, + { + "epoch": 14.326750448833034, + "grad_norm": 9.338412284851074, + "learning_rate": 1.1635384364289213e-05, + "loss": 4.5001, + "step": 39900 + }, + { + "epoch": 14.335727109515261, + "grad_norm": 8.856274604797363, + "learning_rate": 1.1621783363255536e-05, + "loss": 4.4726, + "step": 39925 + }, + { + "epoch": 14.344703770197487, + "grad_norm": 10.750558853149414, + "learning_rate": 1.1608182362221859e-05, + "loss": 4.4317, + "step": 39950 + }, + { + "epoch": 14.353680430879713, + "grad_norm": 8.004633903503418, + "learning_rate": 1.1594581361188185e-05, + "loss": 4.4788, + "step": 39975 + }, + { + "epoch": 14.362657091561939, + "grad_norm": 9.424314498901367, + "learning_rate": 1.1580980360154508e-05, + "loss": 4.5757, + "step": 40000 + }, + { + "epoch": 14.371633752244165, + "grad_norm": 8.849565505981445, + "learning_rate": 1.1567379359120832e-05, + "loss": 4.5459, + "step": 40025 + }, + { + "epoch": 14.38061041292639, + "grad_norm": 8.247509956359863, + "learning_rate": 1.1553778358087155e-05, + "loss": 4.4287, + "step": 40050 + }, + { + "epoch": 14.389587073608618, + "grad_norm": 8.719013214111328, + "learning_rate": 1.154017735705348e-05, + "loss": 4.5093, + "step": 40075 + }, + { + "epoch": 14.398563734290844, + "grad_norm": 9.674192428588867, + "learning_rate": 1.1526576356019803e-05, + "loss": 4.4331, + "step": 40100 + }, + { + "epoch": 14.40754039497307, + "grad_norm": 9.600411415100098, + "learning_rate": 1.1512975354986126e-05, + "loss": 4.5051, + "step": 40125 + }, + { + "epoch": 14.416517055655296, + "grad_norm": 8.349175453186035, + "learning_rate": 1.1499374353952452e-05, + "loss": 4.4017, + "step": 40150 + }, + { + "epoch": 14.425493716337522, + "grad_norm": 9.106510162353516, + "learning_rate": 1.1485773352918775e-05, + "loss": 4.4127, + "step": 40175 + }, + { + "epoch": 14.434470377019748, + "grad_norm": 8.864956855773926, + "learning_rate": 1.14721723518851e-05, + "loss": 4.4137, + "step": 40200 + }, + { + "epoch": 14.443447037701976, + "grad_norm": 8.271056175231934, + "learning_rate": 1.1458571350851423e-05, + "loss": 4.4704, + "step": 40225 + }, + { + "epoch": 14.452423698384202, + "grad_norm": 8.878592491149902, + "learning_rate": 1.1444970349817747e-05, + "loss": 4.5052, + "step": 40250 + }, + { + "epoch": 14.461400359066428, + "grad_norm": 8.016226768493652, + "learning_rate": 1.143136934878407e-05, + "loss": 4.4137, + "step": 40275 + }, + { + "epoch": 14.470377019748653, + "grad_norm": 7.894835472106934, + "learning_rate": 1.1417768347750395e-05, + "loss": 4.4441, + "step": 40300 + }, + { + "epoch": 14.47935368043088, + "grad_norm": 8.803479194641113, + "learning_rate": 1.140416734671672e-05, + "loss": 4.5084, + "step": 40325 + }, + { + "epoch": 14.488330341113105, + "grad_norm": 8.828130722045898, + "learning_rate": 1.1390566345683042e-05, + "loss": 4.5144, + "step": 40350 + }, + { + "epoch": 14.497307001795333, + "grad_norm": 9.534722328186035, + "learning_rate": 1.1376965344649367e-05, + "loss": 4.4549, + "step": 40375 + }, + { + "epoch": 14.506283662477559, + "grad_norm": 9.427480697631836, + "learning_rate": 1.136336434361569e-05, + "loss": 4.4229, + "step": 40400 + }, + { + "epoch": 14.515260323159785, + "grad_norm": 9.051004409790039, + "learning_rate": 1.1349763342582013e-05, + "loss": 4.5078, + "step": 40425 + }, + { + "epoch": 14.52423698384201, + "grad_norm": 10.096264839172363, + "learning_rate": 1.1336162341548339e-05, + "loss": 4.452, + "step": 40450 + }, + { + "epoch": 14.533213644524237, + "grad_norm": 9.130829811096191, + "learning_rate": 1.1322561340514662e-05, + "loss": 4.5189, + "step": 40475 + }, + { + "epoch": 14.542190305206462, + "grad_norm": 8.732491493225098, + "learning_rate": 1.1308960339480987e-05, + "loss": 4.4126, + "step": 40500 + }, + { + "epoch": 14.55116696588869, + "grad_norm": 9.292389869689941, + "learning_rate": 1.129535933844731e-05, + "loss": 4.4194, + "step": 40525 + }, + { + "epoch": 14.560143626570916, + "grad_norm": 8.675069808959961, + "learning_rate": 1.1281758337413634e-05, + "loss": 4.4601, + "step": 40550 + }, + { + "epoch": 14.569120287253142, + "grad_norm": 9.699023246765137, + "learning_rate": 1.1268157336379957e-05, + "loss": 4.4397, + "step": 40575 + }, + { + "epoch": 14.578096947935368, + "grad_norm": 8.753130912780762, + "learning_rate": 1.1254556335346282e-05, + "loss": 4.5528, + "step": 40600 + }, + { + "epoch": 14.587073608617594, + "grad_norm": 9.10558032989502, + "learning_rate": 1.1240955334312606e-05, + "loss": 4.4702, + "step": 40625 + }, + { + "epoch": 14.59605026929982, + "grad_norm": 9.85875415802002, + "learning_rate": 1.122735433327893e-05, + "loss": 4.5358, + "step": 40650 + }, + { + "epoch": 14.605026929982047, + "grad_norm": 8.65063190460205, + "learning_rate": 1.1213753332245254e-05, + "loss": 4.434, + "step": 40675 + }, + { + "epoch": 14.614003590664273, + "grad_norm": 9.225478172302246, + "learning_rate": 1.1200152331211577e-05, + "loss": 4.4471, + "step": 40700 + }, + { + "epoch": 14.6229802513465, + "grad_norm": 8.572860717773438, + "learning_rate": 1.1186551330177902e-05, + "loss": 4.431, + "step": 40725 + }, + { + "epoch": 14.631956912028725, + "grad_norm": 9.102144241333008, + "learning_rate": 1.1172950329144226e-05, + "loss": 4.5364, + "step": 40750 + }, + { + "epoch": 14.640933572710951, + "grad_norm": 9.898483276367188, + "learning_rate": 1.1159349328110549e-05, + "loss": 4.5156, + "step": 40775 + }, + { + "epoch": 14.649910233393177, + "grad_norm": 8.119232177734375, + "learning_rate": 1.1145748327076874e-05, + "loss": 4.5307, + "step": 40800 + }, + { + "epoch": 14.658886894075405, + "grad_norm": 9.283289909362793, + "learning_rate": 1.1132147326043197e-05, + "loss": 4.4107, + "step": 40825 + }, + { + "epoch": 14.66786355475763, + "grad_norm": 8.553637504577637, + "learning_rate": 1.1118546325009521e-05, + "loss": 4.4421, + "step": 40850 + }, + { + "epoch": 14.676840215439857, + "grad_norm": 9.08562183380127, + "learning_rate": 1.1104945323975844e-05, + "loss": 4.4701, + "step": 40875 + }, + { + "epoch": 14.685816876122082, + "grad_norm": 9.49377155303955, + "learning_rate": 1.1091344322942169e-05, + "loss": 4.4864, + "step": 40900 + }, + { + "epoch": 14.694793536804308, + "grad_norm": 9.933831214904785, + "learning_rate": 1.1077743321908493e-05, + "loss": 4.4162, + "step": 40925 + }, + { + "epoch": 14.703770197486534, + "grad_norm": 8.81182861328125, + "learning_rate": 1.1064142320874816e-05, + "loss": 4.4862, + "step": 40950 + }, + { + "epoch": 14.712746858168762, + "grad_norm": 8.13880729675293, + "learning_rate": 1.1050541319841141e-05, + "loss": 4.4636, + "step": 40975 + }, + { + "epoch": 14.721723518850988, + "grad_norm": 9.509178161621094, + "learning_rate": 1.1036940318807464e-05, + "loss": 4.382, + "step": 41000 + }, + { + "epoch": 14.730700179533214, + "grad_norm": 9.193818092346191, + "learning_rate": 1.1023339317773789e-05, + "loss": 4.4195, + "step": 41025 + }, + { + "epoch": 14.73967684021544, + "grad_norm": 8.95386791229248, + "learning_rate": 1.1009738316740111e-05, + "loss": 4.534, + "step": 41050 + }, + { + "epoch": 14.748653500897666, + "grad_norm": 9.563989639282227, + "learning_rate": 1.0996137315706438e-05, + "loss": 4.5324, + "step": 41075 + }, + { + "epoch": 14.757630161579891, + "grad_norm": 10.995915412902832, + "learning_rate": 1.098253631467276e-05, + "loss": 4.6058, + "step": 41100 + }, + { + "epoch": 14.76660682226212, + "grad_norm": 9.87260627746582, + "learning_rate": 1.0968935313639084e-05, + "loss": 4.4774, + "step": 41125 + }, + { + "epoch": 14.775583482944345, + "grad_norm": 9.211396217346191, + "learning_rate": 1.0955334312605408e-05, + "loss": 4.5247, + "step": 41150 + }, + { + "epoch": 14.784560143626571, + "grad_norm": 9.71726131439209, + "learning_rate": 1.0941733311571731e-05, + "loss": 4.5486, + "step": 41175 + }, + { + "epoch": 14.793536804308797, + "grad_norm": 8.689336776733398, + "learning_rate": 1.0928132310538056e-05, + "loss": 4.4818, + "step": 41200 + }, + { + "epoch": 14.802513464991023, + "grad_norm": 9.063653945922852, + "learning_rate": 1.091453130950438e-05, + "loss": 4.5421, + "step": 41225 + }, + { + "epoch": 14.811490125673249, + "grad_norm": 9.422538757324219, + "learning_rate": 1.0900930308470703e-05, + "loss": 4.3704, + "step": 41250 + }, + { + "epoch": 14.820466786355476, + "grad_norm": 8.747773170471191, + "learning_rate": 1.0887329307437028e-05, + "loss": 4.3744, + "step": 41275 + }, + { + "epoch": 14.829443447037702, + "grad_norm": 9.641063690185547, + "learning_rate": 1.0873728306403351e-05, + "loss": 4.5311, + "step": 41300 + }, + { + "epoch": 14.838420107719928, + "grad_norm": 9.079813003540039, + "learning_rate": 1.0860127305369676e-05, + "loss": 4.5492, + "step": 41325 + }, + { + "epoch": 14.847396768402154, + "grad_norm": 8.651043891906738, + "learning_rate": 1.0846526304335998e-05, + "loss": 4.4907, + "step": 41350 + }, + { + "epoch": 14.85637342908438, + "grad_norm": 9.031432151794434, + "learning_rate": 1.0832925303302325e-05, + "loss": 4.3416, + "step": 41375 + }, + { + "epoch": 14.865350089766606, + "grad_norm": 9.331707954406738, + "learning_rate": 1.0819324302268648e-05, + "loss": 4.4667, + "step": 41400 + }, + { + "epoch": 14.874326750448834, + "grad_norm": 9.369437217712402, + "learning_rate": 1.080572330123497e-05, + "loss": 4.5518, + "step": 41425 + }, + { + "epoch": 14.88330341113106, + "grad_norm": 9.094755172729492, + "learning_rate": 1.0792122300201295e-05, + "loss": 4.4276, + "step": 41450 + }, + { + "epoch": 14.892280071813286, + "grad_norm": 8.740516662597656, + "learning_rate": 1.0778521299167618e-05, + "loss": 4.5403, + "step": 41475 + }, + { + "epoch": 14.901256732495511, + "grad_norm": 8.99415397644043, + "learning_rate": 1.0764920298133943e-05, + "loss": 4.4952, + "step": 41500 + }, + { + "epoch": 14.910233393177737, + "grad_norm": 8.970183372497559, + "learning_rate": 1.0751319297100267e-05, + "loss": 4.5569, + "step": 41525 + }, + { + "epoch": 14.919210053859963, + "grad_norm": 9.016188621520996, + "learning_rate": 1.0737718296066592e-05, + "loss": 4.428, + "step": 41550 + }, + { + "epoch": 14.928186714542191, + "grad_norm": 8.780765533447266, + "learning_rate": 1.0724117295032915e-05, + "loss": 4.5416, + "step": 41575 + }, + { + "epoch": 14.937163375224417, + "grad_norm": 9.519654273986816, + "learning_rate": 1.0710516293999238e-05, + "loss": 4.4336, + "step": 41600 + }, + { + "epoch": 14.946140035906643, + "grad_norm": 8.24411392211914, + "learning_rate": 1.0696915292965563e-05, + "loss": 4.4828, + "step": 41625 + }, + { + "epoch": 14.955116696588869, + "grad_norm": 9.481698989868164, + "learning_rate": 1.0683314291931885e-05, + "loss": 4.4205, + "step": 41650 + }, + { + "epoch": 14.964093357271095, + "grad_norm": 8.782504081726074, + "learning_rate": 1.066971329089821e-05, + "loss": 4.5291, + "step": 41675 + }, + { + "epoch": 14.973070017953322, + "grad_norm": 9.562821388244629, + "learning_rate": 1.0656112289864535e-05, + "loss": 4.5891, + "step": 41700 + }, + { + "epoch": 14.982046678635548, + "grad_norm": 9.296073913574219, + "learning_rate": 1.064251128883086e-05, + "loss": 4.4311, + "step": 41725 + }, + { + "epoch": 14.991023339317774, + "grad_norm": 9.72317123413086, + "learning_rate": 1.0628910287797182e-05, + "loss": 4.5584, + "step": 41750 + }, + { + "epoch": 15.0, + "grad_norm": 8.771120071411133, + "learning_rate": 1.0615309286763505e-05, + "loss": 4.5077, + "step": 41775 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.06831764458786016, + "eval_f1_macro": 0.008544458810644008, + "eval_f1_micro": 0.06831764458786016, + "eval_f1_weighted": 0.04166472605237575, + "eval_loss": 6.482921600341797, + "eval_precision_macro": 0.007774664645041587, + "eval_precision_micro": 0.06831764458786016, + "eval_precision_weighted": 0.034648853845008584, + "eval_recall_macro": 0.013970747034737791, + "eval_recall_micro": 0.06831764458786016, + "eval_recall_weighted": 0.06831764458786016, + "eval_runtime": 84.1239, + "eval_samples_per_second": 622.57, + "eval_steps_per_second": 9.736, + "step": 41775 + } + ], + "logging_steps": 25, + "max_steps": 61270, + "num_input_tokens_seen": 0, + "num_train_epochs": 22, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 5, + "early_stopping_threshold": 0.01 + }, + "attributes": { + "early_stopping_patience_counter": 3 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.641204557901988e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}