{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2941, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00034002040122407346, "grad_norm": 7.957215845157106, "learning_rate": 1.1235955056179776e-07, "loss": 1.265, "step": 1 }, { "epoch": 0.0006800408024481469, "grad_norm": 13.156657121104576, "learning_rate": 2.247191011235955e-07, "loss": 1.1163, "step": 2 }, { "epoch": 0.0010200612036722204, "grad_norm": 8.234411807150657, "learning_rate": 3.3707865168539325e-07, "loss": 1.1117, "step": 3 }, { "epoch": 0.0013600816048962938, "grad_norm": 8.378852124626784, "learning_rate": 4.49438202247191e-07, "loss": 1.0045, "step": 4 }, { "epoch": 0.0017001020061203672, "grad_norm": 5.0871456226741785, "learning_rate": 5.617977528089888e-07, "loss": 1.1061, "step": 5 }, { "epoch": 0.002040122407344441, "grad_norm": 12.113028237990287, "learning_rate": 6.741573033707865e-07, "loss": 0.978, "step": 6 }, { "epoch": 0.0023801428085685142, "grad_norm": 7.182446599540591, "learning_rate": 7.865168539325843e-07, "loss": 1.2117, "step": 7 }, { "epoch": 0.0027201632097925877, "grad_norm": 8.794165320435372, "learning_rate": 8.98876404494382e-07, "loss": 1.0302, "step": 8 }, { "epoch": 0.003060183611016661, "grad_norm": 6.19432478948948, "learning_rate": 1.01123595505618e-06, "loss": 1.1328, "step": 9 }, { "epoch": 0.0034002040122407345, "grad_norm": 8.815917265181259, "learning_rate": 1.1235955056179777e-06, "loss": 1.0271, "step": 10 }, { "epoch": 0.003740224413464808, "grad_norm": 3.4552395124739923, "learning_rate": 1.2359550561797752e-06, "loss": 1.1775, "step": 11 }, { "epoch": 0.004080244814688882, "grad_norm": 3.920780003164372, "learning_rate": 1.348314606741573e-06, "loss": 1.1197, "step": 12 }, { "epoch": 0.004420265215912955, "grad_norm": 9.503601873541784, "learning_rate": 1.4606741573033708e-06, "loss": 1.0117, "step": 13 }, { "epoch": 0.0047602856171370285, "grad_norm": 3.8392791998777698, "learning_rate": 1.5730337078651686e-06, "loss": 1.1943, "step": 14 }, { "epoch": 0.0051003060183611015, "grad_norm": 16.880294672259833, "learning_rate": 1.6853932584269663e-06, "loss": 0.9484, "step": 15 }, { "epoch": 0.005440326419585175, "grad_norm": 3.7585745209663135, "learning_rate": 1.797752808988764e-06, "loss": 1.0903, "step": 16 }, { "epoch": 0.005780346820809248, "grad_norm": 4.0811771911781705, "learning_rate": 1.910112359550562e-06, "loss": 0.9225, "step": 17 }, { "epoch": 0.006120367222033322, "grad_norm": 5.5863711220456835, "learning_rate": 2.02247191011236e-06, "loss": 1.0673, "step": 18 }, { "epoch": 0.006460387623257395, "grad_norm": 3.1676452672990454, "learning_rate": 2.1348314606741574e-06, "loss": 0.9291, "step": 19 }, { "epoch": 0.006800408024481469, "grad_norm": 3.611139125930507, "learning_rate": 2.2471910112359554e-06, "loss": 0.8766, "step": 20 }, { "epoch": 0.007140428425705543, "grad_norm": 2.4985925611214093, "learning_rate": 2.359550561797753e-06, "loss": 0.8919, "step": 21 }, { "epoch": 0.007480448826929616, "grad_norm": 3.1107777199577735, "learning_rate": 2.4719101123595505e-06, "loss": 0.9591, "step": 22 }, { "epoch": 0.007820469228153689, "grad_norm": 2.4206200369165054, "learning_rate": 2.584269662921349e-06, "loss": 0.9344, "step": 23 }, { "epoch": 0.008160489629377763, "grad_norm": 3.006651205729068, "learning_rate": 2.696629213483146e-06, "loss": 0.929, "step": 24 }, { "epoch": 0.008500510030601836, "grad_norm": 4.211242050455176, "learning_rate": 2.8089887640449444e-06, "loss": 0.922, "step": 25 }, { "epoch": 0.00884053043182591, "grad_norm": 2.928334204696846, "learning_rate": 2.9213483146067416e-06, "loss": 0.8109, "step": 26 }, { "epoch": 0.009180550833049982, "grad_norm": 2.920501950315103, "learning_rate": 3.03370786516854e-06, "loss": 0.9918, "step": 27 }, { "epoch": 0.009520571234274057, "grad_norm": 2.6627931747349463, "learning_rate": 3.146067415730337e-06, "loss": 1.0249, "step": 28 }, { "epoch": 0.00986059163549813, "grad_norm": 2.2391523692025945, "learning_rate": 3.258426966292135e-06, "loss": 0.8657, "step": 29 }, { "epoch": 0.010200612036722203, "grad_norm": 2.3439476348012924, "learning_rate": 3.3707865168539327e-06, "loss": 0.8669, "step": 30 }, { "epoch": 0.010540632437946278, "grad_norm": 2.6299632657140646, "learning_rate": 3.4831460674157306e-06, "loss": 0.9279, "step": 31 }, { "epoch": 0.01088065283917035, "grad_norm": 2.5299601625034702, "learning_rate": 3.595505617977528e-06, "loss": 0.9859, "step": 32 }, { "epoch": 0.011220673240394424, "grad_norm": 1.869054216542114, "learning_rate": 3.707865168539326e-06, "loss": 0.9683, "step": 33 }, { "epoch": 0.011560693641618497, "grad_norm": 2.641701125076694, "learning_rate": 3.820224719101124e-06, "loss": 0.9049, "step": 34 }, { "epoch": 0.011900714042842571, "grad_norm": 2.3075105518731167, "learning_rate": 3.932584269662922e-06, "loss": 0.95, "step": 35 }, { "epoch": 0.012240734444066644, "grad_norm": 2.3217690220079943, "learning_rate": 4.04494382022472e-06, "loss": 0.9225, "step": 36 }, { "epoch": 0.012580754845290717, "grad_norm": 2.480896383955115, "learning_rate": 4.157303370786518e-06, "loss": 0.8839, "step": 37 }, { "epoch": 0.01292077524651479, "grad_norm": 2.151833792105812, "learning_rate": 4.269662921348315e-06, "loss": 0.7775, "step": 38 }, { "epoch": 0.013260795647738865, "grad_norm": 4.211040389687315, "learning_rate": 4.382022471910113e-06, "loss": 0.9096, "step": 39 }, { "epoch": 0.013600816048962938, "grad_norm": 2.064947551303069, "learning_rate": 4.494382022471911e-06, "loss": 0.9284, "step": 40 }, { "epoch": 0.01394083645018701, "grad_norm": 2.0660909038422526, "learning_rate": 4.606741573033709e-06, "loss": 0.925, "step": 41 }, { "epoch": 0.014280856851411085, "grad_norm": 4.901236022148853, "learning_rate": 4.719101123595506e-06, "loss": 0.8714, "step": 42 }, { "epoch": 0.014620877252635158, "grad_norm": 4.502266868053889, "learning_rate": 4.831460674157304e-06, "loss": 0.8047, "step": 43 }, { "epoch": 0.014960897653859231, "grad_norm": 2.4077409650266337, "learning_rate": 4.943820224719101e-06, "loss": 0.913, "step": 44 }, { "epoch": 0.015300918055083304, "grad_norm": 4.793105624055668, "learning_rate": 5.0561797752809e-06, "loss": 0.8195, "step": 45 }, { "epoch": 0.015640938456307377, "grad_norm": 4.106559075152421, "learning_rate": 5.168539325842698e-06, "loss": 0.771, "step": 46 }, { "epoch": 0.015980958857531452, "grad_norm": 2.95021911633751, "learning_rate": 5.280898876404494e-06, "loss": 0.8879, "step": 47 }, { "epoch": 0.016320979258755527, "grad_norm": 2.5286647211984956, "learning_rate": 5.393258426966292e-06, "loss": 0.8658, "step": 48 }, { "epoch": 0.016660999659979598, "grad_norm": 2.8336405976774373, "learning_rate": 5.50561797752809e-06, "loss": 0.8408, "step": 49 }, { "epoch": 0.017001020061203673, "grad_norm": 2.1850180954836187, "learning_rate": 5.617977528089889e-06, "loss": 0.9112, "step": 50 }, { "epoch": 0.017341040462427744, "grad_norm": 2.06887498219481, "learning_rate": 5.730337078651685e-06, "loss": 0.7897, "step": 51 }, { "epoch": 0.01768106086365182, "grad_norm": 4.3341123167435125, "learning_rate": 5.842696629213483e-06, "loss": 0.811, "step": 52 }, { "epoch": 0.018021081264875893, "grad_norm": 2.14419920475312, "learning_rate": 5.955056179775281e-06, "loss": 0.9512, "step": 53 }, { "epoch": 0.018361101666099965, "grad_norm": 2.5786480499173177, "learning_rate": 6.06741573033708e-06, "loss": 0.9029, "step": 54 }, { "epoch": 0.01870112206732404, "grad_norm": 3.3621255490379833, "learning_rate": 6.179775280898876e-06, "loss": 0.8179, "step": 55 }, { "epoch": 0.019041142468548114, "grad_norm": 2.0145825477143675, "learning_rate": 6.292134831460674e-06, "loss": 0.924, "step": 56 }, { "epoch": 0.019381162869772185, "grad_norm": 2.1614514347624625, "learning_rate": 6.404494382022472e-06, "loss": 0.8009, "step": 57 }, { "epoch": 0.01972118327099626, "grad_norm": 2.056785775033519, "learning_rate": 6.51685393258427e-06, "loss": 0.8702, "step": 58 }, { "epoch": 0.020061203672220335, "grad_norm": 2.3474912782953172, "learning_rate": 6.629213483146067e-06, "loss": 0.8707, "step": 59 }, { "epoch": 0.020401224073444406, "grad_norm": 2.735017646466919, "learning_rate": 6.741573033707865e-06, "loss": 0.8172, "step": 60 }, { "epoch": 0.02074124447466848, "grad_norm": 2.422521293220476, "learning_rate": 6.853932584269663e-06, "loss": 0.9169, "step": 61 }, { "epoch": 0.021081264875892555, "grad_norm": 3.808006273477038, "learning_rate": 6.966292134831461e-06, "loss": 0.9115, "step": 62 }, { "epoch": 0.021421285277116626, "grad_norm": 6.385476467042873, "learning_rate": 7.078651685393258e-06, "loss": 0.9251, "step": 63 }, { "epoch": 0.0217613056783407, "grad_norm": 2.056529939521199, "learning_rate": 7.191011235955056e-06, "loss": 0.871, "step": 64 }, { "epoch": 0.022101326079564772, "grad_norm": 3.488639073828488, "learning_rate": 7.303370786516854e-06, "loss": 0.8088, "step": 65 }, { "epoch": 0.022441346480788847, "grad_norm": 2.085309229849527, "learning_rate": 7.415730337078652e-06, "loss": 0.9016, "step": 66 }, { "epoch": 0.022781366882012922, "grad_norm": 5.044997989735368, "learning_rate": 7.5280898876404495e-06, "loss": 1.0085, "step": 67 }, { "epoch": 0.023121387283236993, "grad_norm": 4.002898882880522, "learning_rate": 7.640449438202247e-06, "loss": 0.7977, "step": 68 }, { "epoch": 0.023461407684461068, "grad_norm": 2.189417532279016, "learning_rate": 7.752808988764046e-06, "loss": 0.749, "step": 69 }, { "epoch": 0.023801428085685142, "grad_norm": 1.9250509612156395, "learning_rate": 7.865168539325843e-06, "loss": 0.9402, "step": 70 }, { "epoch": 0.024141448486909214, "grad_norm": 8.213532430857596, "learning_rate": 7.97752808988764e-06, "loss": 0.8401, "step": 71 }, { "epoch": 0.02448146888813329, "grad_norm": 2.0693669751843085, "learning_rate": 8.08988764044944e-06, "loss": 0.8317, "step": 72 }, { "epoch": 0.024821489289357363, "grad_norm": 4.854843546953979, "learning_rate": 8.202247191011237e-06, "loss": 0.7864, "step": 73 }, { "epoch": 0.025161509690581434, "grad_norm": 1.9256245489125812, "learning_rate": 8.314606741573035e-06, "loss": 0.8605, "step": 74 }, { "epoch": 0.02550153009180551, "grad_norm": 3.3854334242945736, "learning_rate": 8.426966292134832e-06, "loss": 0.994, "step": 75 }, { "epoch": 0.02584155049302958, "grad_norm": 3.4183811142773135, "learning_rate": 8.53932584269663e-06, "loss": 0.8632, "step": 76 }, { "epoch": 0.026181570894253655, "grad_norm": 2.2762352809156416, "learning_rate": 8.651685393258428e-06, "loss": 0.8118, "step": 77 }, { "epoch": 0.02652159129547773, "grad_norm": 2.297584618749765, "learning_rate": 8.764044943820226e-06, "loss": 0.8672, "step": 78 }, { "epoch": 0.0268616116967018, "grad_norm": 3.23414736176019, "learning_rate": 8.876404494382023e-06, "loss": 0.6984, "step": 79 }, { "epoch": 0.027201632097925876, "grad_norm": 2.970866674193053, "learning_rate": 8.988764044943822e-06, "loss": 0.9025, "step": 80 }, { "epoch": 0.02754165249914995, "grad_norm": 2.5731909072039727, "learning_rate": 9.101123595505619e-06, "loss": 0.8281, "step": 81 }, { "epoch": 0.02788167290037402, "grad_norm": 2.087322150347932, "learning_rate": 9.213483146067417e-06, "loss": 0.9687, "step": 82 }, { "epoch": 0.028221693301598096, "grad_norm": 2.1556977588073867, "learning_rate": 9.325842696629213e-06, "loss": 0.801, "step": 83 }, { "epoch": 0.02856171370282217, "grad_norm": 2.0239695578983237, "learning_rate": 9.438202247191012e-06, "loss": 0.8507, "step": 84 }, { "epoch": 0.028901734104046242, "grad_norm": 2.8375743876638633, "learning_rate": 9.55056179775281e-06, "loss": 0.8769, "step": 85 }, { "epoch": 0.029241754505270317, "grad_norm": 2.258009150645209, "learning_rate": 9.662921348314608e-06, "loss": 0.7804, "step": 86 }, { "epoch": 0.029581774906494388, "grad_norm": 2.468189158455857, "learning_rate": 9.775280898876405e-06, "loss": 0.879, "step": 87 }, { "epoch": 0.029921795307718463, "grad_norm": 2.5014523937082482, "learning_rate": 9.887640449438202e-06, "loss": 0.8627, "step": 88 }, { "epoch": 0.030261815708942538, "grad_norm": 2.134005133865966, "learning_rate": 1e-05, "loss": 0.7324, "step": 89 }, { "epoch": 0.03060183611016661, "grad_norm": 3.2988329246550396, "learning_rate": 9.999996966523272e-06, "loss": 0.8279, "step": 90 }, { "epoch": 0.030941856511390683, "grad_norm": 3.35459533797809, "learning_rate": 9.999987866096762e-06, "loss": 0.8842, "step": 91 }, { "epoch": 0.031281876912614755, "grad_norm": 2.346746709615949, "learning_rate": 9.999972698731516e-06, "loss": 0.8541, "step": 92 }, { "epoch": 0.03162189731383883, "grad_norm": 2.5077318815610816, "learning_rate": 9.999951464445938e-06, "loss": 0.874, "step": 93 }, { "epoch": 0.031961917715062904, "grad_norm": 6.007848319671871, "learning_rate": 9.99992416326579e-06, "loss": 0.7933, "step": 94 }, { "epoch": 0.032301938116286975, "grad_norm": 3.1450126786992882, "learning_rate": 9.999890795224206e-06, "loss": 0.8713, "step": 95 }, { "epoch": 0.032641958517511054, "grad_norm": 2.0402024490282633, "learning_rate": 9.999851360361666e-06, "loss": 0.801, "step": 96 }, { "epoch": 0.032981978918735125, "grad_norm": 4.045944747267887, "learning_rate": 9.999805858726026e-06, "loss": 0.8282, "step": 97 }, { "epoch": 0.033321999319959196, "grad_norm": 2.7667039685581947, "learning_rate": 9.999754290372496e-06, "loss": 0.9823, "step": 98 }, { "epoch": 0.033662019721183274, "grad_norm": 1.9693833148302338, "learning_rate": 9.999696655363646e-06, "loss": 0.8958, "step": 99 }, { "epoch": 0.034002040122407345, "grad_norm": 1.8144739667930376, "learning_rate": 9.999632953769413e-06, "loss": 0.865, "step": 100 }, { "epoch": 0.03434206052363142, "grad_norm": 2.054769480683066, "learning_rate": 9.99956318566709e-06, "loss": 0.7375, "step": 101 }, { "epoch": 0.03468208092485549, "grad_norm": 2.3634788615414535, "learning_rate": 9.999487351141333e-06, "loss": 0.7926, "step": 102 }, { "epoch": 0.035022101326079566, "grad_norm": 2.432946102833199, "learning_rate": 9.999405450284161e-06, "loss": 0.9227, "step": 103 }, { "epoch": 0.03536212172730364, "grad_norm": 2.01036703041841, "learning_rate": 9.999317483194948e-06, "loss": 0.836, "step": 104 }, { "epoch": 0.03570214212852771, "grad_norm": 11.065759310951304, "learning_rate": 9.999223449980434e-06, "loss": 0.8741, "step": 105 }, { "epoch": 0.03604216252975179, "grad_norm": 3.1475568662058984, "learning_rate": 9.999123350754722e-06, "loss": 0.8076, "step": 106 }, { "epoch": 0.03638218293097586, "grad_norm": 2.087445939358465, "learning_rate": 9.999017185639266e-06, "loss": 0.9328, "step": 107 }, { "epoch": 0.03672220333219993, "grad_norm": 5.085608104666051, "learning_rate": 9.99890495476289e-06, "loss": 0.8641, "step": 108 }, { "epoch": 0.03706222373342401, "grad_norm": 3.1741923963644827, "learning_rate": 9.99878665826177e-06, "loss": 0.8019, "step": 109 }, { "epoch": 0.03740224413464808, "grad_norm": 2.355452993498913, "learning_rate": 9.998662296279447e-06, "loss": 0.8653, "step": 110 }, { "epoch": 0.03774226453587215, "grad_norm": 2.413762001760075, "learning_rate": 9.998531868966822e-06, "loss": 0.7411, "step": 111 }, { "epoch": 0.03808228493709623, "grad_norm": 2.0339220046950524, "learning_rate": 9.998395376482152e-06, "loss": 0.8552, "step": 112 }, { "epoch": 0.0384223053383203, "grad_norm": 2.079277005071041, "learning_rate": 9.998252818991062e-06, "loss": 0.8222, "step": 113 }, { "epoch": 0.03876232573954437, "grad_norm": 5.632785104475984, "learning_rate": 9.99810419666652e-06, "loss": 0.8776, "step": 114 }, { "epoch": 0.03910234614076845, "grad_norm": 1.8509337002126818, "learning_rate": 9.997949509688871e-06, "loss": 0.8431, "step": 115 }, { "epoch": 0.03944236654199252, "grad_norm": 2.573221870698461, "learning_rate": 9.997788758245808e-06, "loss": 0.9841, "step": 116 }, { "epoch": 0.03978238694321659, "grad_norm": 2.572525731553236, "learning_rate": 9.997621942532383e-06, "loss": 0.8367, "step": 117 }, { "epoch": 0.04012240734444067, "grad_norm": 2.2742345681308938, "learning_rate": 9.997449062751012e-06, "loss": 0.7897, "step": 118 }, { "epoch": 0.04046242774566474, "grad_norm": 4.327044372060793, "learning_rate": 9.997270119111467e-06, "loss": 0.8457, "step": 119 }, { "epoch": 0.04080244814688881, "grad_norm": 2.2772066238104376, "learning_rate": 9.99708511183087e-06, "loss": 0.8247, "step": 120 }, { "epoch": 0.04114246854811289, "grad_norm": 2.601789407750634, "learning_rate": 9.996894041133715e-06, "loss": 0.825, "step": 121 }, { "epoch": 0.04148248894933696, "grad_norm": 2.0585977344964284, "learning_rate": 9.99669690725184e-06, "loss": 0.7796, "step": 122 }, { "epoch": 0.04182250935056103, "grad_norm": 1.8855361410264628, "learning_rate": 9.996493710424447e-06, "loss": 0.8304, "step": 123 }, { "epoch": 0.04216252975178511, "grad_norm": 1.837257670311997, "learning_rate": 9.996284450898093e-06, "loss": 0.9445, "step": 124 }, { "epoch": 0.04250255015300918, "grad_norm": 3.0656012239893964, "learning_rate": 9.996069128926691e-06, "loss": 0.8702, "step": 125 }, { "epoch": 0.04284257055423325, "grad_norm": 2.438953538165285, "learning_rate": 9.995847744771514e-06, "loss": 0.7872, "step": 126 }, { "epoch": 0.043182590955457324, "grad_norm": 2.279269747200056, "learning_rate": 9.995620298701183e-06, "loss": 0.8613, "step": 127 }, { "epoch": 0.0435226113566814, "grad_norm": 3.213489993275219, "learning_rate": 9.99538679099168e-06, "loss": 0.7127, "step": 128 }, { "epoch": 0.043862631757905474, "grad_norm": 2.5232149812128637, "learning_rate": 9.995147221926343e-06, "loss": 0.8698, "step": 129 }, { "epoch": 0.044202652159129545, "grad_norm": 2.839717095850297, "learning_rate": 9.994901591795863e-06, "loss": 0.85, "step": 130 }, { "epoch": 0.04454267256035362, "grad_norm": 7.648211408560256, "learning_rate": 9.994649900898283e-06, "loss": 0.9204, "step": 131 }, { "epoch": 0.044882692961577694, "grad_norm": 1.8745457296274581, "learning_rate": 9.994392149539003e-06, "loss": 0.8267, "step": 132 }, { "epoch": 0.045222713362801766, "grad_norm": 1.9836289186862535, "learning_rate": 9.994128338030778e-06, "loss": 0.9781, "step": 133 }, { "epoch": 0.045562733764025844, "grad_norm": 2.126401684741174, "learning_rate": 9.993858466693712e-06, "loss": 1.0093, "step": 134 }, { "epoch": 0.045902754165249915, "grad_norm": 1.983933252004279, "learning_rate": 9.993582535855265e-06, "loss": 0.8227, "step": 135 }, { "epoch": 0.046242774566473986, "grad_norm": 2.088706279304247, "learning_rate": 9.99330054585025e-06, "loss": 0.8529, "step": 136 }, { "epoch": 0.046582794967698064, "grad_norm": 2.4156135313351363, "learning_rate": 9.993012497020831e-06, "loss": 0.8666, "step": 137 }, { "epoch": 0.046922815368922136, "grad_norm": 2.4388488859277584, "learning_rate": 9.992718389716521e-06, "loss": 0.7969, "step": 138 }, { "epoch": 0.04726283577014621, "grad_norm": 2.169555997324188, "learning_rate": 9.992418224294191e-06, "loss": 0.893, "step": 139 }, { "epoch": 0.047602856171370285, "grad_norm": 1.9240543034696431, "learning_rate": 9.992112001118058e-06, "loss": 0.9161, "step": 140 }, { "epoch": 0.047942876572594356, "grad_norm": 3.4020949955907636, "learning_rate": 9.991799720559687e-06, "loss": 0.7416, "step": 141 }, { "epoch": 0.04828289697381843, "grad_norm": 2.4604945354615153, "learning_rate": 9.991481382998001e-06, "loss": 0.9075, "step": 142 }, { "epoch": 0.048622917375042506, "grad_norm": 2.255946457618047, "learning_rate": 9.991156988819264e-06, "loss": 0.9905, "step": 143 }, { "epoch": 0.04896293777626658, "grad_norm": 2.024969438240585, "learning_rate": 9.990826538417095e-06, "loss": 0.9332, "step": 144 }, { "epoch": 0.04930295817749065, "grad_norm": 2.2370360110433025, "learning_rate": 9.99049003219246e-06, "loss": 0.8148, "step": 145 }, { "epoch": 0.049642978578714726, "grad_norm": 2.2743555041984407, "learning_rate": 9.99014747055367e-06, "loss": 0.8756, "step": 146 }, { "epoch": 0.0499829989799388, "grad_norm": 3.95316960278758, "learning_rate": 9.989798853916388e-06, "loss": 0.8292, "step": 147 }, { "epoch": 0.05032301938116287, "grad_norm": 3.6392856446793833, "learning_rate": 9.989444182703623e-06, "loss": 0.7666, "step": 148 }, { "epoch": 0.05066303978238694, "grad_norm": 2.230929282152931, "learning_rate": 9.989083457345727e-06, "loss": 0.8501, "step": 149 }, { "epoch": 0.05100306018361102, "grad_norm": 3.0061135776554826, "learning_rate": 9.988716678280403e-06, "loss": 0.9064, "step": 150 }, { "epoch": 0.05134308058483509, "grad_norm": 2.1445457506712824, "learning_rate": 9.988343845952697e-06, "loss": 0.8971, "step": 151 }, { "epoch": 0.05168310098605916, "grad_norm": 2.437139941905991, "learning_rate": 9.987964960815e-06, "loss": 0.9558, "step": 152 }, { "epoch": 0.05202312138728324, "grad_norm": 4.873429755031727, "learning_rate": 9.987580023327046e-06, "loss": 0.8671, "step": 153 }, { "epoch": 0.05236314178850731, "grad_norm": 3.944162605474988, "learning_rate": 9.987189033955918e-06, "loss": 0.8272, "step": 154 }, { "epoch": 0.05270316218973138, "grad_norm": 2.8250974202447985, "learning_rate": 9.986791993176035e-06, "loss": 0.8342, "step": 155 }, { "epoch": 0.05304318259095546, "grad_norm": 2.3354440033626345, "learning_rate": 9.986388901469167e-06, "loss": 0.7322, "step": 156 }, { "epoch": 0.05338320299217953, "grad_norm": 3.5022852363593198, "learning_rate": 9.985979759324418e-06, "loss": 0.9182, "step": 157 }, { "epoch": 0.0537232233934036, "grad_norm": 2.022608573939002, "learning_rate": 9.985564567238237e-06, "loss": 0.8555, "step": 158 }, { "epoch": 0.05406324379462768, "grad_norm": 2.4546285068255744, "learning_rate": 9.985143325714419e-06, "loss": 0.8571, "step": 159 }, { "epoch": 0.05440326419585175, "grad_norm": 1.7140624575103907, "learning_rate": 9.984716035264089e-06, "loss": 0.7834, "step": 160 }, { "epoch": 0.05474328459707582, "grad_norm": 2.007966583007289, "learning_rate": 9.98428269640572e-06, "loss": 0.8762, "step": 161 }, { "epoch": 0.0550833049982999, "grad_norm": 2.543305674356758, "learning_rate": 9.983843309665122e-06, "loss": 0.8685, "step": 162 }, { "epoch": 0.05542332539952397, "grad_norm": 2.080393891117657, "learning_rate": 9.983397875575442e-06, "loss": 0.8911, "step": 163 }, { "epoch": 0.05576334580074804, "grad_norm": 2.294684609497304, "learning_rate": 9.982946394677165e-06, "loss": 0.9734, "step": 164 }, { "epoch": 0.05610336620197212, "grad_norm": 1.804144176865855, "learning_rate": 9.982488867518112e-06, "loss": 0.819, "step": 165 }, { "epoch": 0.05644338660319619, "grad_norm": 3.657476774208594, "learning_rate": 9.982025294653445e-06, "loss": 0.8307, "step": 166 }, { "epoch": 0.056783407004420264, "grad_norm": 2.074991736382199, "learning_rate": 9.98155567664566e-06, "loss": 0.8015, "step": 167 }, { "epoch": 0.05712342740564434, "grad_norm": 2.9051744319346255, "learning_rate": 9.981080014064584e-06, "loss": 0.7693, "step": 168 }, { "epoch": 0.05746344780686841, "grad_norm": 1.9099484180793749, "learning_rate": 9.980598307487383e-06, "loss": 0.7955, "step": 169 }, { "epoch": 0.057803468208092484, "grad_norm": 2.146831708028932, "learning_rate": 9.980110557498556e-06, "loss": 0.8993, "step": 170 }, { "epoch": 0.058143488609316556, "grad_norm": 2.61617139387018, "learning_rate": 9.979616764689932e-06, "loss": 0.8702, "step": 171 }, { "epoch": 0.058483509010540634, "grad_norm": 2.1196184076767057, "learning_rate": 9.979116929660677e-06, "loss": 0.9441, "step": 172 }, { "epoch": 0.058823529411764705, "grad_norm": 3.7671698090532866, "learning_rate": 9.978611053017286e-06, "loss": 0.9364, "step": 173 }, { "epoch": 0.059163549812988776, "grad_norm": 2.6799358731624268, "learning_rate": 9.978099135373584e-06, "loss": 0.8686, "step": 174 }, { "epoch": 0.059503570214212854, "grad_norm": 1.8432414885158408, "learning_rate": 9.977581177350726e-06, "loss": 0.8512, "step": 175 }, { "epoch": 0.059843590615436926, "grad_norm": 2.6747034963076963, "learning_rate": 9.977057179577199e-06, "loss": 0.7921, "step": 176 }, { "epoch": 0.060183611016661, "grad_norm": 3.038911682651883, "learning_rate": 9.976527142688818e-06, "loss": 0.849, "step": 177 }, { "epoch": 0.060523631417885075, "grad_norm": 3.497473396206265, "learning_rate": 9.975991067328722e-06, "loss": 0.8514, "step": 178 }, { "epoch": 0.060863651819109146, "grad_norm": 2.872664876776745, "learning_rate": 9.975448954147383e-06, "loss": 0.76, "step": 179 }, { "epoch": 0.06120367222033322, "grad_norm": 2.265420234730481, "learning_rate": 9.974900803802595e-06, "loss": 0.8893, "step": 180 }, { "epoch": 0.061543692621557296, "grad_norm": 2.2490770407309273, "learning_rate": 9.974346616959476e-06, "loss": 0.9017, "step": 181 }, { "epoch": 0.06188371302278137, "grad_norm": 2.279036329894871, "learning_rate": 9.973786394290475e-06, "loss": 0.7729, "step": 182 }, { "epoch": 0.06222373342400544, "grad_norm": 2.2273111686177693, "learning_rate": 9.973220136475359e-06, "loss": 0.9491, "step": 183 }, { "epoch": 0.06256375382522951, "grad_norm": 1.8442525974273993, "learning_rate": 9.97264784420122e-06, "loss": 0.8584, "step": 184 }, { "epoch": 0.06290377422645359, "grad_norm": 2.0159433278766876, "learning_rate": 9.972069518162472e-06, "loss": 0.855, "step": 185 }, { "epoch": 0.06324379462767767, "grad_norm": 2.2639684727572336, "learning_rate": 9.971485159060851e-06, "loss": 0.9352, "step": 186 }, { "epoch": 0.06358381502890173, "grad_norm": 2.5527740776494574, "learning_rate": 9.970894767605412e-06, "loss": 0.7912, "step": 187 }, { "epoch": 0.06392383543012581, "grad_norm": 1.9379101706278465, "learning_rate": 9.970298344512533e-06, "loss": 0.8189, "step": 188 }, { "epoch": 0.06426385583134989, "grad_norm": 3.1376756923399824, "learning_rate": 9.969695890505904e-06, "loss": 0.9007, "step": 189 }, { "epoch": 0.06460387623257395, "grad_norm": 2.270221919510738, "learning_rate": 9.96908740631654e-06, "loss": 0.8274, "step": 190 }, { "epoch": 0.06494389663379803, "grad_norm": 10.152532884466371, "learning_rate": 9.96847289268277e-06, "loss": 0.9628, "step": 191 }, { "epoch": 0.06528391703502211, "grad_norm": 2.2780092143082395, "learning_rate": 9.967852350350239e-06, "loss": 0.8497, "step": 192 }, { "epoch": 0.06562393743624617, "grad_norm": 2.212416833604225, "learning_rate": 9.967225780071908e-06, "loss": 0.8529, "step": 193 }, { "epoch": 0.06596395783747025, "grad_norm": 2.5787177735483833, "learning_rate": 9.966593182608048e-06, "loss": 0.8931, "step": 194 }, { "epoch": 0.06630397823869433, "grad_norm": 2.222235355514418, "learning_rate": 9.965954558726249e-06, "loss": 0.8334, "step": 195 }, { "epoch": 0.06664399863991839, "grad_norm": 3.0045508316166742, "learning_rate": 9.965309909201414e-06, "loss": 0.8262, "step": 196 }, { "epoch": 0.06698401904114247, "grad_norm": 2.071849506932709, "learning_rate": 9.964659234815752e-06, "loss": 0.9124, "step": 197 }, { "epoch": 0.06732403944236655, "grad_norm": 2.67102771314228, "learning_rate": 9.964002536358784e-06, "loss": 0.8469, "step": 198 }, { "epoch": 0.06766405984359061, "grad_norm": 3.45508048201135, "learning_rate": 9.963339814627344e-06, "loss": 0.862, "step": 199 }, { "epoch": 0.06800408024481469, "grad_norm": 1.95482979584485, "learning_rate": 9.962671070425573e-06, "loss": 0.832, "step": 200 }, { "epoch": 0.06834410064603877, "grad_norm": 2.6045722790965, "learning_rate": 9.961996304564916e-06, "loss": 0.8735, "step": 201 }, { "epoch": 0.06868412104726283, "grad_norm": 2.198649716123807, "learning_rate": 9.961315517864131e-06, "loss": 0.8463, "step": 202 }, { "epoch": 0.06902414144848691, "grad_norm": 3.00819531461859, "learning_rate": 9.960628711149276e-06, "loss": 0.7847, "step": 203 }, { "epoch": 0.06936416184971098, "grad_norm": 2.7123733460067068, "learning_rate": 9.959935885253715e-06, "loss": 0.8133, "step": 204 }, { "epoch": 0.06970418225093505, "grad_norm": 1.9582131537045544, "learning_rate": 9.95923704101812e-06, "loss": 0.9498, "step": 205 }, { "epoch": 0.07004420265215913, "grad_norm": 2.0898364433787577, "learning_rate": 9.958532179290458e-06, "loss": 0.7157, "step": 206 }, { "epoch": 0.0703842230533832, "grad_norm": 2.0874129936819688, "learning_rate": 9.957821300926007e-06, "loss": 0.8845, "step": 207 }, { "epoch": 0.07072424345460727, "grad_norm": 2.565402516349025, "learning_rate": 9.957104406787335e-06, "loss": 0.8621, "step": 208 }, { "epoch": 0.07106426385583135, "grad_norm": 3.070517613442607, "learning_rate": 9.956381497744317e-06, "loss": 0.8068, "step": 209 }, { "epoch": 0.07140428425705542, "grad_norm": 1.7745161730599397, "learning_rate": 9.955652574674122e-06, "loss": 0.774, "step": 210 }, { "epoch": 0.0717443046582795, "grad_norm": 2.2664861311135835, "learning_rate": 9.954917638461221e-06, "loss": 0.7881, "step": 211 }, { "epoch": 0.07208432505950357, "grad_norm": 2.8326430149609103, "learning_rate": 9.954176689997379e-06, "loss": 0.8248, "step": 212 }, { "epoch": 0.07242434546072764, "grad_norm": 5.345259266036334, "learning_rate": 9.953429730181653e-06, "loss": 0.8863, "step": 213 }, { "epoch": 0.07276436586195172, "grad_norm": 2.0087383584289404, "learning_rate": 9.952676759920401e-06, "loss": 0.9046, "step": 214 }, { "epoch": 0.0731043862631758, "grad_norm": 2.7067250680131503, "learning_rate": 9.951917780127268e-06, "loss": 0.7835, "step": 215 }, { "epoch": 0.07344440666439986, "grad_norm": 2.0141720241519963, "learning_rate": 9.951152791723193e-06, "loss": 0.8934, "step": 216 }, { "epoch": 0.07378442706562394, "grad_norm": 1.8777488263439555, "learning_rate": 9.950381795636406e-06, "loss": 0.8121, "step": 217 }, { "epoch": 0.07412444746684801, "grad_norm": 3.255807583902176, "learning_rate": 9.949604792802425e-06, "loss": 0.8676, "step": 218 }, { "epoch": 0.07446446786807208, "grad_norm": 2.67736933247142, "learning_rate": 9.94882178416406e-06, "loss": 0.7393, "step": 219 }, { "epoch": 0.07480448826929616, "grad_norm": 4.008487179200106, "learning_rate": 9.948032770671405e-06, "loss": 0.8465, "step": 220 }, { "epoch": 0.07514450867052024, "grad_norm": 2.940928392932845, "learning_rate": 9.947237753281845e-06, "loss": 0.8339, "step": 221 }, { "epoch": 0.0754845290717443, "grad_norm": 2.051172980166151, "learning_rate": 9.946436732960042e-06, "loss": 0.9295, "step": 222 }, { "epoch": 0.07582454947296838, "grad_norm": 2.424428707199302, "learning_rate": 9.945629710677949e-06, "loss": 0.8197, "step": 223 }, { "epoch": 0.07616456987419246, "grad_norm": 2.0727299530697763, "learning_rate": 9.9448166874148e-06, "loss": 0.8643, "step": 224 }, { "epoch": 0.07650459027541652, "grad_norm": 2.770127723386738, "learning_rate": 9.943997664157108e-06, "loss": 0.8465, "step": 225 }, { "epoch": 0.0768446106766406, "grad_norm": 2.555973204555607, "learning_rate": 9.943172641898669e-06, "loss": 0.8517, "step": 226 }, { "epoch": 0.07718463107786468, "grad_norm": 1.842390623944307, "learning_rate": 9.942341621640558e-06, "loss": 0.85, "step": 227 }, { "epoch": 0.07752465147908874, "grad_norm": 2.128091117333538, "learning_rate": 9.941504604391126e-06, "loss": 0.8292, "step": 228 }, { "epoch": 0.07786467188031282, "grad_norm": 1.7281395666728367, "learning_rate": 9.940661591166003e-06, "loss": 0.8231, "step": 229 }, { "epoch": 0.0782046922815369, "grad_norm": 2.9385281630654285, "learning_rate": 9.939812582988094e-06, "loss": 0.7502, "step": 230 }, { "epoch": 0.07854471268276096, "grad_norm": 2.1296804812289603, "learning_rate": 9.938957580887575e-06, "loss": 0.8717, "step": 231 }, { "epoch": 0.07888473308398504, "grad_norm": 2.14717718646388, "learning_rate": 9.9380965859019e-06, "loss": 0.8894, "step": 232 }, { "epoch": 0.07922475348520912, "grad_norm": 1.9538584630326372, "learning_rate": 9.937229599075791e-06, "loss": 0.8824, "step": 233 }, { "epoch": 0.07956477388643318, "grad_norm": 1.7865365933867121, "learning_rate": 9.936356621461243e-06, "loss": 0.8454, "step": 234 }, { "epoch": 0.07990479428765726, "grad_norm": 1.7416149475546014, "learning_rate": 9.935477654117518e-06, "loss": 0.8576, "step": 235 }, { "epoch": 0.08024481468888134, "grad_norm": 1.768841666763347, "learning_rate": 9.934592698111148e-06, "loss": 0.9265, "step": 236 }, { "epoch": 0.0805848350901054, "grad_norm": 2.4152772171644656, "learning_rate": 9.933701754515928e-06, "loss": 0.8519, "step": 237 }, { "epoch": 0.08092485549132948, "grad_norm": 2.2519504055783814, "learning_rate": 9.932804824412922e-06, "loss": 0.9161, "step": 238 }, { "epoch": 0.08126487589255356, "grad_norm": 2.450596425363767, "learning_rate": 9.931901908890457e-06, "loss": 0.8091, "step": 239 }, { "epoch": 0.08160489629377762, "grad_norm": 1.8041419356227175, "learning_rate": 9.930993009044123e-06, "loss": 0.8561, "step": 240 }, { "epoch": 0.0819449166950017, "grad_norm": 1.7358105710203156, "learning_rate": 9.930078125976767e-06, "loss": 0.9872, "step": 241 }, { "epoch": 0.08228493709622578, "grad_norm": 1.9725454367197321, "learning_rate": 9.929157260798504e-06, "loss": 0.796, "step": 242 }, { "epoch": 0.08262495749744984, "grad_norm": 2.0911715350232987, "learning_rate": 9.9282304146267e-06, "loss": 0.7842, "step": 243 }, { "epoch": 0.08296497789867392, "grad_norm": 1.7083869843606982, "learning_rate": 9.927297588585984e-06, "loss": 0.7561, "step": 244 }, { "epoch": 0.083304998299898, "grad_norm": 1.8485993258822613, "learning_rate": 9.926358783808238e-06, "loss": 0.7767, "step": 245 }, { "epoch": 0.08364501870112206, "grad_norm": 2.4843083297610655, "learning_rate": 9.925414001432599e-06, "loss": 0.8209, "step": 246 }, { "epoch": 0.08398503910234614, "grad_norm": 2.514523920882766, "learning_rate": 9.924463242605454e-06, "loss": 0.75, "step": 247 }, { "epoch": 0.08432505950357022, "grad_norm": 2.038961464493961, "learning_rate": 9.92350650848045e-06, "loss": 0.8024, "step": 248 }, { "epoch": 0.08466507990479429, "grad_norm": 5.967821891887257, "learning_rate": 9.922543800218474e-06, "loss": 0.8777, "step": 249 }, { "epoch": 0.08500510030601836, "grad_norm": 2.1398795095312946, "learning_rate": 9.921575118987672e-06, "loss": 0.8732, "step": 250 }, { "epoch": 0.08534512070724243, "grad_norm": 2.9133926895356277, "learning_rate": 9.92060046596343e-06, "loss": 0.7944, "step": 251 }, { "epoch": 0.0856851411084665, "grad_norm": 1.9889465507980215, "learning_rate": 9.919619842328383e-06, "loss": 0.8085, "step": 252 }, { "epoch": 0.08602516150969058, "grad_norm": 3.150534939883254, "learning_rate": 9.918633249272412e-06, "loss": 0.8113, "step": 253 }, { "epoch": 0.08636518191091465, "grad_norm": 2.2835881883036335, "learning_rate": 9.917640687992638e-06, "loss": 0.787, "step": 254 }, { "epoch": 0.08670520231213873, "grad_norm": 2.4230018775230904, "learning_rate": 9.916642159693428e-06, "loss": 0.8945, "step": 255 }, { "epoch": 0.0870452227133628, "grad_norm": 2.2893689337255108, "learning_rate": 9.915637665586386e-06, "loss": 0.8661, "step": 256 }, { "epoch": 0.08738524311458687, "grad_norm": 2.4221470788760713, "learning_rate": 9.914627206890352e-06, "loss": 0.8282, "step": 257 }, { "epoch": 0.08772526351581095, "grad_norm": 2.376986510975116, "learning_rate": 9.913610784831415e-06, "loss": 0.8371, "step": 258 }, { "epoch": 0.08806528391703503, "grad_norm": 2.4413551231465624, "learning_rate": 9.912588400642884e-06, "loss": 0.8826, "step": 259 }, { "epoch": 0.08840530431825909, "grad_norm": 2.644042881659926, "learning_rate": 9.911560055565316e-06, "loss": 0.7355, "step": 260 }, { "epoch": 0.08874532471948317, "grad_norm": 1.9367674986615202, "learning_rate": 9.910525750846494e-06, "loss": 0.8337, "step": 261 }, { "epoch": 0.08908534512070725, "grad_norm": 3.6307557122329657, "learning_rate": 9.909485487741432e-06, "loss": 0.8818, "step": 262 }, { "epoch": 0.08942536552193131, "grad_norm": 2.1386654842761468, "learning_rate": 9.908439267512378e-06, "loss": 0.8079, "step": 263 }, { "epoch": 0.08976538592315539, "grad_norm": 2.05052351091725, "learning_rate": 9.907387091428803e-06, "loss": 0.808, "step": 264 }, { "epoch": 0.09010540632437947, "grad_norm": 2.1166857344624455, "learning_rate": 9.906328960767409e-06, "loss": 0.8604, "step": 265 }, { "epoch": 0.09044542672560353, "grad_norm": 2.179398767884701, "learning_rate": 9.905264876812123e-06, "loss": 0.7211, "step": 266 }, { "epoch": 0.09078544712682761, "grad_norm": 2.8274324506005213, "learning_rate": 9.904194840854094e-06, "loss": 0.9274, "step": 267 }, { "epoch": 0.09112546752805169, "grad_norm": 2.2046170596471315, "learning_rate": 9.903118854191693e-06, "loss": 0.8147, "step": 268 }, { "epoch": 0.09146548792927575, "grad_norm": 2.7523217330730505, "learning_rate": 9.902036918130514e-06, "loss": 0.8264, "step": 269 }, { "epoch": 0.09180550833049983, "grad_norm": 2.2273055464770812, "learning_rate": 9.900949033983366e-06, "loss": 0.866, "step": 270 }, { "epoch": 0.09214552873172391, "grad_norm": 2.2499475354374607, "learning_rate": 9.899855203070278e-06, "loss": 0.818, "step": 271 }, { "epoch": 0.09248554913294797, "grad_norm": 1.9616287518680806, "learning_rate": 9.898755426718493e-06, "loss": 0.8311, "step": 272 }, { "epoch": 0.09282556953417205, "grad_norm": 2.5970292191674655, "learning_rate": 9.897649706262474e-06, "loss": 0.8518, "step": 273 }, { "epoch": 0.09316558993539613, "grad_norm": 1.7660827251520181, "learning_rate": 9.896538043043887e-06, "loss": 0.8273, "step": 274 }, { "epoch": 0.09350561033662019, "grad_norm": 2.6210906521485824, "learning_rate": 9.895420438411616e-06, "loss": 0.8951, "step": 275 }, { "epoch": 0.09384563073784427, "grad_norm": 1.8800676289752767, "learning_rate": 9.89429689372175e-06, "loss": 0.8584, "step": 276 }, { "epoch": 0.09418565113906835, "grad_norm": 3.1773516067582186, "learning_rate": 9.893167410337591e-06, "loss": 0.8535, "step": 277 }, { "epoch": 0.09452567154029241, "grad_norm": 3.404016837148496, "learning_rate": 9.892031989629642e-06, "loss": 0.8279, "step": 278 }, { "epoch": 0.09486569194151649, "grad_norm": 2.173356538675513, "learning_rate": 9.890890632975612e-06, "loss": 0.8635, "step": 279 }, { "epoch": 0.09520571234274057, "grad_norm": 2.261938469413284, "learning_rate": 9.889743341760412e-06, "loss": 0.7996, "step": 280 }, { "epoch": 0.09554573274396463, "grad_norm": 2.1477511262078233, "learning_rate": 9.888590117376154e-06, "loss": 0.9334, "step": 281 }, { "epoch": 0.09588575314518871, "grad_norm": 2.458576128018538, "learning_rate": 9.887430961222153e-06, "loss": 0.88, "step": 282 }, { "epoch": 0.09622577354641279, "grad_norm": 4.04358503504034, "learning_rate": 9.886265874704914e-06, "loss": 0.8699, "step": 283 }, { "epoch": 0.09656579394763685, "grad_norm": 2.1028398707450293, "learning_rate": 9.885094859238145e-06, "loss": 1.0212, "step": 284 }, { "epoch": 0.09690581434886093, "grad_norm": 2.208984051301497, "learning_rate": 9.883917916242744e-06, "loss": 0.8778, "step": 285 }, { "epoch": 0.09724583475008501, "grad_norm": 5.2221607237155245, "learning_rate": 9.882735047146803e-06, "loss": 0.9002, "step": 286 }, { "epoch": 0.09758585515130908, "grad_norm": 1.976265426760934, "learning_rate": 9.881546253385603e-06, "loss": 0.8457, "step": 287 }, { "epoch": 0.09792587555253315, "grad_norm": 2.298479530013667, "learning_rate": 9.880351536401617e-06, "loss": 0.8554, "step": 288 }, { "epoch": 0.09826589595375723, "grad_norm": 2.055015991219169, "learning_rate": 9.879150897644504e-06, "loss": 0.833, "step": 289 }, { "epoch": 0.0986059163549813, "grad_norm": 2.147764160856317, "learning_rate": 9.877944338571108e-06, "loss": 0.8516, "step": 290 }, { "epoch": 0.09894593675620537, "grad_norm": 1.9705786404454357, "learning_rate": 9.876731860645454e-06, "loss": 0.8118, "step": 291 }, { "epoch": 0.09928595715742945, "grad_norm": 2.502835283880914, "learning_rate": 9.875513465338754e-06, "loss": 0.8403, "step": 292 }, { "epoch": 0.09962597755865352, "grad_norm": 1.9113225330806665, "learning_rate": 9.874289154129396e-06, "loss": 0.8076, "step": 293 }, { "epoch": 0.0999659979598776, "grad_norm": 2.282725376690771, "learning_rate": 9.873058928502948e-06, "loss": 0.9446, "step": 294 }, { "epoch": 0.10030601836110166, "grad_norm": 2.2144235263707217, "learning_rate": 9.871822789952155e-06, "loss": 0.8268, "step": 295 }, { "epoch": 0.10064603876232574, "grad_norm": 2.0635404740469525, "learning_rate": 9.870580739976936e-06, "loss": 0.8726, "step": 296 }, { "epoch": 0.10098605916354982, "grad_norm": 1.9237725683316835, "learning_rate": 9.869332780084383e-06, "loss": 0.8556, "step": 297 }, { "epoch": 0.10132607956477388, "grad_norm": 2.2476614190445514, "learning_rate": 9.868078911788756e-06, "loss": 0.9219, "step": 298 }, { "epoch": 0.10166609996599796, "grad_norm": 2.5057546873897882, "learning_rate": 9.866819136611492e-06, "loss": 0.767, "step": 299 }, { "epoch": 0.10200612036722204, "grad_norm": 2.355080917010462, "learning_rate": 9.865553456081188e-06, "loss": 0.7392, "step": 300 }, { "epoch": 0.1023461407684461, "grad_norm": 2.1056856473326273, "learning_rate": 9.864281871733608e-06, "loss": 0.9198, "step": 301 }, { "epoch": 0.10268616116967018, "grad_norm": 3.9555249729605917, "learning_rate": 9.863004385111683e-06, "loss": 0.7841, "step": 302 }, { "epoch": 0.10302618157089426, "grad_norm": 2.2790309051441144, "learning_rate": 9.8617209977655e-06, "loss": 0.785, "step": 303 }, { "epoch": 0.10336620197211832, "grad_norm": 2.117633945883531, "learning_rate": 9.860431711252312e-06, "loss": 0.8726, "step": 304 }, { "epoch": 0.1037062223733424, "grad_norm": 2.5389939700471347, "learning_rate": 9.859136527136525e-06, "loss": 0.8982, "step": 305 }, { "epoch": 0.10404624277456648, "grad_norm": 2.022741013697183, "learning_rate": 9.857835446989708e-06, "loss": 0.7907, "step": 306 }, { "epoch": 0.10438626317579054, "grad_norm": 3.2009092222933715, "learning_rate": 9.856528472390576e-06, "loss": 0.6933, "step": 307 }, { "epoch": 0.10472628357701462, "grad_norm": 2.2334333165472984, "learning_rate": 9.855215604925e-06, "loss": 0.8256, "step": 308 }, { "epoch": 0.1050663039782387, "grad_norm": 2.7821304676829834, "learning_rate": 9.853896846186e-06, "loss": 0.8078, "step": 309 }, { "epoch": 0.10540632437946276, "grad_norm": 1.960403992508658, "learning_rate": 9.852572197773746e-06, "loss": 0.7848, "step": 310 }, { "epoch": 0.10574634478068684, "grad_norm": 2.0477907520683476, "learning_rate": 9.851241661295558e-06, "loss": 0.7813, "step": 311 }, { "epoch": 0.10608636518191092, "grad_norm": 2.0911122056100164, "learning_rate": 9.84990523836589e-06, "loss": 0.8461, "step": 312 }, { "epoch": 0.10642638558313498, "grad_norm": 2.0728170947217492, "learning_rate": 9.848562930606353e-06, "loss": 0.8832, "step": 313 }, { "epoch": 0.10676640598435906, "grad_norm": 2.3673123838424, "learning_rate": 9.847214739645684e-06, "loss": 0.8177, "step": 314 }, { "epoch": 0.10710642638558314, "grad_norm": 6.272345076835982, "learning_rate": 9.845860667119769e-06, "loss": 0.8795, "step": 315 }, { "epoch": 0.1074464467868072, "grad_norm": 2.1579047581155466, "learning_rate": 9.844500714671625e-06, "loss": 0.7415, "step": 316 }, { "epoch": 0.10778646718803128, "grad_norm": 1.9340927610624175, "learning_rate": 9.843134883951405e-06, "loss": 0.7208, "step": 317 }, { "epoch": 0.10812648758925536, "grad_norm": 2.582635490684408, "learning_rate": 9.8417631766164e-06, "loss": 0.788, "step": 318 }, { "epoch": 0.10846650799047942, "grad_norm": 2.5034059769021617, "learning_rate": 9.840385594331022e-06, "loss": 0.8107, "step": 319 }, { "epoch": 0.1088065283917035, "grad_norm": 1.8885099464740926, "learning_rate": 9.839002138766818e-06, "loss": 0.8251, "step": 320 }, { "epoch": 0.10914654879292758, "grad_norm": 2.1105158457858915, "learning_rate": 9.837612811602462e-06, "loss": 0.8193, "step": 321 }, { "epoch": 0.10948656919415165, "grad_norm": 2.35579939491151, "learning_rate": 9.836217614523747e-06, "loss": 0.8366, "step": 322 }, { "epoch": 0.10982658959537572, "grad_norm": 2.0838710319308174, "learning_rate": 9.834816549223595e-06, "loss": 0.8519, "step": 323 }, { "epoch": 0.1101666099965998, "grad_norm": 1.9779789707319297, "learning_rate": 9.833409617402044e-06, "loss": 0.8505, "step": 324 }, { "epoch": 0.11050663039782387, "grad_norm": 2.3766207833663784, "learning_rate": 9.831996820766255e-06, "loss": 0.8322, "step": 325 }, { "epoch": 0.11084665079904794, "grad_norm": 2.1149686177574343, "learning_rate": 9.830578161030498e-06, "loss": 0.7337, "step": 326 }, { "epoch": 0.11118667120027202, "grad_norm": 1.805489375141993, "learning_rate": 9.829153639916162e-06, "loss": 0.8365, "step": 327 }, { "epoch": 0.11152669160149609, "grad_norm": 1.8967483785377255, "learning_rate": 9.827723259151752e-06, "loss": 0.8414, "step": 328 }, { "epoch": 0.11186671200272016, "grad_norm": 1.797366163019355, "learning_rate": 9.826287020472873e-06, "loss": 0.7744, "step": 329 }, { "epoch": 0.11220673240394424, "grad_norm": 2.0302882503199373, "learning_rate": 9.82484492562225e-06, "loss": 0.7147, "step": 330 }, { "epoch": 0.1125467528051683, "grad_norm": 2.5063229156854723, "learning_rate": 9.823396976349702e-06, "loss": 0.9023, "step": 331 }, { "epoch": 0.11288677320639239, "grad_norm": 1.8249477324783674, "learning_rate": 9.821943174412159e-06, "loss": 0.8074, "step": 332 }, { "epoch": 0.11322679360761646, "grad_norm": 1.577389057439917, "learning_rate": 9.82048352157365e-06, "loss": 0.8605, "step": 333 }, { "epoch": 0.11356681400884053, "grad_norm": 2.760876889647242, "learning_rate": 9.819018019605306e-06, "loss": 0.8667, "step": 334 }, { "epoch": 0.1139068344100646, "grad_norm": 1.8963064743986586, "learning_rate": 9.817546670285353e-06, "loss": 0.7706, "step": 335 }, { "epoch": 0.11424685481128868, "grad_norm": 2.2030659264137773, "learning_rate": 9.816069475399113e-06, "loss": 0.8123, "step": 336 }, { "epoch": 0.11458687521251275, "grad_norm": 2.0354951728501685, "learning_rate": 9.814586436738998e-06, "loss": 0.8086, "step": 337 }, { "epoch": 0.11492689561373683, "grad_norm": 1.9773497047211561, "learning_rate": 9.813097556104514e-06, "loss": 0.7746, "step": 338 }, { "epoch": 0.1152669160149609, "grad_norm": 2.0274305580007628, "learning_rate": 9.811602835302257e-06, "loss": 0.8596, "step": 339 }, { "epoch": 0.11560693641618497, "grad_norm": 1.8336317454746485, "learning_rate": 9.810102276145907e-06, "loss": 0.9853, "step": 340 }, { "epoch": 0.11594695681740905, "grad_norm": 2.39198257850906, "learning_rate": 9.808595880456226e-06, "loss": 0.856, "step": 341 }, { "epoch": 0.11628697721863311, "grad_norm": 2.245187215344967, "learning_rate": 9.807083650061063e-06, "loss": 0.8427, "step": 342 }, { "epoch": 0.11662699761985719, "grad_norm": 2.972553693877441, "learning_rate": 9.805565586795343e-06, "loss": 0.7884, "step": 343 }, { "epoch": 0.11696701802108127, "grad_norm": 2.1953868509755776, "learning_rate": 9.804041692501071e-06, "loss": 0.8486, "step": 344 }, { "epoch": 0.11730703842230533, "grad_norm": 1.8166601796451167, "learning_rate": 9.802511969027325e-06, "loss": 0.8615, "step": 345 }, { "epoch": 0.11764705882352941, "grad_norm": 1.9173045797680728, "learning_rate": 9.800976418230257e-06, "loss": 0.8542, "step": 346 }, { "epoch": 0.11798707922475349, "grad_norm": 1.8134758747889816, "learning_rate": 9.799435041973092e-06, "loss": 0.8473, "step": 347 }, { "epoch": 0.11832709962597755, "grad_norm": 4.312106023493087, "learning_rate": 9.797887842126119e-06, "loss": 0.8589, "step": 348 }, { "epoch": 0.11866712002720163, "grad_norm": 2.0162211971459514, "learning_rate": 9.796334820566697e-06, "loss": 0.8679, "step": 349 }, { "epoch": 0.11900714042842571, "grad_norm": 2.0564343236552873, "learning_rate": 9.79477597917925e-06, "loss": 0.8577, "step": 350 }, { "epoch": 0.11934716082964977, "grad_norm": 2.133819909568558, "learning_rate": 9.793211319855258e-06, "loss": 0.8628, "step": 351 }, { "epoch": 0.11968718123087385, "grad_norm": 1.9311795244885308, "learning_rate": 9.791640844493267e-06, "loss": 0.8469, "step": 352 }, { "epoch": 0.12002720163209793, "grad_norm": 2.967519343303041, "learning_rate": 9.790064554998875e-06, "loss": 0.8362, "step": 353 }, { "epoch": 0.120367222033322, "grad_norm": 4.208154882236529, "learning_rate": 9.788482453284737e-06, "loss": 0.9199, "step": 354 }, { "epoch": 0.12070724243454607, "grad_norm": 2.617909166707279, "learning_rate": 9.786894541270563e-06, "loss": 0.7771, "step": 355 }, { "epoch": 0.12104726283577015, "grad_norm": 1.8137174275503192, "learning_rate": 9.785300820883108e-06, "loss": 0.7556, "step": 356 }, { "epoch": 0.12138728323699421, "grad_norm": 1.9432369984112166, "learning_rate": 9.78370129405618e-06, "loss": 0.8698, "step": 357 }, { "epoch": 0.12172730363821829, "grad_norm": 2.5064308483815316, "learning_rate": 9.782095962730628e-06, "loss": 0.8607, "step": 358 }, { "epoch": 0.12206732403944237, "grad_norm": 5.205967310399993, "learning_rate": 9.780484828854346e-06, "loss": 0.8711, "step": 359 }, { "epoch": 0.12240734444066644, "grad_norm": 3.2761307002799462, "learning_rate": 9.77886789438227e-06, "loss": 0.8533, "step": 360 }, { "epoch": 0.12274736484189051, "grad_norm": 1.8164118637880238, "learning_rate": 9.777245161276372e-06, "loss": 0.8407, "step": 361 }, { "epoch": 0.12308738524311459, "grad_norm": 1.933148024538711, "learning_rate": 9.775616631505663e-06, "loss": 0.8629, "step": 362 }, { "epoch": 0.12342740564433866, "grad_norm": 2.0021553446281395, "learning_rate": 9.773982307046185e-06, "loss": 0.7911, "step": 363 }, { "epoch": 0.12376742604556273, "grad_norm": 2.4770275582280155, "learning_rate": 9.772342189881012e-06, "loss": 0.8485, "step": 364 }, { "epoch": 0.12410744644678681, "grad_norm": 2.0858568863354474, "learning_rate": 9.770696282000245e-06, "loss": 0.8311, "step": 365 }, { "epoch": 0.12444746684801088, "grad_norm": 1.8817539415546063, "learning_rate": 9.769044585401017e-06, "loss": 0.8769, "step": 366 }, { "epoch": 0.12478748724923495, "grad_norm": 1.7596502920409212, "learning_rate": 9.767387102087477e-06, "loss": 0.8521, "step": 367 }, { "epoch": 0.12512750765045902, "grad_norm": 4.783494590295243, "learning_rate": 9.765723834070805e-06, "loss": 0.8325, "step": 368 }, { "epoch": 0.1254675280516831, "grad_norm": 4.166385331832633, "learning_rate": 9.764054783369191e-06, "loss": 0.862, "step": 369 }, { "epoch": 0.12580754845290718, "grad_norm": 2.460165725461386, "learning_rate": 9.762379952007847e-06, "loss": 0.8256, "step": 370 }, { "epoch": 0.12614756885413125, "grad_norm": 2.066282248436767, "learning_rate": 9.760699342018997e-06, "loss": 0.8975, "step": 371 }, { "epoch": 0.12648758925535533, "grad_norm": 2.4732231692866793, "learning_rate": 9.759012955441877e-06, "loss": 0.8474, "step": 372 }, { "epoch": 0.12682760965657938, "grad_norm": 2.1370791962949034, "learning_rate": 9.757320794322736e-06, "loss": 0.8541, "step": 373 }, { "epoch": 0.12716763005780346, "grad_norm": 2.545444534776287, "learning_rate": 9.755622860714824e-06, "loss": 0.7436, "step": 374 }, { "epoch": 0.12750765045902754, "grad_norm": 4.486941686450132, "learning_rate": 9.753919156678397e-06, "loss": 0.9077, "step": 375 }, { "epoch": 0.12784767086025162, "grad_norm": 2.3634783435481004, "learning_rate": 9.752209684280717e-06, "loss": 0.9126, "step": 376 }, { "epoch": 0.1281876912614757, "grad_norm": 2.155370098640075, "learning_rate": 9.750494445596039e-06, "loss": 0.9266, "step": 377 }, { "epoch": 0.12852771166269977, "grad_norm": 3.380923933190232, "learning_rate": 9.748773442705617e-06, "loss": 0.859, "step": 378 }, { "epoch": 0.12886773206392382, "grad_norm": 2.0842951308423805, "learning_rate": 9.747046677697703e-06, "loss": 0.8092, "step": 379 }, { "epoch": 0.1292077524651479, "grad_norm": 2.2756970171219026, "learning_rate": 9.745314152667532e-06, "loss": 0.8278, "step": 380 }, { "epoch": 0.12954777286637198, "grad_norm": 2.224154665108581, "learning_rate": 9.743575869717343e-06, "loss": 0.9301, "step": 381 }, { "epoch": 0.12988779326759606, "grad_norm": 1.8618826446938581, "learning_rate": 9.741831830956344e-06, "loss": 0.7506, "step": 382 }, { "epoch": 0.13022781366882014, "grad_norm": 2.359471897466931, "learning_rate": 9.740082038500738e-06, "loss": 0.8335, "step": 383 }, { "epoch": 0.13056783407004421, "grad_norm": 2.2530265023397003, "learning_rate": 9.738326494473708e-06, "loss": 0.6634, "step": 384 }, { "epoch": 0.13090785447126826, "grad_norm": 3.4483730372310313, "learning_rate": 9.736565201005415e-06, "loss": 0.796, "step": 385 }, { "epoch": 0.13124787487249234, "grad_norm": 2.0533540536094264, "learning_rate": 9.734798160232994e-06, "loss": 0.7612, "step": 386 }, { "epoch": 0.13158789527371642, "grad_norm": 2.0190748769698907, "learning_rate": 9.733025374300556e-06, "loss": 0.9188, "step": 387 }, { "epoch": 0.1319279156749405, "grad_norm": 3.434702747485103, "learning_rate": 9.731246845359187e-06, "loss": 0.7621, "step": 388 }, { "epoch": 0.13226793607616458, "grad_norm": 2.418994252917101, "learning_rate": 9.729462575566931e-06, "loss": 0.8452, "step": 389 }, { "epoch": 0.13260795647738866, "grad_norm": 3.0894287674893337, "learning_rate": 9.727672567088809e-06, "loss": 0.9177, "step": 390 }, { "epoch": 0.1329479768786127, "grad_norm": 2.087903312379049, "learning_rate": 9.725876822096798e-06, "loss": 0.8296, "step": 391 }, { "epoch": 0.13328799727983678, "grad_norm": 1.9043558027810883, "learning_rate": 9.724075342769841e-06, "loss": 0.889, "step": 392 }, { "epoch": 0.13362801768106086, "grad_norm": 2.455862323906618, "learning_rate": 9.722268131293835e-06, "loss": 0.7897, "step": 393 }, { "epoch": 0.13396803808228494, "grad_norm": 2.0215817915535346, "learning_rate": 9.720455189861634e-06, "loss": 0.8734, "step": 394 }, { "epoch": 0.13430805848350902, "grad_norm": 2.6158996648329347, "learning_rate": 9.718636520673042e-06, "loss": 0.7503, "step": 395 }, { "epoch": 0.1346480788847331, "grad_norm": 1.9769125457115064, "learning_rate": 9.716812125934818e-06, "loss": 0.8262, "step": 396 }, { "epoch": 0.13498809928595715, "grad_norm": 2.1779733445105514, "learning_rate": 9.714982007860666e-06, "loss": 0.859, "step": 397 }, { "epoch": 0.13532811968718123, "grad_norm": 1.831021062504083, "learning_rate": 9.713146168671229e-06, "loss": 0.7766, "step": 398 }, { "epoch": 0.1356681400884053, "grad_norm": 2.8959475128601224, "learning_rate": 9.711304610594104e-06, "loss": 0.7802, "step": 399 }, { "epoch": 0.13600816048962938, "grad_norm": 2.1321321783640004, "learning_rate": 9.709457335863815e-06, "loss": 0.7941, "step": 400 }, { "epoch": 0.13634818089085346, "grad_norm": 1.9938357040475916, "learning_rate": 9.707604346721833e-06, "loss": 0.8127, "step": 401 }, { "epoch": 0.13668820129207754, "grad_norm": 1.7330981671482506, "learning_rate": 9.705745645416553e-06, "loss": 0.7544, "step": 402 }, { "epoch": 0.1370282216933016, "grad_norm": 2.584162403732432, "learning_rate": 9.703881234203309e-06, "loss": 0.843, "step": 403 }, { "epoch": 0.13736824209452567, "grad_norm": 2.3450929065189596, "learning_rate": 9.702011115344359e-06, "loss": 0.8568, "step": 404 }, { "epoch": 0.13770826249574974, "grad_norm": 1.8906128272198697, "learning_rate": 9.70013529110889e-06, "loss": 0.7954, "step": 405 }, { "epoch": 0.13804828289697382, "grad_norm": 2.005544234328828, "learning_rate": 9.698253763773005e-06, "loss": 0.8552, "step": 406 }, { "epoch": 0.1383883032981979, "grad_norm": 2.1791404886973083, "learning_rate": 9.696366535619735e-06, "loss": 0.7682, "step": 407 }, { "epoch": 0.13872832369942195, "grad_norm": 2.1254255310117216, "learning_rate": 9.694473608939024e-06, "loss": 0.9305, "step": 408 }, { "epoch": 0.13906834410064603, "grad_norm": 2.065032167374917, "learning_rate": 9.692574986027733e-06, "loss": 0.8186, "step": 409 }, { "epoch": 0.1394083645018701, "grad_norm": 1.976786103840094, "learning_rate": 9.690670669189632e-06, "loss": 0.9253, "step": 410 }, { "epoch": 0.13974838490309419, "grad_norm": 9.33711461350008, "learning_rate": 9.688760660735403e-06, "loss": 0.8761, "step": 411 }, { "epoch": 0.14008840530431826, "grad_norm": 2.0857389613573027, "learning_rate": 9.68684496298263e-06, "loss": 0.8343, "step": 412 }, { "epoch": 0.14042842570554234, "grad_norm": 2.496185713769945, "learning_rate": 9.684923578255806e-06, "loss": 0.8012, "step": 413 }, { "epoch": 0.1407684461067664, "grad_norm": 3.227539517727669, "learning_rate": 9.682996508886318e-06, "loss": 0.8353, "step": 414 }, { "epoch": 0.14110846650799047, "grad_norm": 2.713651534237373, "learning_rate": 9.681063757212455e-06, "loss": 0.7775, "step": 415 }, { "epoch": 0.14144848690921455, "grad_norm": 1.6905920998598611, "learning_rate": 9.679125325579402e-06, "loss": 0.79, "step": 416 }, { "epoch": 0.14178850731043863, "grad_norm": 3.2275016714057947, "learning_rate": 9.67718121633923e-06, "loss": 0.8604, "step": 417 }, { "epoch": 0.1421285277116627, "grad_norm": 2.6920966680993503, "learning_rate": 9.675231431850907e-06, "loss": 0.793, "step": 418 }, { "epoch": 0.14246854811288678, "grad_norm": 2.6498430598374583, "learning_rate": 9.673275974480282e-06, "loss": 0.9103, "step": 419 }, { "epoch": 0.14280856851411083, "grad_norm": 2.7892979782736864, "learning_rate": 9.671314846600088e-06, "loss": 0.826, "step": 420 }, { "epoch": 0.1431485889153349, "grad_norm": 1.7387600466875632, "learning_rate": 9.66934805058994e-06, "loss": 0.7522, "step": 421 }, { "epoch": 0.143488609316559, "grad_norm": 1.9252648709916258, "learning_rate": 9.667375588836329e-06, "loss": 0.9249, "step": 422 }, { "epoch": 0.14382862971778307, "grad_norm": 2.0337042623648784, "learning_rate": 9.665397463732623e-06, "loss": 0.832, "step": 423 }, { "epoch": 0.14416865011900715, "grad_norm": 2.355326076923748, "learning_rate": 9.66341367767906e-06, "loss": 0.7972, "step": 424 }, { "epoch": 0.14450867052023122, "grad_norm": 4.252381455801173, "learning_rate": 9.661424233082748e-06, "loss": 0.8571, "step": 425 }, { "epoch": 0.14484869092145528, "grad_norm": 2.2203148724753503, "learning_rate": 9.65942913235766e-06, "loss": 0.8049, "step": 426 }, { "epoch": 0.14518871132267935, "grad_norm": 2.250620096493986, "learning_rate": 9.657428377924632e-06, "loss": 0.8665, "step": 427 }, { "epoch": 0.14552873172390343, "grad_norm": 1.7641375389756913, "learning_rate": 9.655421972211362e-06, "loss": 0.8509, "step": 428 }, { "epoch": 0.1458687521251275, "grad_norm": 1.917741070851843, "learning_rate": 9.653409917652406e-06, "loss": 0.8852, "step": 429 }, { "epoch": 0.1462087725263516, "grad_norm": 2.1235005797818425, "learning_rate": 9.651392216689167e-06, "loss": 0.939, "step": 430 }, { "epoch": 0.14654879292757567, "grad_norm": 2.423393500928274, "learning_rate": 9.649368871769908e-06, "loss": 0.7891, "step": 431 }, { "epoch": 0.14688881332879972, "grad_norm": 2.29836823405829, "learning_rate": 9.647339885349736e-06, "loss": 0.8961, "step": 432 }, { "epoch": 0.1472288337300238, "grad_norm": 3.041323557001672, "learning_rate": 9.645305259890606e-06, "loss": 0.6884, "step": 433 }, { "epoch": 0.14756885413124787, "grad_norm": 2.1635297868724486, "learning_rate": 9.643264997861312e-06, "loss": 0.8554, "step": 434 }, { "epoch": 0.14790887453247195, "grad_norm": 2.0623308876529896, "learning_rate": 9.641219101737489e-06, "loss": 0.8993, "step": 435 }, { "epoch": 0.14824889493369603, "grad_norm": 1.9701367858930552, "learning_rate": 9.639167574001608e-06, "loss": 0.7581, "step": 436 }, { "epoch": 0.1485889153349201, "grad_norm": 2.2500245249695365, "learning_rate": 9.637110417142975e-06, "loss": 0.7519, "step": 437 }, { "epoch": 0.14892893573614416, "grad_norm": 2.2284060545707187, "learning_rate": 9.635047633657723e-06, "loss": 0.9183, "step": 438 }, { "epoch": 0.14926895613736824, "grad_norm": 2.286832136889049, "learning_rate": 9.632979226048816e-06, "loss": 0.8386, "step": 439 }, { "epoch": 0.14960897653859231, "grad_norm": 2.2951771970261143, "learning_rate": 9.630905196826039e-06, "loss": 0.8065, "step": 440 }, { "epoch": 0.1499489969398164, "grad_norm": 2.131373688797792, "learning_rate": 9.628825548506002e-06, "loss": 0.7767, "step": 441 }, { "epoch": 0.15028901734104047, "grad_norm": 2.091224107238558, "learning_rate": 9.62674028361213e-06, "loss": 0.8655, "step": 442 }, { "epoch": 0.15062903774226455, "grad_norm": 3.2172828773538993, "learning_rate": 9.624649404674661e-06, "loss": 0.9147, "step": 443 }, { "epoch": 0.1509690581434886, "grad_norm": 2.025151440715302, "learning_rate": 9.622552914230655e-06, "loss": 0.9121, "step": 444 }, { "epoch": 0.15130907854471268, "grad_norm": 1.9528957154547468, "learning_rate": 9.620450814823966e-06, "loss": 0.8995, "step": 445 }, { "epoch": 0.15164909894593676, "grad_norm": 2.2336331464426245, "learning_rate": 9.618343109005266e-06, "loss": 0.7953, "step": 446 }, { "epoch": 0.15198911934716083, "grad_norm": 2.247828787686121, "learning_rate": 9.616229799332026e-06, "loss": 0.9126, "step": 447 }, { "epoch": 0.1523291397483849, "grad_norm": 3.0710622235399967, "learning_rate": 9.614110888368515e-06, "loss": 0.7671, "step": 448 }, { "epoch": 0.152669160149609, "grad_norm": 2.240205069427427, "learning_rate": 9.6119863786858e-06, "loss": 0.8534, "step": 449 }, { "epoch": 0.15300918055083304, "grad_norm": 2.3440964305088374, "learning_rate": 9.609856272861742e-06, "loss": 0.8859, "step": 450 }, { "epoch": 0.15334920095205712, "grad_norm": 2.947306903901322, "learning_rate": 9.607720573480991e-06, "loss": 0.8971, "step": 451 }, { "epoch": 0.1536892213532812, "grad_norm": 1.9814248823269929, "learning_rate": 9.605579283134985e-06, "loss": 0.8666, "step": 452 }, { "epoch": 0.15402924175450527, "grad_norm": 1.9414054426785363, "learning_rate": 9.603432404421947e-06, "loss": 0.83, "step": 453 }, { "epoch": 0.15436926215572935, "grad_norm": 1.9508100977088108, "learning_rate": 9.601279939946874e-06, "loss": 0.7941, "step": 454 }, { "epoch": 0.1547092825569534, "grad_norm": 2.738798819972234, "learning_rate": 9.599121892321554e-06, "loss": 0.7554, "step": 455 }, { "epoch": 0.15504930295817748, "grad_norm": 2.2388309465744274, "learning_rate": 9.59695826416454e-06, "loss": 0.8047, "step": 456 }, { "epoch": 0.15538932335940156, "grad_norm": 1.9102221923547757, "learning_rate": 9.594789058101154e-06, "loss": 0.7742, "step": 457 }, { "epoch": 0.15572934376062564, "grad_norm": 2.176425662506411, "learning_rate": 9.592614276763494e-06, "loss": 0.8392, "step": 458 }, { "epoch": 0.15606936416184972, "grad_norm": 2.075696818549719, "learning_rate": 9.590433922790418e-06, "loss": 0.8328, "step": 459 }, { "epoch": 0.1564093845630738, "grad_norm": 2.45612251063091, "learning_rate": 9.58824799882755e-06, "loss": 0.8015, "step": 460 }, { "epoch": 0.15674940496429784, "grad_norm": 1.8178552528317342, "learning_rate": 9.586056507527266e-06, "loss": 0.9039, "step": 461 }, { "epoch": 0.15708942536552192, "grad_norm": 2.124214107555732, "learning_rate": 9.583859451548703e-06, "loss": 0.8113, "step": 462 }, { "epoch": 0.157429445766746, "grad_norm": 1.5843205381627385, "learning_rate": 9.581656833557749e-06, "loss": 0.8248, "step": 463 }, { "epoch": 0.15776946616797008, "grad_norm": 1.9306313527246615, "learning_rate": 9.57944865622704e-06, "loss": 0.7318, "step": 464 }, { "epoch": 0.15810948656919416, "grad_norm": 2.9413967318596943, "learning_rate": 9.577234922235954e-06, "loss": 0.8524, "step": 465 }, { "epoch": 0.15844950697041824, "grad_norm": 1.571426184030293, "learning_rate": 9.575015634270619e-06, "loss": 0.9224, "step": 466 }, { "epoch": 0.1587895273716423, "grad_norm": 1.8730771122977774, "learning_rate": 9.5727907950239e-06, "loss": 0.7957, "step": 467 }, { "epoch": 0.15912954777286636, "grad_norm": 2.376107493345504, "learning_rate": 9.570560407195392e-06, "loss": 0.7542, "step": 468 }, { "epoch": 0.15946956817409044, "grad_norm": 1.9384094700182535, "learning_rate": 9.568324473491431e-06, "loss": 0.7407, "step": 469 }, { "epoch": 0.15980958857531452, "grad_norm": 1.7937843614169016, "learning_rate": 9.566082996625072e-06, "loss": 0.7993, "step": 470 }, { "epoch": 0.1601496089765386, "grad_norm": 3.3373628850176127, "learning_rate": 9.56383597931611e-06, "loss": 0.7848, "step": 471 }, { "epoch": 0.16048962937776268, "grad_norm": 2.1659690728359697, "learning_rate": 9.561583424291048e-06, "loss": 0.8287, "step": 472 }, { "epoch": 0.16082964977898673, "grad_norm": 2.2389421306989234, "learning_rate": 9.55932533428312e-06, "loss": 0.7587, "step": 473 }, { "epoch": 0.1611696701802108, "grad_norm": 2.818398736688453, "learning_rate": 9.557061712032269e-06, "loss": 0.8222, "step": 474 }, { "epoch": 0.16150969058143488, "grad_norm": 2.441252664404201, "learning_rate": 9.554792560285152e-06, "loss": 0.734, "step": 475 }, { "epoch": 0.16184971098265896, "grad_norm": 2.295687270495865, "learning_rate": 9.552517881795142e-06, "loss": 0.8626, "step": 476 }, { "epoch": 0.16218973138388304, "grad_norm": 13.760896349921383, "learning_rate": 9.550237679322308e-06, "loss": 0.8463, "step": 477 }, { "epoch": 0.16252975178510712, "grad_norm": 2.1715258052291047, "learning_rate": 9.547951955633428e-06, "loss": 0.7491, "step": 478 }, { "epoch": 0.16286977218633117, "grad_norm": 2.2255593479400906, "learning_rate": 9.545660713501975e-06, "loss": 0.9064, "step": 479 }, { "epoch": 0.16320979258755525, "grad_norm": 2.21806151643282, "learning_rate": 9.543363955708124e-06, "loss": 0.8289, "step": 480 }, { "epoch": 0.16354981298877933, "grad_norm": 2.188397294600766, "learning_rate": 9.541061685038742e-06, "loss": 0.8429, "step": 481 }, { "epoch": 0.1638898333900034, "grad_norm": 2.166972985318867, "learning_rate": 9.538753904287376e-06, "loss": 0.9443, "step": 482 }, { "epoch": 0.16422985379122748, "grad_norm": 2.455788846295091, "learning_rate": 9.53644061625427e-06, "loss": 0.7398, "step": 483 }, { "epoch": 0.16456987419245156, "grad_norm": 2.4033301722625886, "learning_rate": 9.534121823746348e-06, "loss": 0.8728, "step": 484 }, { "epoch": 0.1649098945936756, "grad_norm": 2.2276672135131634, "learning_rate": 9.531797529577205e-06, "loss": 0.9371, "step": 485 }, { "epoch": 0.1652499149948997, "grad_norm": 2.434555018080122, "learning_rate": 9.529467736567124e-06, "loss": 0.9057, "step": 486 }, { "epoch": 0.16558993539612377, "grad_norm": 2.427937801753027, "learning_rate": 9.527132447543051e-06, "loss": 0.8455, "step": 487 }, { "epoch": 0.16592995579734784, "grad_norm": 3.4006562033751817, "learning_rate": 9.524791665338606e-06, "loss": 0.8247, "step": 488 }, { "epoch": 0.16626997619857192, "grad_norm": 2.7613015303421466, "learning_rate": 9.522445392794069e-06, "loss": 0.8169, "step": 489 }, { "epoch": 0.166609996599796, "grad_norm": 2.6236255693220323, "learning_rate": 9.520093632756388e-06, "loss": 0.7666, "step": 490 }, { "epoch": 0.16695001700102005, "grad_norm": 2.389219152903732, "learning_rate": 9.517736388079169e-06, "loss": 0.8067, "step": 491 }, { "epoch": 0.16729003740224413, "grad_norm": 1.7505382569470098, "learning_rate": 9.515373661622665e-06, "loss": 0.8714, "step": 492 }, { "epoch": 0.1676300578034682, "grad_norm": 2.0417205501733795, "learning_rate": 9.51300545625379e-06, "loss": 0.8, "step": 493 }, { "epoch": 0.16797007820469229, "grad_norm": 2.245402590882533, "learning_rate": 9.510631774846099e-06, "loss": 0.762, "step": 494 }, { "epoch": 0.16831009860591636, "grad_norm": 2.82928636204283, "learning_rate": 9.5082526202798e-06, "loss": 0.6621, "step": 495 }, { "epoch": 0.16865011900714044, "grad_norm": 2.387297511941106, "learning_rate": 9.505867995441734e-06, "loss": 0.8231, "step": 496 }, { "epoch": 0.1689901394083645, "grad_norm": 3.8013261588105927, "learning_rate": 9.503477903225382e-06, "loss": 0.8885, "step": 497 }, { "epoch": 0.16933015980958857, "grad_norm": 1.9582651742957375, "learning_rate": 9.501082346530864e-06, "loss": 0.7235, "step": 498 }, { "epoch": 0.16967018021081265, "grad_norm": 2.0197184013240497, "learning_rate": 9.498681328264919e-06, "loss": 0.8888, "step": 499 }, { "epoch": 0.17001020061203673, "grad_norm": 1.9383228814810216, "learning_rate": 9.496274851340926e-06, "loss": 0.7643, "step": 500 }, { "epoch": 0.1703502210132608, "grad_norm": 2.2775734934604728, "learning_rate": 9.49386291867888e-06, "loss": 0.7317, "step": 501 }, { "epoch": 0.17069024141448486, "grad_norm": 1.6625027584786152, "learning_rate": 9.491445533205397e-06, "loss": 0.8367, "step": 502 }, { "epoch": 0.17103026181570893, "grad_norm": 2.1475307975125113, "learning_rate": 9.48902269785371e-06, "loss": 0.8449, "step": 503 }, { "epoch": 0.171370282216933, "grad_norm": 2.347052710314186, "learning_rate": 9.486594415563665e-06, "loss": 0.867, "step": 504 }, { "epoch": 0.1717103026181571, "grad_norm": 1.6939167607337662, "learning_rate": 9.484160689281718e-06, "loss": 0.8089, "step": 505 }, { "epoch": 0.17205032301938117, "grad_norm": 2.7074248206478786, "learning_rate": 9.48172152196093e-06, "loss": 0.9275, "step": 506 }, { "epoch": 0.17239034342060525, "grad_norm": 1.9623542563803935, "learning_rate": 9.47927691656096e-06, "loss": 0.7276, "step": 507 }, { "epoch": 0.1727303638218293, "grad_norm": 2.2577590094858433, "learning_rate": 9.476826876048076e-06, "loss": 0.8322, "step": 508 }, { "epoch": 0.17307038422305338, "grad_norm": 2.610689837249583, "learning_rate": 9.474371403395129e-06, "loss": 0.7989, "step": 509 }, { "epoch": 0.17341040462427745, "grad_norm": 2.981168203823366, "learning_rate": 9.47191050158157e-06, "loss": 0.8787, "step": 510 }, { "epoch": 0.17375042502550153, "grad_norm": 2.0910514651851937, "learning_rate": 9.469444173593433e-06, "loss": 0.8342, "step": 511 }, { "epoch": 0.1740904454267256, "grad_norm": 2.327443732050833, "learning_rate": 9.466972422423338e-06, "loss": 0.7471, "step": 512 }, { "epoch": 0.1744304658279497, "grad_norm": 1.8968205078548026, "learning_rate": 9.464495251070483e-06, "loss": 0.8071, "step": 513 }, { "epoch": 0.17477048622917374, "grad_norm": 5.35775514905284, "learning_rate": 9.462012662540645e-06, "loss": 0.7672, "step": 514 }, { "epoch": 0.17511050663039782, "grad_norm": 1.8205374867928585, "learning_rate": 9.459524659846176e-06, "loss": 0.8094, "step": 515 }, { "epoch": 0.1754505270316219, "grad_norm": 2.70503593128861, "learning_rate": 9.457031246005994e-06, "loss": 0.8121, "step": 516 }, { "epoch": 0.17579054743284597, "grad_norm": 2.097674274994687, "learning_rate": 9.454532424045585e-06, "loss": 0.7831, "step": 517 }, { "epoch": 0.17613056783407005, "grad_norm": 2.541108967039887, "learning_rate": 9.452028196996994e-06, "loss": 0.7744, "step": 518 }, { "epoch": 0.17647058823529413, "grad_norm": 2.448989357418417, "learning_rate": 9.449518567898827e-06, "loss": 0.8201, "step": 519 }, { "epoch": 0.17681060863651818, "grad_norm": 2.4183579990716972, "learning_rate": 9.44700353979625e-06, "loss": 0.9491, "step": 520 }, { "epoch": 0.17715062903774226, "grad_norm": 2.0371443097616204, "learning_rate": 9.444483115740968e-06, "loss": 0.8665, "step": 521 }, { "epoch": 0.17749064943896634, "grad_norm": 2.417951434607015, "learning_rate": 9.441957298791243e-06, "loss": 0.7236, "step": 522 }, { "epoch": 0.17783066984019041, "grad_norm": 3.2234895274833817, "learning_rate": 9.439426092011877e-06, "loss": 0.8275, "step": 523 }, { "epoch": 0.1781706902414145, "grad_norm": 2.6553913479919773, "learning_rate": 9.436889498474213e-06, "loss": 0.8412, "step": 524 }, { "epoch": 0.17851071064263857, "grad_norm": 3.1030470159238392, "learning_rate": 9.434347521256131e-06, "loss": 0.832, "step": 525 }, { "epoch": 0.17885073104386262, "grad_norm": 1.8409627332088008, "learning_rate": 9.431800163442043e-06, "loss": 0.843, "step": 526 }, { "epoch": 0.1791907514450867, "grad_norm": 3.186355638430994, "learning_rate": 9.429247428122886e-06, "loss": 0.707, "step": 527 }, { "epoch": 0.17953077184631078, "grad_norm": 2.0444710139532516, "learning_rate": 9.426689318396128e-06, "loss": 0.8321, "step": 528 }, { "epoch": 0.17987079224753486, "grad_norm": 1.7029237292350985, "learning_rate": 9.424125837365754e-06, "loss": 0.8387, "step": 529 }, { "epoch": 0.18021081264875893, "grad_norm": 2.2003972951001427, "learning_rate": 9.42155698814227e-06, "loss": 0.7516, "step": 530 }, { "epoch": 0.180550833049983, "grad_norm": 2.557957310252339, "learning_rate": 9.41898277384269e-06, "loss": 0.8797, "step": 531 }, { "epoch": 0.18089085345120706, "grad_norm": 2.895779678922211, "learning_rate": 9.416403197590547e-06, "loss": 0.823, "step": 532 }, { "epoch": 0.18123087385243114, "grad_norm": 2.0167366993376947, "learning_rate": 9.41381826251587e-06, "loss": 0.9268, "step": 533 }, { "epoch": 0.18157089425365522, "grad_norm": 2.2879605857864265, "learning_rate": 9.411227971755197e-06, "loss": 0.9309, "step": 534 }, { "epoch": 0.1819109146548793, "grad_norm": 5.454574823683741, "learning_rate": 9.408632328451565e-06, "loss": 0.8586, "step": 535 }, { "epoch": 0.18225093505610337, "grad_norm": 2.145392976793967, "learning_rate": 9.4060313357545e-06, "loss": 0.8267, "step": 536 }, { "epoch": 0.18259095545732745, "grad_norm": 2.4134835759822377, "learning_rate": 9.403424996820024e-06, "loss": 0.8951, "step": 537 }, { "epoch": 0.1829309758585515, "grad_norm": 2.490795625384553, "learning_rate": 9.400813314810644e-06, "loss": 0.8217, "step": 538 }, { "epoch": 0.18327099625977558, "grad_norm": 4.105744879893649, "learning_rate": 9.39819629289535e-06, "loss": 0.7641, "step": 539 }, { "epoch": 0.18361101666099966, "grad_norm": 2.663813984562291, "learning_rate": 9.395573934249614e-06, "loss": 0.8811, "step": 540 }, { "epoch": 0.18395103706222374, "grad_norm": 2.3379526523751037, "learning_rate": 9.392946242055379e-06, "loss": 0.8157, "step": 541 }, { "epoch": 0.18429105746344782, "grad_norm": 2.460601877280579, "learning_rate": 9.390313219501061e-06, "loss": 0.8666, "step": 542 }, { "epoch": 0.18463107786467187, "grad_norm": 2.4088692423050144, "learning_rate": 9.38767486978155e-06, "loss": 0.876, "step": 543 }, { "epoch": 0.18497109826589594, "grad_norm": 1.5738286525981031, "learning_rate": 9.385031196098194e-06, "loss": 0.7488, "step": 544 }, { "epoch": 0.18531111866712002, "grad_norm": 4.532392662900621, "learning_rate": 9.3823822016588e-06, "loss": 0.8693, "step": 545 }, { "epoch": 0.1856511390683441, "grad_norm": 2.237189674784233, "learning_rate": 9.379727889677632e-06, "loss": 0.8958, "step": 546 }, { "epoch": 0.18599115946956818, "grad_norm": 1.554107484360944, "learning_rate": 9.377068263375411e-06, "loss": 0.7866, "step": 547 }, { "epoch": 0.18633117987079226, "grad_norm": 2.4467881402130125, "learning_rate": 9.374403325979301e-06, "loss": 0.8856, "step": 548 }, { "epoch": 0.1866712002720163, "grad_norm": 2.4507416677378213, "learning_rate": 9.371733080722911e-06, "loss": 0.7532, "step": 549 }, { "epoch": 0.18701122067324039, "grad_norm": 1.427343424523804, "learning_rate": 9.369057530846294e-06, "loss": 0.8418, "step": 550 }, { "epoch": 0.18735124107446446, "grad_norm": 3.24841097172593, "learning_rate": 9.366376679595936e-06, "loss": 0.8738, "step": 551 }, { "epoch": 0.18769126147568854, "grad_norm": 1.8294498367391419, "learning_rate": 9.363690530224757e-06, "loss": 0.9536, "step": 552 }, { "epoch": 0.18803128187691262, "grad_norm": 3.147835292974788, "learning_rate": 9.360999085992106e-06, "loss": 0.9387, "step": 553 }, { "epoch": 0.1883713022781367, "grad_norm": 1.7595455512097826, "learning_rate": 9.358302350163758e-06, "loss": 0.893, "step": 554 }, { "epoch": 0.18871132267936075, "grad_norm": 2.0297149767885587, "learning_rate": 9.355600326011903e-06, "loss": 0.8648, "step": 555 }, { "epoch": 0.18905134308058483, "grad_norm": 2.669731699705077, "learning_rate": 9.352893016815155e-06, "loss": 0.8835, "step": 556 }, { "epoch": 0.1893913634818089, "grad_norm": 5.446794892812443, "learning_rate": 9.350180425858538e-06, "loss": 0.7767, "step": 557 }, { "epoch": 0.18973138388303298, "grad_norm": 1.8268464771652575, "learning_rate": 9.347462556433483e-06, "loss": 0.7565, "step": 558 }, { "epoch": 0.19007140428425706, "grad_norm": 1.642166985619522, "learning_rate": 9.34473941183783e-06, "loss": 0.8424, "step": 559 }, { "epoch": 0.19041142468548114, "grad_norm": 2.0901831217312625, "learning_rate": 9.342010995375811e-06, "loss": 0.7805, "step": 560 }, { "epoch": 0.1907514450867052, "grad_norm": 1.90871026730968, "learning_rate": 9.33927731035807e-06, "loss": 0.7668, "step": 561 }, { "epoch": 0.19109146548792927, "grad_norm": 3.4307001624182316, "learning_rate": 9.336538360101631e-06, "loss": 0.8382, "step": 562 }, { "epoch": 0.19143148588915335, "grad_norm": 2.393265808564416, "learning_rate": 9.333794147929907e-06, "loss": 0.7788, "step": 563 }, { "epoch": 0.19177150629037742, "grad_norm": 2.022566960473697, "learning_rate": 9.331044677172705e-06, "loss": 0.744, "step": 564 }, { "epoch": 0.1921115266916015, "grad_norm": 3.3062922036782836, "learning_rate": 9.328289951166205e-06, "loss": 0.8229, "step": 565 }, { "epoch": 0.19245154709282558, "grad_norm": 3.0841654373647516, "learning_rate": 9.325529973252967e-06, "loss": 0.6495, "step": 566 }, { "epoch": 0.19279156749404963, "grad_norm": 5.4892675893239105, "learning_rate": 9.32276474678192e-06, "loss": 0.8487, "step": 567 }, { "epoch": 0.1931315878952737, "grad_norm": 2.013856083284362, "learning_rate": 9.319994275108365e-06, "loss": 0.9441, "step": 568 }, { "epoch": 0.1934716082964978, "grad_norm": 2.4066098547440897, "learning_rate": 9.31721856159397e-06, "loss": 0.8576, "step": 569 }, { "epoch": 0.19381162869772187, "grad_norm": 2.0530822158137023, "learning_rate": 9.314437609606754e-06, "loss": 0.7699, "step": 570 }, { "epoch": 0.19415164909894594, "grad_norm": 1.7520902212648368, "learning_rate": 9.311651422521103e-06, "loss": 0.8794, "step": 571 }, { "epoch": 0.19449166950017002, "grad_norm": 2.034565210374195, "learning_rate": 9.308860003717748e-06, "loss": 0.8773, "step": 572 }, { "epoch": 0.19483168990139407, "grad_norm": 1.8451050446755255, "learning_rate": 9.306063356583772e-06, "loss": 0.7947, "step": 573 }, { "epoch": 0.19517171030261815, "grad_norm": 2.010334432898869, "learning_rate": 9.3032614845126e-06, "loss": 0.8639, "step": 574 }, { "epoch": 0.19551173070384223, "grad_norm": 1.8970535978324625, "learning_rate": 9.300454390903999e-06, "loss": 0.74, "step": 575 }, { "epoch": 0.1958517511050663, "grad_norm": 2.3148877609826544, "learning_rate": 9.297642079164067e-06, "loss": 0.8328, "step": 576 }, { "epoch": 0.19619177150629039, "grad_norm": 1.8964922349773508, "learning_rate": 9.294824552705238e-06, "loss": 0.7799, "step": 577 }, { "epoch": 0.19653179190751446, "grad_norm": 1.970465125409274, "learning_rate": 9.292001814946275e-06, "loss": 0.8337, "step": 578 }, { "epoch": 0.19687181230873851, "grad_norm": 1.8923831770943373, "learning_rate": 9.289173869312259e-06, "loss": 0.9365, "step": 579 }, { "epoch": 0.1972118327099626, "grad_norm": 5.110858953274893, "learning_rate": 9.286340719234592e-06, "loss": 0.8185, "step": 580 }, { "epoch": 0.19755185311118667, "grad_norm": 2.090608423644068, "learning_rate": 9.283502368150996e-06, "loss": 0.8934, "step": 581 }, { "epoch": 0.19789187351241075, "grad_norm": 2.016487940198634, "learning_rate": 9.280658819505495e-06, "loss": 0.7756, "step": 582 }, { "epoch": 0.19823189391363483, "grad_norm": 2.221575121411304, "learning_rate": 9.277810076748427e-06, "loss": 0.821, "step": 583 }, { "epoch": 0.1985719143148589, "grad_norm": 1.8543548108937444, "learning_rate": 9.274956143336433e-06, "loss": 0.873, "step": 584 }, { "epoch": 0.19891193471608296, "grad_norm": 2.259730694848992, "learning_rate": 9.272097022732444e-06, "loss": 0.8786, "step": 585 }, { "epoch": 0.19925195511730703, "grad_norm": 1.848711170198966, "learning_rate": 9.269232718405692e-06, "loss": 0.8858, "step": 586 }, { "epoch": 0.1995919755185311, "grad_norm": 1.866200683323318, "learning_rate": 9.266363233831697e-06, "loss": 0.8016, "step": 587 }, { "epoch": 0.1999319959197552, "grad_norm": 14.97505212921413, "learning_rate": 9.263488572492267e-06, "loss": 0.7263, "step": 588 }, { "epoch": 0.20027201632097927, "grad_norm": 2.15282534325055, "learning_rate": 9.260608737875487e-06, "loss": 0.83, "step": 589 }, { "epoch": 0.20061203672220332, "grad_norm": 3.0728740665252308, "learning_rate": 9.257723733475723e-06, "loss": 0.8643, "step": 590 }, { "epoch": 0.2009520571234274, "grad_norm": 1.9579947280234309, "learning_rate": 9.25483356279361e-06, "loss": 0.7628, "step": 591 }, { "epoch": 0.20129207752465147, "grad_norm": 2.0595756759778645, "learning_rate": 9.251938229336057e-06, "loss": 0.7825, "step": 592 }, { "epoch": 0.20163209792587555, "grad_norm": 3.9099946836071258, "learning_rate": 9.249037736616235e-06, "loss": 0.9511, "step": 593 }, { "epoch": 0.20197211832709963, "grad_norm": 1.9453043208275265, "learning_rate": 9.24613208815357e-06, "loss": 0.8037, "step": 594 }, { "epoch": 0.2023121387283237, "grad_norm": 2.067317387394065, "learning_rate": 9.243221287473755e-06, "loss": 0.8915, "step": 595 }, { "epoch": 0.20265215912954776, "grad_norm": 1.7871868965358906, "learning_rate": 9.240305338108726e-06, "loss": 0.9012, "step": 596 }, { "epoch": 0.20299217953077184, "grad_norm": 1.742758087393586, "learning_rate": 9.237384243596667e-06, "loss": 0.7241, "step": 597 }, { "epoch": 0.20333219993199592, "grad_norm": 3.9506555587545105, "learning_rate": 9.23445800748201e-06, "loss": 0.8664, "step": 598 }, { "epoch": 0.20367222033322, "grad_norm": 2.1447546142847704, "learning_rate": 9.231526633315419e-06, "loss": 0.8176, "step": 599 }, { "epoch": 0.20401224073444407, "grad_norm": 2.0350086212804848, "learning_rate": 9.2285901246538e-06, "loss": 0.858, "step": 600 }, { "epoch": 0.20435226113566815, "grad_norm": 2.0664182960390494, "learning_rate": 9.225648485060283e-06, "loss": 0.7872, "step": 601 }, { "epoch": 0.2046922815368922, "grad_norm": 2.5061940461297714, "learning_rate": 9.222701718104226e-06, "loss": 0.7595, "step": 602 }, { "epoch": 0.20503230193811628, "grad_norm": 2.0379087741894084, "learning_rate": 9.21974982736121e-06, "loss": 0.8303, "step": 603 }, { "epoch": 0.20537232233934036, "grad_norm": 2.4684024083448697, "learning_rate": 9.21679281641303e-06, "loss": 0.7877, "step": 604 }, { "epoch": 0.20571234274056444, "grad_norm": 1.541601484308322, "learning_rate": 9.2138306888477e-06, "loss": 0.8159, "step": 605 }, { "epoch": 0.2060523631417885, "grad_norm": 2.855409839214273, "learning_rate": 9.21086344825943e-06, "loss": 0.892, "step": 606 }, { "epoch": 0.2063923835430126, "grad_norm": 2.0094424248725584, "learning_rate": 9.207891098248648e-06, "loss": 0.8376, "step": 607 }, { "epoch": 0.20673240394423664, "grad_norm": 2.3628874201292103, "learning_rate": 9.204913642421977e-06, "loss": 0.8384, "step": 608 }, { "epoch": 0.20707242434546072, "grad_norm": 2.7503706346744408, "learning_rate": 9.20193108439223e-06, "loss": 0.8241, "step": 609 }, { "epoch": 0.2074124447466848, "grad_norm": 1.8912481720048837, "learning_rate": 9.198943427778415e-06, "loss": 0.7518, "step": 610 }, { "epoch": 0.20775246514790888, "grad_norm": 1.8807659764591915, "learning_rate": 9.19595067620573e-06, "loss": 0.9048, "step": 611 }, { "epoch": 0.20809248554913296, "grad_norm": 2.5399683576595744, "learning_rate": 9.19295283330555e-06, "loss": 0.787, "step": 612 }, { "epoch": 0.20843250595035703, "grad_norm": 2.2042041124830756, "learning_rate": 9.189949902715432e-06, "loss": 0.7788, "step": 613 }, { "epoch": 0.20877252635158108, "grad_norm": 3.1080504073340185, "learning_rate": 9.1869418880791e-06, "loss": 0.9674, "step": 614 }, { "epoch": 0.20911254675280516, "grad_norm": 4.302778372345475, "learning_rate": 9.183928793046456e-06, "loss": 0.8179, "step": 615 }, { "epoch": 0.20945256715402924, "grad_norm": 4.5320039265679375, "learning_rate": 9.180910621273555e-06, "loss": 0.8254, "step": 616 }, { "epoch": 0.20979258755525332, "grad_norm": 1.8992179347695721, "learning_rate": 9.177887376422624e-06, "loss": 0.7908, "step": 617 }, { "epoch": 0.2101326079564774, "grad_norm": 1.9899814372282567, "learning_rate": 9.174859062162037e-06, "loss": 0.7912, "step": 618 }, { "epoch": 0.21047262835770147, "grad_norm": 2.476667419018384, "learning_rate": 9.171825682166325e-06, "loss": 0.8038, "step": 619 }, { "epoch": 0.21081264875892552, "grad_norm": 2.295861238237628, "learning_rate": 9.168787240116162e-06, "loss": 0.8047, "step": 620 }, { "epoch": 0.2111526691601496, "grad_norm": 2.0894123097550104, "learning_rate": 9.165743739698364e-06, "loss": 0.8888, "step": 621 }, { "epoch": 0.21149268956137368, "grad_norm": 1.8237207223507654, "learning_rate": 9.162695184605887e-06, "loss": 0.8017, "step": 622 }, { "epoch": 0.21183270996259776, "grad_norm": 6.442654526204107, "learning_rate": 9.15964157853782e-06, "loss": 0.858, "step": 623 }, { "epoch": 0.21217273036382184, "grad_norm": 1.7241002709899875, "learning_rate": 9.15658292519938e-06, "loss": 0.8993, "step": 624 }, { "epoch": 0.21251275076504592, "grad_norm": 2.4396884010337767, "learning_rate": 9.153519228301907e-06, "loss": 0.8945, "step": 625 }, { "epoch": 0.21285277116626997, "grad_norm": 3.270858990699396, "learning_rate": 9.150450491562864e-06, "loss": 0.8649, "step": 626 }, { "epoch": 0.21319279156749404, "grad_norm": 3.3296457820914904, "learning_rate": 9.147376718705825e-06, "loss": 0.8044, "step": 627 }, { "epoch": 0.21353281196871812, "grad_norm": 1.8050916328292812, "learning_rate": 9.144297913460481e-06, "loss": 0.7789, "step": 628 }, { "epoch": 0.2138728323699422, "grad_norm": 1.6022202427177714, "learning_rate": 9.141214079562624e-06, "loss": 0.7811, "step": 629 }, { "epoch": 0.21421285277116628, "grad_norm": 3.8320152900329987, "learning_rate": 9.13812522075415e-06, "loss": 0.8481, "step": 630 }, { "epoch": 0.21455287317239036, "grad_norm": 2.968430851574413, "learning_rate": 9.13503134078305e-06, "loss": 0.8705, "step": 631 }, { "epoch": 0.2148928935736144, "grad_norm": 2.7320514154269016, "learning_rate": 9.13193244340341e-06, "loss": 0.7574, "step": 632 }, { "epoch": 0.21523291397483849, "grad_norm": 2.5381990072952987, "learning_rate": 9.128828532375404e-06, "loss": 0.758, "step": 633 }, { "epoch": 0.21557293437606256, "grad_norm": 4.664824544519933, "learning_rate": 9.125719611465287e-06, "loss": 0.9002, "step": 634 }, { "epoch": 0.21591295477728664, "grad_norm": 1.8271666953396724, "learning_rate": 9.122605684445397e-06, "loss": 0.8619, "step": 635 }, { "epoch": 0.21625297517851072, "grad_norm": 1.9433634039375405, "learning_rate": 9.119486755094143e-06, "loss": 0.8429, "step": 636 }, { "epoch": 0.21659299557973477, "grad_norm": 1.8859622430454264, "learning_rate": 9.116362827196002e-06, "loss": 0.7708, "step": 637 }, { "epoch": 0.21693301598095885, "grad_norm": 2.211237168392557, "learning_rate": 9.113233904541524e-06, "loss": 0.8633, "step": 638 }, { "epoch": 0.21727303638218293, "grad_norm": 1.767226866716121, "learning_rate": 9.110099990927311e-06, "loss": 0.9302, "step": 639 }, { "epoch": 0.217613056783407, "grad_norm": 1.989789546199134, "learning_rate": 9.106961090156026e-06, "loss": 0.8603, "step": 640 }, { "epoch": 0.21795307718463108, "grad_norm": 4.520549862271871, "learning_rate": 9.103817206036383e-06, "loss": 0.8902, "step": 641 }, { "epoch": 0.21829309758585516, "grad_norm": 1.938477575541222, "learning_rate": 9.100668342383138e-06, "loss": 0.8366, "step": 642 }, { "epoch": 0.2186331179870792, "grad_norm": 2.327565546501374, "learning_rate": 9.097514503017098e-06, "loss": 0.7551, "step": 643 }, { "epoch": 0.2189731383883033, "grad_norm": 2.1109166908567936, "learning_rate": 9.0943556917651e-06, "loss": 0.8257, "step": 644 }, { "epoch": 0.21931315878952737, "grad_norm": 1.8110479549556548, "learning_rate": 9.091191912460014e-06, "loss": 0.9507, "step": 645 }, { "epoch": 0.21965317919075145, "grad_norm": 2.251697261146959, "learning_rate": 9.088023168940743e-06, "loss": 0.6991, "step": 646 }, { "epoch": 0.21999319959197552, "grad_norm": 1.7773244864207038, "learning_rate": 9.08484946505221e-06, "loss": 0.7976, "step": 647 }, { "epoch": 0.2203332199931996, "grad_norm": 2.3077357797688176, "learning_rate": 9.08167080464536e-06, "loss": 0.8666, "step": 648 }, { "epoch": 0.22067324039442365, "grad_norm": 2.28086178140718, "learning_rate": 9.078487191577146e-06, "loss": 0.94, "step": 649 }, { "epoch": 0.22101326079564773, "grad_norm": 1.5485886049410107, "learning_rate": 9.075298629710536e-06, "loss": 0.8475, "step": 650 }, { "epoch": 0.2213532811968718, "grad_norm": 1.9290472754703258, "learning_rate": 9.072105122914502e-06, "loss": 0.8813, "step": 651 }, { "epoch": 0.2216933015980959, "grad_norm": 8.21841805972392, "learning_rate": 9.068906675064016e-06, "loss": 0.7745, "step": 652 }, { "epoch": 0.22203332199931997, "grad_norm": 2.9628280561922713, "learning_rate": 9.065703290040043e-06, "loss": 0.6788, "step": 653 }, { "epoch": 0.22237334240054404, "grad_norm": 3.0384149610010076, "learning_rate": 9.062494971729542e-06, "loss": 0.8977, "step": 654 }, { "epoch": 0.2227133628017681, "grad_norm": 3.001689658067599, "learning_rate": 9.059281724025455e-06, "loss": 0.7856, "step": 655 }, { "epoch": 0.22305338320299217, "grad_norm": 2.4216473597244557, "learning_rate": 9.056063550826708e-06, "loss": 0.8248, "step": 656 }, { "epoch": 0.22339340360421625, "grad_norm": 2.4916553224709115, "learning_rate": 9.052840456038204e-06, "loss": 0.8426, "step": 657 }, { "epoch": 0.22373342400544033, "grad_norm": 2.030963651676548, "learning_rate": 9.049612443570814e-06, "loss": 0.8562, "step": 658 }, { "epoch": 0.2240734444066644, "grad_norm": 2.8064288123845516, "learning_rate": 9.046379517341378e-06, "loss": 0.8298, "step": 659 }, { "epoch": 0.22441346480788849, "grad_norm": 2.279614877119188, "learning_rate": 9.0431416812727e-06, "loss": 0.8515, "step": 660 }, { "epoch": 0.22475348520911254, "grad_norm": 2.1516052872817553, "learning_rate": 9.039898939293539e-06, "loss": 0.7463, "step": 661 }, { "epoch": 0.2250935056103366, "grad_norm": 2.343567497267246, "learning_rate": 9.036651295338608e-06, "loss": 0.8554, "step": 662 }, { "epoch": 0.2254335260115607, "grad_norm": 2.6761262055968795, "learning_rate": 9.033398753348569e-06, "loss": 0.8184, "step": 663 }, { "epoch": 0.22577354641278477, "grad_norm": 2.0464322670596493, "learning_rate": 9.030141317270026e-06, "loss": 0.7478, "step": 664 }, { "epoch": 0.22611356681400885, "grad_norm": 2.2236911397789414, "learning_rate": 9.026878991055521e-06, "loss": 0.9156, "step": 665 }, { "epoch": 0.22645358721523293, "grad_norm": 2.1101918658429124, "learning_rate": 9.02361177866353e-06, "loss": 0.782, "step": 666 }, { "epoch": 0.22679360761645698, "grad_norm": 3.224573101223046, "learning_rate": 9.020339684058459e-06, "loss": 0.8831, "step": 667 }, { "epoch": 0.22713362801768106, "grad_norm": 2.155983593275046, "learning_rate": 9.017062711210638e-06, "loss": 0.8461, "step": 668 }, { "epoch": 0.22747364841890513, "grad_norm": 3.742735571520871, "learning_rate": 9.013780864096313e-06, "loss": 0.9233, "step": 669 }, { "epoch": 0.2278136688201292, "grad_norm": 2.11951387339584, "learning_rate": 9.010494146697648e-06, "loss": 0.8415, "step": 670 }, { "epoch": 0.2281536892213533, "grad_norm": 4.046208706917889, "learning_rate": 9.007202563002715e-06, "loss": 0.8367, "step": 671 }, { "epoch": 0.22849370962257737, "grad_norm": 2.5726534936217353, "learning_rate": 9.003906117005489e-06, "loss": 0.7983, "step": 672 }, { "epoch": 0.22883373002380142, "grad_norm": 2.3143058688116334, "learning_rate": 9.000604812705854e-06, "loss": 0.7471, "step": 673 }, { "epoch": 0.2291737504250255, "grad_norm": 7.689486779358199, "learning_rate": 8.997298654109573e-06, "loss": 0.7961, "step": 674 }, { "epoch": 0.22951377082624957, "grad_norm": 1.9154873476566034, "learning_rate": 8.993987645228313e-06, "loss": 0.8463, "step": 675 }, { "epoch": 0.22985379122747365, "grad_norm": 1.9308286258150182, "learning_rate": 8.99067179007962e-06, "loss": 0.7293, "step": 676 }, { "epoch": 0.23019381162869773, "grad_norm": 3.112154181656327, "learning_rate": 8.987351092686923e-06, "loss": 0.8588, "step": 677 }, { "epoch": 0.2305338320299218, "grad_norm": 1.7834759216247464, "learning_rate": 8.984025557079523e-06, "loss": 0.8339, "step": 678 }, { "epoch": 0.23087385243114586, "grad_norm": 2.316905251892595, "learning_rate": 8.980695187292598e-06, "loss": 0.7621, "step": 679 }, { "epoch": 0.23121387283236994, "grad_norm": 3.101167924048224, "learning_rate": 8.977359987367182e-06, "loss": 0.8604, "step": 680 }, { "epoch": 0.23155389323359402, "grad_norm": 2.2874629757251634, "learning_rate": 8.97401996135018e-06, "loss": 0.8787, "step": 681 }, { "epoch": 0.2318939136348181, "grad_norm": 2.4403671006704677, "learning_rate": 8.970675113294348e-06, "loss": 0.9373, "step": 682 }, { "epoch": 0.23223393403604217, "grad_norm": 2.4115030369064274, "learning_rate": 8.967325447258292e-06, "loss": 0.7396, "step": 683 }, { "epoch": 0.23257395443726622, "grad_norm": 2.1720889668199708, "learning_rate": 8.963970967306466e-06, "loss": 0.843, "step": 684 }, { "epoch": 0.2329139748384903, "grad_norm": 1.9401953765991744, "learning_rate": 8.960611677509166e-06, "loss": 0.8625, "step": 685 }, { "epoch": 0.23325399523971438, "grad_norm": 2.068432286888123, "learning_rate": 8.95724758194252e-06, "loss": 0.8402, "step": 686 }, { "epoch": 0.23359401564093846, "grad_norm": 1.591243068030512, "learning_rate": 8.953878684688492e-06, "loss": 0.7842, "step": 687 }, { "epoch": 0.23393403604216254, "grad_norm": 1.7715469138687294, "learning_rate": 8.950504989834873e-06, "loss": 0.8833, "step": 688 }, { "epoch": 0.2342740564433866, "grad_norm": 2.0026773241901537, "learning_rate": 8.94712650147527e-06, "loss": 0.8189, "step": 689 }, { "epoch": 0.23461407684461066, "grad_norm": 2.8052765922906917, "learning_rate": 8.943743223709109e-06, "loss": 0.7157, "step": 690 }, { "epoch": 0.23495409724583474, "grad_norm": 1.7526459634636724, "learning_rate": 8.94035516064163e-06, "loss": 0.7976, "step": 691 }, { "epoch": 0.23529411764705882, "grad_norm": 1.5061919498395846, "learning_rate": 8.936962316383876e-06, "loss": 0.7932, "step": 692 }, { "epoch": 0.2356341380482829, "grad_norm": 2.5099728137526736, "learning_rate": 8.933564695052692e-06, "loss": 0.7652, "step": 693 }, { "epoch": 0.23597415844950698, "grad_norm": 1.8950918439469646, "learning_rate": 8.930162300770721e-06, "loss": 0.7014, "step": 694 }, { "epoch": 0.23631417885073105, "grad_norm": 2.1074334438205358, "learning_rate": 8.926755137666396e-06, "loss": 0.8158, "step": 695 }, { "epoch": 0.2366541992519551, "grad_norm": 2.093263958473567, "learning_rate": 8.923343209873937e-06, "loss": 0.8099, "step": 696 }, { "epoch": 0.23699421965317918, "grad_norm": 1.8542225188561288, "learning_rate": 8.919926521533346e-06, "loss": 0.8189, "step": 697 }, { "epoch": 0.23733424005440326, "grad_norm": 2.087963516864021, "learning_rate": 8.9165050767904e-06, "loss": 0.8313, "step": 698 }, { "epoch": 0.23767426045562734, "grad_norm": 2.047018307961187, "learning_rate": 8.913078879796648e-06, "loss": 0.8662, "step": 699 }, { "epoch": 0.23801428085685142, "grad_norm": 2.7094333692876837, "learning_rate": 8.90964793470941e-06, "loss": 0.8143, "step": 700 }, { "epoch": 0.2383543012580755, "grad_norm": 2.7906165665573837, "learning_rate": 8.906212245691755e-06, "loss": 0.8905, "step": 701 }, { "epoch": 0.23869432165929955, "grad_norm": 2.797092594783242, "learning_rate": 8.902771816912521e-06, "loss": 0.879, "step": 702 }, { "epoch": 0.23903434206052362, "grad_norm": 2.5180099645451066, "learning_rate": 8.899326652546292e-06, "loss": 0.7547, "step": 703 }, { "epoch": 0.2393743624617477, "grad_norm": 1.7769222125627648, "learning_rate": 8.895876756773398e-06, "loss": 0.9269, "step": 704 }, { "epoch": 0.23971438286297178, "grad_norm": 1.8153973558094076, "learning_rate": 8.89242213377991e-06, "loss": 0.8157, "step": 705 }, { "epoch": 0.24005440326419586, "grad_norm": 1.7008522196194176, "learning_rate": 8.888962787757636e-06, "loss": 0.8323, "step": 706 }, { "epoch": 0.24039442366541994, "grad_norm": 3.122832621811037, "learning_rate": 8.885498722904114e-06, "loss": 0.8148, "step": 707 }, { "epoch": 0.240734444066644, "grad_norm": 2.6291893036900045, "learning_rate": 8.882029943422605e-06, "loss": 0.8432, "step": 708 }, { "epoch": 0.24107446446786807, "grad_norm": 2.273799903298918, "learning_rate": 8.8785564535221e-06, "loss": 0.8374, "step": 709 }, { "epoch": 0.24141448486909214, "grad_norm": 1.8887964125341279, "learning_rate": 8.875078257417294e-06, "loss": 0.8395, "step": 710 }, { "epoch": 0.24175450527031622, "grad_norm": 1.9167482586163092, "learning_rate": 8.871595359328603e-06, "loss": 0.8333, "step": 711 }, { "epoch": 0.2420945256715403, "grad_norm": 2.0043550475154777, "learning_rate": 8.868107763482137e-06, "loss": 0.8465, "step": 712 }, { "epoch": 0.24243454607276438, "grad_norm": 1.8537150982530552, "learning_rate": 8.864615474109715e-06, "loss": 0.7761, "step": 713 }, { "epoch": 0.24277456647398843, "grad_norm": 2.2270421393649777, "learning_rate": 8.861118495448847e-06, "loss": 0.7535, "step": 714 }, { "epoch": 0.2431145868752125, "grad_norm": 1.9397964001880972, "learning_rate": 8.857616831742739e-06, "loss": 0.751, "step": 715 }, { "epoch": 0.24345460727643659, "grad_norm": 2.797785675978316, "learning_rate": 8.854110487240275e-06, "loss": 0.7928, "step": 716 }, { "epoch": 0.24379462767766066, "grad_norm": 3.0971738774972604, "learning_rate": 8.850599466196018e-06, "loss": 0.7754, "step": 717 }, { "epoch": 0.24413464807888474, "grad_norm": 3.154393286576707, "learning_rate": 8.847083772870209e-06, "loss": 0.7009, "step": 718 }, { "epoch": 0.24447466848010882, "grad_norm": 2.605914049831787, "learning_rate": 8.84356341152876e-06, "loss": 0.7458, "step": 719 }, { "epoch": 0.24481468888133287, "grad_norm": 2.3256897705301522, "learning_rate": 8.840038386443243e-06, "loss": 0.7355, "step": 720 }, { "epoch": 0.24515470928255695, "grad_norm": 2.092998344309539, "learning_rate": 8.836508701890892e-06, "loss": 0.859, "step": 721 }, { "epoch": 0.24549472968378103, "grad_norm": 1.8695733446918772, "learning_rate": 8.832974362154592e-06, "loss": 0.8425, "step": 722 }, { "epoch": 0.2458347500850051, "grad_norm": 1.9623481565876253, "learning_rate": 8.829435371522879e-06, "loss": 0.7531, "step": 723 }, { "epoch": 0.24617477048622918, "grad_norm": 1.7281382412711035, "learning_rate": 8.82589173428993e-06, "loss": 0.8975, "step": 724 }, { "epoch": 0.24651479088745323, "grad_norm": 1.6071614173391668, "learning_rate": 8.822343454755562e-06, "loss": 0.8718, "step": 725 }, { "epoch": 0.2468548112886773, "grad_norm": 1.9394291672670085, "learning_rate": 8.818790537225224e-06, "loss": 0.7458, "step": 726 }, { "epoch": 0.2471948316899014, "grad_norm": 2.0539393000556343, "learning_rate": 8.815232986009994e-06, "loss": 0.8104, "step": 727 }, { "epoch": 0.24753485209112547, "grad_norm": 2.047963973266287, "learning_rate": 8.81167080542657e-06, "loss": 0.8877, "step": 728 }, { "epoch": 0.24787487249234955, "grad_norm": 3.0026729958135134, "learning_rate": 8.80810399979727e-06, "loss": 0.8977, "step": 729 }, { "epoch": 0.24821489289357362, "grad_norm": 1.7334258967558374, "learning_rate": 8.804532573450024e-06, "loss": 0.7311, "step": 730 }, { "epoch": 0.24855491329479767, "grad_norm": 2.2237717981886056, "learning_rate": 8.800956530718365e-06, "loss": 0.8934, "step": 731 }, { "epoch": 0.24889493369602175, "grad_norm": 1.4722862638834173, "learning_rate": 8.797375875941431e-06, "loss": 0.7578, "step": 732 }, { "epoch": 0.24923495409724583, "grad_norm": 2.076977010854489, "learning_rate": 8.793790613463956e-06, "loss": 0.8266, "step": 733 }, { "epoch": 0.2495749744984699, "grad_norm": 1.9019007451038732, "learning_rate": 8.790200747636261e-06, "loss": 0.817, "step": 734 }, { "epoch": 0.249914994899694, "grad_norm": 3.2523001391650603, "learning_rate": 8.78660628281426e-06, "loss": 0.823, "step": 735 }, { "epoch": 0.25025501530091804, "grad_norm": 2.2606861111968706, "learning_rate": 8.78300722335944e-06, "loss": 0.7769, "step": 736 }, { "epoch": 0.25059503570214214, "grad_norm": 2.4433629042102467, "learning_rate": 8.77940357363887e-06, "loss": 0.7904, "step": 737 }, { "epoch": 0.2509350561033662, "grad_norm": 2.108341620407174, "learning_rate": 8.77579533802518e-06, "loss": 0.8316, "step": 738 }, { "epoch": 0.2512750765045903, "grad_norm": 4.98477571143613, "learning_rate": 8.772182520896573e-06, "loss": 0.8266, "step": 739 }, { "epoch": 0.25161509690581435, "grad_norm": 1.9399774055291894, "learning_rate": 8.768565126636806e-06, "loss": 0.8225, "step": 740 }, { "epoch": 0.2519551173070384, "grad_norm": 2.594549449894867, "learning_rate": 8.764943159635193e-06, "loss": 0.7238, "step": 741 }, { "epoch": 0.2522951377082625, "grad_norm": 2.6610598581449247, "learning_rate": 8.761316624286593e-06, "loss": 0.7797, "step": 742 }, { "epoch": 0.25263515810948656, "grad_norm": 1.7706557977888584, "learning_rate": 8.757685524991414e-06, "loss": 0.8875, "step": 743 }, { "epoch": 0.25297517851071066, "grad_norm": 2.367385917663463, "learning_rate": 8.754049866155594e-06, "loss": 0.8251, "step": 744 }, { "epoch": 0.2533151989119347, "grad_norm": 2.0014441027718557, "learning_rate": 8.750409652190609e-06, "loss": 0.8519, "step": 745 }, { "epoch": 0.25365521931315876, "grad_norm": 1.8473603569334116, "learning_rate": 8.74676488751346e-06, "loss": 0.8601, "step": 746 }, { "epoch": 0.25399523971438287, "grad_norm": 2.477807792703976, "learning_rate": 8.743115576546672e-06, "loss": 0.9798, "step": 747 }, { "epoch": 0.2543352601156069, "grad_norm": 2.1596648623116694, "learning_rate": 8.739461723718286e-06, "loss": 0.9241, "step": 748 }, { "epoch": 0.254675280516831, "grad_norm": 2.271967660622451, "learning_rate": 8.73580333346185e-06, "loss": 0.9333, "step": 749 }, { "epoch": 0.2550153009180551, "grad_norm": 1.89409525964846, "learning_rate": 8.732140410216422e-06, "loss": 0.9235, "step": 750 }, { "epoch": 0.2553553213192792, "grad_norm": 1.9160319862426827, "learning_rate": 8.72847295842656e-06, "loss": 0.8362, "step": 751 }, { "epoch": 0.25569534172050323, "grad_norm": 2.75041804529313, "learning_rate": 8.724800982542313e-06, "loss": 0.8281, "step": 752 }, { "epoch": 0.2560353621217273, "grad_norm": 1.9660343049850402, "learning_rate": 8.721124487019226e-06, "loss": 0.8134, "step": 753 }, { "epoch": 0.2563753825229514, "grad_norm": 1.7476864872494857, "learning_rate": 8.717443476318322e-06, "loss": 0.7963, "step": 754 }, { "epoch": 0.25671540292417544, "grad_norm": 1.7181600952027278, "learning_rate": 8.713757954906105e-06, "loss": 0.6619, "step": 755 }, { "epoch": 0.25705542332539955, "grad_norm": 4.689603340381868, "learning_rate": 8.710067927254555e-06, "loss": 0.8325, "step": 756 }, { "epoch": 0.2573954437266236, "grad_norm": 2.0670743417962014, "learning_rate": 8.706373397841114e-06, "loss": 0.7841, "step": 757 }, { "epoch": 0.25773546412784765, "grad_norm": 1.9345516631091482, "learning_rate": 8.702674371148692e-06, "loss": 0.7412, "step": 758 }, { "epoch": 0.25807548452907175, "grad_norm": 2.3058554102539865, "learning_rate": 8.698970851665652e-06, "loss": 0.8672, "step": 759 }, { "epoch": 0.2584155049302958, "grad_norm": 1.906875691115053, "learning_rate": 8.695262843885812e-06, "loss": 0.7907, "step": 760 }, { "epoch": 0.2587555253315199, "grad_norm": 1.8081498930839859, "learning_rate": 8.691550352308431e-06, "loss": 0.7257, "step": 761 }, { "epoch": 0.25909554573274396, "grad_norm": 2.0456832516321377, "learning_rate": 8.687833381438215e-06, "loss": 0.8767, "step": 762 }, { "epoch": 0.25943556613396807, "grad_norm": 4.818955286864829, "learning_rate": 8.684111935785299e-06, "loss": 0.809, "step": 763 }, { "epoch": 0.2597755865351921, "grad_norm": 1.6359696437957223, "learning_rate": 8.680386019865253e-06, "loss": 0.8736, "step": 764 }, { "epoch": 0.26011560693641617, "grad_norm": 1.9275763227542202, "learning_rate": 8.676655638199068e-06, "loss": 0.7778, "step": 765 }, { "epoch": 0.26045562733764027, "grad_norm": 1.5111168632740775, "learning_rate": 8.67292079531315e-06, "loss": 0.7518, "step": 766 }, { "epoch": 0.2607956477388643, "grad_norm": 1.9868081030493614, "learning_rate": 8.669181495739332e-06, "loss": 0.876, "step": 767 }, { "epoch": 0.26113566814008843, "grad_norm": 1.9632367709448835, "learning_rate": 8.665437744014838e-06, "loss": 0.7469, "step": 768 }, { "epoch": 0.2614756885413125, "grad_norm": 2.658790741994479, "learning_rate": 8.661689544682301e-06, "loss": 0.8102, "step": 769 }, { "epoch": 0.26181570894253653, "grad_norm": 1.9709245415214305, "learning_rate": 8.657936902289756e-06, "loss": 0.8966, "step": 770 }, { "epoch": 0.26215572934376064, "grad_norm": 3.0701001889258515, "learning_rate": 8.65417982139062e-06, "loss": 0.9841, "step": 771 }, { "epoch": 0.2624957497449847, "grad_norm": 4.54013202807214, "learning_rate": 8.650418306543704e-06, "loss": 0.8277, "step": 772 }, { "epoch": 0.2628357701462088, "grad_norm": 1.6031790587096684, "learning_rate": 8.646652362313193e-06, "loss": 0.8168, "step": 773 }, { "epoch": 0.26317579054743284, "grad_norm": 2.2725192507554857, "learning_rate": 8.642881993268647e-06, "loss": 0.8552, "step": 774 }, { "epoch": 0.2635158109486569, "grad_norm": 1.9449496721499624, "learning_rate": 8.639107203985e-06, "loss": 0.8014, "step": 775 }, { "epoch": 0.263855831349881, "grad_norm": 4.227240205226276, "learning_rate": 8.635327999042543e-06, "loss": 0.9003, "step": 776 }, { "epoch": 0.26419585175110505, "grad_norm": 1.7770839323226375, "learning_rate": 8.63154438302693e-06, "loss": 0.8669, "step": 777 }, { "epoch": 0.26453587215232915, "grad_norm": 4.668426873038303, "learning_rate": 8.627756360529166e-06, "loss": 0.861, "step": 778 }, { "epoch": 0.2648758925535532, "grad_norm": 2.0048269343626663, "learning_rate": 8.6239639361456e-06, "loss": 0.7886, "step": 779 }, { "epoch": 0.2652159129547773, "grad_norm": 2.793168861569981, "learning_rate": 8.620167114477926e-06, "loss": 0.8552, "step": 780 }, { "epoch": 0.26555593335600136, "grad_norm": 1.832488110710129, "learning_rate": 8.616365900133175e-06, "loss": 0.8196, "step": 781 }, { "epoch": 0.2658959537572254, "grad_norm": 4.023379445825273, "learning_rate": 8.612560297723697e-06, "loss": 0.7989, "step": 782 }, { "epoch": 0.2662359741584495, "grad_norm": 2.332651801821611, "learning_rate": 8.608750311867182e-06, "loss": 0.7508, "step": 783 }, { "epoch": 0.26657599455967357, "grad_norm": 6.696709614360918, "learning_rate": 8.60493594718663e-06, "loss": 0.8147, "step": 784 }, { "epoch": 0.2669160149608977, "grad_norm": 1.8834108862545373, "learning_rate": 8.601117208310351e-06, "loss": 0.9059, "step": 785 }, { "epoch": 0.2672560353621217, "grad_norm": 2.077771875039454, "learning_rate": 8.597294099871974e-06, "loss": 0.7673, "step": 786 }, { "epoch": 0.2675960557633458, "grad_norm": 6.9046174801442675, "learning_rate": 8.59346662651042e-06, "loss": 0.8263, "step": 787 }, { "epoch": 0.2679360761645699, "grad_norm": 2.77437081702886, "learning_rate": 8.589634792869908e-06, "loss": 0.8334, "step": 788 }, { "epoch": 0.26827609656579393, "grad_norm": 2.308063436284089, "learning_rate": 8.58579860359995e-06, "loss": 0.8516, "step": 789 }, { "epoch": 0.26861611696701804, "grad_norm": 1.5049106697591117, "learning_rate": 8.581958063355344e-06, "loss": 0.7896, "step": 790 }, { "epoch": 0.2689561373682421, "grad_norm": 2.273632666328108, "learning_rate": 8.578113176796165e-06, "loss": 0.9209, "step": 791 }, { "epoch": 0.2692961577694662, "grad_norm": 2.223581774886736, "learning_rate": 8.574263948587762e-06, "loss": 0.7586, "step": 792 }, { "epoch": 0.26963617817069024, "grad_norm": 2.234320213681529, "learning_rate": 8.570410383400754e-06, "loss": 0.9106, "step": 793 }, { "epoch": 0.2699761985719143, "grad_norm": 1.8073070815618781, "learning_rate": 8.56655248591102e-06, "loss": 0.8563, "step": 794 }, { "epoch": 0.2703162189731384, "grad_norm": 2.216862016544106, "learning_rate": 8.562690260799696e-06, "loss": 0.8404, "step": 795 }, { "epoch": 0.27065623937436245, "grad_norm": 3.3719814382949944, "learning_rate": 8.558823712753171e-06, "loss": 0.8676, "step": 796 }, { "epoch": 0.27099625977558656, "grad_norm": 2.196633013307635, "learning_rate": 8.554952846463081e-06, "loss": 0.8648, "step": 797 }, { "epoch": 0.2713362801768106, "grad_norm": 2.5165242077281595, "learning_rate": 8.551077666626292e-06, "loss": 0.7004, "step": 798 }, { "epoch": 0.27167630057803466, "grad_norm": 1.7249240592256398, "learning_rate": 8.54719817794492e-06, "loss": 0.7373, "step": 799 }, { "epoch": 0.27201632097925876, "grad_norm": 1.677617648661045, "learning_rate": 8.543314385126296e-06, "loss": 0.8333, "step": 800 }, { "epoch": 0.2723563413804828, "grad_norm": 2.7130063773245223, "learning_rate": 8.539426292882976e-06, "loss": 0.7646, "step": 801 }, { "epoch": 0.2726963617817069, "grad_norm": 3.5342476236653084, "learning_rate": 8.535533905932739e-06, "loss": 0.747, "step": 802 }, { "epoch": 0.27303638218293097, "grad_norm": 2.6210750163057357, "learning_rate": 8.531637228998569e-06, "loss": 0.8778, "step": 803 }, { "epoch": 0.2733764025841551, "grad_norm": 2.0347303242471853, "learning_rate": 8.527736266808658e-06, "loss": 0.769, "step": 804 }, { "epoch": 0.2737164229853791, "grad_norm": 2.4027128478891915, "learning_rate": 8.523831024096396e-06, "loss": 0.8585, "step": 805 }, { "epoch": 0.2740564433866032, "grad_norm": 1.8350089573984862, "learning_rate": 8.519921505600368e-06, "loss": 0.8113, "step": 806 }, { "epoch": 0.2743964637878273, "grad_norm": 2.2589489484204908, "learning_rate": 8.516007716064352e-06, "loss": 0.8187, "step": 807 }, { "epoch": 0.27473648418905133, "grad_norm": 1.858296448626169, "learning_rate": 8.5120896602373e-06, "loss": 0.9453, "step": 808 }, { "epoch": 0.27507650459027544, "grad_norm": 2.355100329431512, "learning_rate": 8.508167342873342e-06, "loss": 0.8078, "step": 809 }, { "epoch": 0.2754165249914995, "grad_norm": 1.9899667485080101, "learning_rate": 8.504240768731787e-06, "loss": 0.8554, "step": 810 }, { "epoch": 0.27575654539272354, "grad_norm": 1.8787714779536697, "learning_rate": 8.500309942577098e-06, "loss": 0.8568, "step": 811 }, { "epoch": 0.27609656579394765, "grad_norm": 2.068637813773282, "learning_rate": 8.496374869178908e-06, "loss": 0.848, "step": 812 }, { "epoch": 0.2764365861951717, "grad_norm": 1.7791526260663866, "learning_rate": 8.492435553311995e-06, "loss": 0.8251, "step": 813 }, { "epoch": 0.2767766065963958, "grad_norm": 1.8869896486229023, "learning_rate": 8.48849199975629e-06, "loss": 0.778, "step": 814 }, { "epoch": 0.27711662699761985, "grad_norm": 2.2096789072585414, "learning_rate": 8.484544213296864e-06, "loss": 0.8346, "step": 815 }, { "epoch": 0.2774566473988439, "grad_norm": 2.170554684057787, "learning_rate": 8.480592198723922e-06, "loss": 0.9079, "step": 816 }, { "epoch": 0.277796667800068, "grad_norm": 1.9605343168925984, "learning_rate": 8.476635960832805e-06, "loss": 0.9024, "step": 817 }, { "epoch": 0.27813668820129206, "grad_norm": 2.5300613494335638, "learning_rate": 8.472675504423972e-06, "loss": 0.7871, "step": 818 }, { "epoch": 0.27847670860251617, "grad_norm": 2.8042077194040287, "learning_rate": 8.468710834303007e-06, "loss": 0.7785, "step": 819 }, { "epoch": 0.2788167290037402, "grad_norm": 1.697967343522963, "learning_rate": 8.464741955280603e-06, "loss": 0.8535, "step": 820 }, { "epoch": 0.2791567494049643, "grad_norm": 2.2291137255399303, "learning_rate": 8.460768872172558e-06, "loss": 0.8406, "step": 821 }, { "epoch": 0.27949676980618837, "grad_norm": 1.919715258452679, "learning_rate": 8.456791589799777e-06, "loss": 0.8334, "step": 822 }, { "epoch": 0.2798367902074124, "grad_norm": 1.9257825337008065, "learning_rate": 8.45281011298826e-06, "loss": 0.7674, "step": 823 }, { "epoch": 0.28017681060863653, "grad_norm": 1.9491101623001321, "learning_rate": 8.448824446569087e-06, "loss": 0.8832, "step": 824 }, { "epoch": 0.2805168310098606, "grad_norm": 1.870418445256147, "learning_rate": 8.444834595378434e-06, "loss": 0.8243, "step": 825 }, { "epoch": 0.2808568514110847, "grad_norm": 15.898061851643817, "learning_rate": 8.440840564257547e-06, "loss": 0.9136, "step": 826 }, { "epoch": 0.28119687181230874, "grad_norm": 4.472135160620738, "learning_rate": 8.436842358052746e-06, "loss": 0.7969, "step": 827 }, { "epoch": 0.2815368922135328, "grad_norm": 1.761895889926857, "learning_rate": 8.432839981615419e-06, "loss": 0.7631, "step": 828 }, { "epoch": 0.2818769126147569, "grad_norm": 2.3826293642920735, "learning_rate": 8.428833439802012e-06, "loss": 0.8369, "step": 829 }, { "epoch": 0.28221693301598094, "grad_norm": 2.027771199922908, "learning_rate": 8.424822737474023e-06, "loss": 0.752, "step": 830 }, { "epoch": 0.28255695341720505, "grad_norm": 2.5040444225047596, "learning_rate": 8.420807879498002e-06, "loss": 0.9132, "step": 831 }, { "epoch": 0.2828969738184291, "grad_norm": 1.8159022349945535, "learning_rate": 8.416788870745544e-06, "loss": 0.8259, "step": 832 }, { "epoch": 0.2832369942196532, "grad_norm": 2.620947283954682, "learning_rate": 8.412765716093273e-06, "loss": 0.8616, "step": 833 }, { "epoch": 0.28357701462087725, "grad_norm": 1.985024515267911, "learning_rate": 8.408738420422847e-06, "loss": 0.8538, "step": 834 }, { "epoch": 0.2839170350221013, "grad_norm": 1.7903361247800387, "learning_rate": 8.40470698862095e-06, "loss": 0.8478, "step": 835 }, { "epoch": 0.2842570554233254, "grad_norm": 2.1338723210061974, "learning_rate": 8.400671425579283e-06, "loss": 0.7906, "step": 836 }, { "epoch": 0.28459707582454946, "grad_norm": 1.9295239837495932, "learning_rate": 8.396631736194563e-06, "loss": 0.8481, "step": 837 }, { "epoch": 0.28493709622577357, "grad_norm": 1.983034000002347, "learning_rate": 8.39258792536851e-06, "loss": 0.8847, "step": 838 }, { "epoch": 0.2852771166269976, "grad_norm": 2.2803142495667035, "learning_rate": 8.388539998007847e-06, "loss": 0.9007, "step": 839 }, { "epoch": 0.28561713702822167, "grad_norm": 2.7645969730004807, "learning_rate": 8.384487959024293e-06, "loss": 0.7356, "step": 840 }, { "epoch": 0.2859571574294458, "grad_norm": 3.149778399577589, "learning_rate": 8.380431813334548e-06, "loss": 0.7855, "step": 841 }, { "epoch": 0.2862971778306698, "grad_norm": 1.830554950516933, "learning_rate": 8.37637156586031e-06, "loss": 0.8831, "step": 842 }, { "epoch": 0.28663719823189393, "grad_norm": 1.9793055946594367, "learning_rate": 8.372307221528239e-06, "loss": 0.8116, "step": 843 }, { "epoch": 0.286977218633118, "grad_norm": 2.062954991657379, "learning_rate": 8.368238785269976e-06, "loss": 0.8563, "step": 844 }, { "epoch": 0.2873172390343421, "grad_norm": 2.6403272415419834, "learning_rate": 8.36416626202212e-06, "loss": 0.8033, "step": 845 }, { "epoch": 0.28765725943556614, "grad_norm": 2.1424920280150506, "learning_rate": 8.360089656726238e-06, "loss": 0.9417, "step": 846 }, { "epoch": 0.2879972798367902, "grad_norm": 1.8495657823428482, "learning_rate": 8.356008974328843e-06, "loss": 0.8778, "step": 847 }, { "epoch": 0.2883373002380143, "grad_norm": 1.964102211636596, "learning_rate": 8.351924219781393e-06, "loss": 0.8762, "step": 848 }, { "epoch": 0.28867732063923834, "grad_norm": 5.329745330260959, "learning_rate": 8.347835398040297e-06, "loss": 0.8703, "step": 849 }, { "epoch": 0.28901734104046245, "grad_norm": 6.872745204669564, "learning_rate": 8.34374251406689e-06, "loss": 0.9126, "step": 850 }, { "epoch": 0.2893573614416865, "grad_norm": 3.021940703140289, "learning_rate": 8.339645572827439e-06, "loss": 0.8435, "step": 851 }, { "epoch": 0.28969738184291055, "grad_norm": 1.7359605774084226, "learning_rate": 8.335544579293138e-06, "loss": 0.8956, "step": 852 }, { "epoch": 0.29003740224413466, "grad_norm": 2.0495366847155645, "learning_rate": 8.331439538440089e-06, "loss": 0.8737, "step": 853 }, { "epoch": 0.2903774226453587, "grad_norm": 1.7403332283198236, "learning_rate": 8.327330455249316e-06, "loss": 0.836, "step": 854 }, { "epoch": 0.2907174430465828, "grad_norm": 1.765839927053788, "learning_rate": 8.323217334706736e-06, "loss": 0.7708, "step": 855 }, { "epoch": 0.29105746344780686, "grad_norm": 2.561707026442392, "learning_rate": 8.319100181803177e-06, "loss": 0.8048, "step": 856 }, { "epoch": 0.29139748384903097, "grad_norm": 1.893355173621553, "learning_rate": 8.314979001534351e-06, "loss": 0.8355, "step": 857 }, { "epoch": 0.291737504250255, "grad_norm": 1.6626617126300058, "learning_rate": 8.310853798900861e-06, "loss": 0.8117, "step": 858 }, { "epoch": 0.29207752465147907, "grad_norm": 2.2181642572480404, "learning_rate": 8.306724578908187e-06, "loss": 0.8809, "step": 859 }, { "epoch": 0.2924175450527032, "grad_norm": 2.1608263105904237, "learning_rate": 8.302591346566691e-06, "loss": 0.9428, "step": 860 }, { "epoch": 0.2927575654539272, "grad_norm": 2.2361606195465, "learning_rate": 8.298454106891593e-06, "loss": 0.8456, "step": 861 }, { "epoch": 0.29309758585515133, "grad_norm": 2.161708305297874, "learning_rate": 8.294312864902985e-06, "loss": 0.7702, "step": 862 }, { "epoch": 0.2934376062563754, "grad_norm": 1.9911157415642, "learning_rate": 8.290167625625811e-06, "loss": 0.8566, "step": 863 }, { "epoch": 0.29377762665759943, "grad_norm": 2.204248530981356, "learning_rate": 8.286018394089864e-06, "loss": 0.785, "step": 864 }, { "epoch": 0.29411764705882354, "grad_norm": 2.54115940873232, "learning_rate": 8.281865175329783e-06, "loss": 0.8669, "step": 865 }, { "epoch": 0.2944576674600476, "grad_norm": 1.6985894379936504, "learning_rate": 8.277707974385047e-06, "loss": 0.8809, "step": 866 }, { "epoch": 0.2947976878612717, "grad_norm": 1.9914761180754428, "learning_rate": 8.273546796299962e-06, "loss": 0.868, "step": 867 }, { "epoch": 0.29513770826249575, "grad_norm": 1.9848345218936125, "learning_rate": 8.269381646123666e-06, "loss": 0.8266, "step": 868 }, { "epoch": 0.2954777286637198, "grad_norm": 2.0822000899070674, "learning_rate": 8.265212528910113e-06, "loss": 0.9115, "step": 869 }, { "epoch": 0.2958177490649439, "grad_norm": 1.9979737857871827, "learning_rate": 8.261039449718068e-06, "loss": 0.7968, "step": 870 }, { "epoch": 0.29615776946616795, "grad_norm": 2.0218833894280532, "learning_rate": 8.256862413611113e-06, "loss": 0.8031, "step": 871 }, { "epoch": 0.29649778986739206, "grad_norm": 2.006168397097048, "learning_rate": 8.252681425657617e-06, "loss": 0.8669, "step": 872 }, { "epoch": 0.2968378102686161, "grad_norm": 2.2355962957542377, "learning_rate": 8.248496490930753e-06, "loss": 0.8274, "step": 873 }, { "epoch": 0.2971778306698402, "grad_norm": 1.7904006258629988, "learning_rate": 8.244307614508487e-06, "loss": 0.7554, "step": 874 }, { "epoch": 0.29751785107106427, "grad_norm": 2.0320747257565444, "learning_rate": 8.240114801473558e-06, "loss": 0.7651, "step": 875 }, { "epoch": 0.2978578714722883, "grad_norm": 2.6665182314923412, "learning_rate": 8.23591805691349e-06, "loss": 0.8223, "step": 876 }, { "epoch": 0.2981978918735124, "grad_norm": 1.8328124167485744, "learning_rate": 8.23171738592057e-06, "loss": 0.9082, "step": 877 }, { "epoch": 0.29853791227473647, "grad_norm": 2.3699720185830757, "learning_rate": 8.227512793591855e-06, "loss": 0.9096, "step": 878 }, { "epoch": 0.2988779326759606, "grad_norm": 1.9090567074503153, "learning_rate": 8.223304285029159e-06, "loss": 0.7705, "step": 879 }, { "epoch": 0.29921795307718463, "grad_norm": 2.2190907511890368, "learning_rate": 8.219091865339045e-06, "loss": 0.7971, "step": 880 }, { "epoch": 0.2995579734784087, "grad_norm": 2.253264189984432, "learning_rate": 8.214875539632825e-06, "loss": 0.7269, "step": 881 }, { "epoch": 0.2998979938796328, "grad_norm": 2.0376522816245934, "learning_rate": 8.21065531302655e-06, "loss": 0.8329, "step": 882 }, { "epoch": 0.30023801428085684, "grad_norm": 3.6223570808965007, "learning_rate": 8.206431190641002e-06, "loss": 0.8321, "step": 883 }, { "epoch": 0.30057803468208094, "grad_norm": 3.8252210397062694, "learning_rate": 8.202203177601693e-06, "loss": 0.8164, "step": 884 }, { "epoch": 0.300918055083305, "grad_norm": 1.776370483766253, "learning_rate": 8.197971279038854e-06, "loss": 0.8426, "step": 885 }, { "epoch": 0.3012580754845291, "grad_norm": 2.385354175900532, "learning_rate": 8.193735500087432e-06, "loss": 0.7418, "step": 886 }, { "epoch": 0.30159809588575315, "grad_norm": 1.6779774511826855, "learning_rate": 8.189495845887083e-06, "loss": 0.7568, "step": 887 }, { "epoch": 0.3019381162869772, "grad_norm": 2.4863457173840544, "learning_rate": 8.185252321582162e-06, "loss": 0.8176, "step": 888 }, { "epoch": 0.3022781366882013, "grad_norm": 4.0386363547881485, "learning_rate": 8.18100493232172e-06, "loss": 0.9485, "step": 889 }, { "epoch": 0.30261815708942535, "grad_norm": 1.7173326803227138, "learning_rate": 8.176753683259506e-06, "loss": 0.7396, "step": 890 }, { "epoch": 0.30295817749064946, "grad_norm": 2.3498732585677202, "learning_rate": 8.172498579553939e-06, "loss": 0.7183, "step": 891 }, { "epoch": 0.3032981978918735, "grad_norm": 2.44411432379618, "learning_rate": 8.168239626368126e-06, "loss": 0.7807, "step": 892 }, { "epoch": 0.30363821829309756, "grad_norm": 2.478384366935357, "learning_rate": 8.16397682886984e-06, "loss": 0.8315, "step": 893 }, { "epoch": 0.30397823869432167, "grad_norm": 2.5774348066125894, "learning_rate": 8.15971019223152e-06, "loss": 0.8123, "step": 894 }, { "epoch": 0.3043182590955457, "grad_norm": 1.7111325488707947, "learning_rate": 8.155439721630265e-06, "loss": 0.8263, "step": 895 }, { "epoch": 0.3046582794967698, "grad_norm": 2.7448751652607553, "learning_rate": 8.151165422247822e-06, "loss": 0.8248, "step": 896 }, { "epoch": 0.3049982998979939, "grad_norm": 2.1485101781392877, "learning_rate": 8.146887299270585e-06, "loss": 0.8035, "step": 897 }, { "epoch": 0.305338320299218, "grad_norm": 1.776591747388704, "learning_rate": 8.142605357889592e-06, "loss": 0.8089, "step": 898 }, { "epoch": 0.30567834070044203, "grad_norm": 3.2632560104335173, "learning_rate": 8.13831960330051e-06, "loss": 0.8202, "step": 899 }, { "epoch": 0.3060183611016661, "grad_norm": 2.8739582827981347, "learning_rate": 8.13403004070363e-06, "loss": 0.9092, "step": 900 }, { "epoch": 0.3063583815028902, "grad_norm": 2.1943148602179994, "learning_rate": 8.129736675303873e-06, "loss": 0.8322, "step": 901 }, { "epoch": 0.30669840190411424, "grad_norm": 1.8531479477302115, "learning_rate": 8.125439512310765e-06, "loss": 0.7566, "step": 902 }, { "epoch": 0.30703842230533834, "grad_norm": 1.7228875957473064, "learning_rate": 8.121138556938444e-06, "loss": 0.8078, "step": 903 }, { "epoch": 0.3073784427065624, "grad_norm": 2.3898144961502745, "learning_rate": 8.116833814405648e-06, "loss": 0.8067, "step": 904 }, { "epoch": 0.30771846310778644, "grad_norm": 1.802933531637354, "learning_rate": 8.112525289935716e-06, "loss": 0.7799, "step": 905 }, { "epoch": 0.30805848350901055, "grad_norm": 2.5139323313707673, "learning_rate": 8.108212988756568e-06, "loss": 0.9037, "step": 906 }, { "epoch": 0.3083985039102346, "grad_norm": 1.9251179471419289, "learning_rate": 8.10389691610071e-06, "loss": 0.8635, "step": 907 }, { "epoch": 0.3087385243114587, "grad_norm": 2.193694058263112, "learning_rate": 8.099577077205225e-06, "loss": 0.8323, "step": 908 }, { "epoch": 0.30907854471268276, "grad_norm": 2.0048196549770885, "learning_rate": 8.095253477311765e-06, "loss": 0.7756, "step": 909 }, { "epoch": 0.3094185651139068, "grad_norm": 1.980230499045498, "learning_rate": 8.090926121666547e-06, "loss": 0.7977, "step": 910 }, { "epoch": 0.3097585855151309, "grad_norm": 2.4883937143671564, "learning_rate": 8.086595015520345e-06, "loss": 0.8233, "step": 911 }, { "epoch": 0.31009860591635496, "grad_norm": 2.1286197933597584, "learning_rate": 8.08226016412848e-06, "loss": 0.9729, "step": 912 }, { "epoch": 0.31043862631757907, "grad_norm": 2.086146264470014, "learning_rate": 8.07792157275082e-06, "loss": 0.8914, "step": 913 }, { "epoch": 0.3107786467188031, "grad_norm": 2.0772062973899423, "learning_rate": 8.073579246651775e-06, "loss": 0.945, "step": 914 }, { "epoch": 0.3111186671200272, "grad_norm": 2.0695504749457143, "learning_rate": 8.069233191100278e-06, "loss": 0.8634, "step": 915 }, { "epoch": 0.3114586875212513, "grad_norm": 2.0384939167389393, "learning_rate": 8.064883411369799e-06, "loss": 0.7785, "step": 916 }, { "epoch": 0.3117987079224753, "grad_norm": 2.1885792727969138, "learning_rate": 8.060529912738316e-06, "loss": 0.8655, "step": 917 }, { "epoch": 0.31213872832369943, "grad_norm": 2.1403065502782406, "learning_rate": 8.056172700488324e-06, "loss": 0.8965, "step": 918 }, { "epoch": 0.3124787487249235, "grad_norm": 2.1339209234822647, "learning_rate": 8.051811779906823e-06, "loss": 0.7545, "step": 919 }, { "epoch": 0.3128187691261476, "grad_norm": 1.8344955563510745, "learning_rate": 8.047447156285314e-06, "loss": 0.8804, "step": 920 }, { "epoch": 0.31315878952737164, "grad_norm": 2.423743963035901, "learning_rate": 8.043078834919792e-06, "loss": 0.8068, "step": 921 }, { "epoch": 0.3134988099285957, "grad_norm": 1.787239252052783, "learning_rate": 8.038706821110738e-06, "loss": 0.9271, "step": 922 }, { "epoch": 0.3138388303298198, "grad_norm": 2.0084809664473684, "learning_rate": 8.03433112016311e-06, "loss": 0.8244, "step": 923 }, { "epoch": 0.31417885073104385, "grad_norm": 2.0471692656266085, "learning_rate": 8.029951737386345e-06, "loss": 0.7478, "step": 924 }, { "epoch": 0.31451887113226795, "grad_norm": 2.143535486588015, "learning_rate": 8.025568678094346e-06, "loss": 0.7579, "step": 925 }, { "epoch": 0.314858891533492, "grad_norm": 1.835302258844517, "learning_rate": 8.021181947605474e-06, "loss": 0.771, "step": 926 }, { "epoch": 0.3151989119347161, "grad_norm": 2.109820781590098, "learning_rate": 8.016791551242548e-06, "loss": 0.8985, "step": 927 }, { "epoch": 0.31553893233594016, "grad_norm": 1.9465940317912624, "learning_rate": 8.012397494332832e-06, "loss": 0.9183, "step": 928 }, { "epoch": 0.3158789527371642, "grad_norm": 1.9499330380070024, "learning_rate": 8.00799978220804e-06, "loss": 0.8158, "step": 929 }, { "epoch": 0.3162189731383883, "grad_norm": 1.8510195207977735, "learning_rate": 8.003598420204307e-06, "loss": 0.8287, "step": 930 }, { "epoch": 0.31655899353961237, "grad_norm": 2.2106314828328895, "learning_rate": 7.99919341366221e-06, "loss": 0.8159, "step": 931 }, { "epoch": 0.31689901394083647, "grad_norm": 2.132400411746793, "learning_rate": 7.994784767926743e-06, "loss": 0.8686, "step": 932 }, { "epoch": 0.3172390343420605, "grad_norm": 1.9835217982818234, "learning_rate": 7.99037248834731e-06, "loss": 0.7661, "step": 933 }, { "epoch": 0.3175790547432846, "grad_norm": 1.8257541034651736, "learning_rate": 7.985956580277738e-06, "loss": 0.8968, "step": 934 }, { "epoch": 0.3179190751445087, "grad_norm": 1.7806392840797605, "learning_rate": 7.981537049076243e-06, "loss": 0.8334, "step": 935 }, { "epoch": 0.31825909554573273, "grad_norm": 1.946318339623462, "learning_rate": 7.977113900105444e-06, "loss": 0.8255, "step": 936 }, { "epoch": 0.31859911594695683, "grad_norm": 2.5853186802795856, "learning_rate": 7.972687138732352e-06, "loss": 0.8669, "step": 937 }, { "epoch": 0.3189391363481809, "grad_norm": 2.2143025012416913, "learning_rate": 7.968256770328353e-06, "loss": 0.7807, "step": 938 }, { "epoch": 0.319279156749405, "grad_norm": 2.025109539800048, "learning_rate": 7.96382280026922e-06, "loss": 0.7668, "step": 939 }, { "epoch": 0.31961917715062904, "grad_norm": 2.309014049320171, "learning_rate": 7.959385233935087e-06, "loss": 0.7586, "step": 940 }, { "epoch": 0.3199591975518531, "grad_norm": 2.297274599241439, "learning_rate": 7.954944076710457e-06, "loss": 0.8962, "step": 941 }, { "epoch": 0.3202992179530772, "grad_norm": 2.054541472734675, "learning_rate": 7.95049933398419e-06, "loss": 0.8203, "step": 942 }, { "epoch": 0.32063923835430125, "grad_norm": 2.6215309252037873, "learning_rate": 7.946051011149494e-06, "loss": 0.8248, "step": 943 }, { "epoch": 0.32097925875552535, "grad_norm": 2.711737030629169, "learning_rate": 7.941599113603923e-06, "loss": 0.8764, "step": 944 }, { "epoch": 0.3213192791567494, "grad_norm": 2.2912349790018633, "learning_rate": 7.937143646749367e-06, "loss": 0.7335, "step": 945 }, { "epoch": 0.32165929955797345, "grad_norm": 1.7908168705820537, "learning_rate": 7.93268461599205e-06, "loss": 0.8435, "step": 946 }, { "epoch": 0.32199931995919756, "grad_norm": 4.946848177327164, "learning_rate": 7.928222026742517e-06, "loss": 0.8039, "step": 947 }, { "epoch": 0.3223393403604216, "grad_norm": 2.0622056965822932, "learning_rate": 7.923755884415634e-06, "loss": 0.9067, "step": 948 }, { "epoch": 0.3226793607616457, "grad_norm": 2.6667796950375715, "learning_rate": 7.919286194430573e-06, "loss": 0.7022, "step": 949 }, { "epoch": 0.32301938116286977, "grad_norm": 1.9317481210027303, "learning_rate": 7.914812962210819e-06, "loss": 0.8264, "step": 950 }, { "epoch": 0.3233594015640938, "grad_norm": 1.8020949558699724, "learning_rate": 7.910336193184146e-06, "loss": 0.7472, "step": 951 }, { "epoch": 0.3236994219653179, "grad_norm": 1.9275396744198368, "learning_rate": 7.905855892782625e-06, "loss": 0.7309, "step": 952 }, { "epoch": 0.324039442366542, "grad_norm": 1.8182685008859338, "learning_rate": 7.901372066442615e-06, "loss": 0.7625, "step": 953 }, { "epoch": 0.3243794627677661, "grad_norm": 2.010794684369215, "learning_rate": 7.89688471960474e-06, "loss": 0.8687, "step": 954 }, { "epoch": 0.32471948316899013, "grad_norm": 2.0540586819091997, "learning_rate": 7.892393857713914e-06, "loss": 0.8335, "step": 955 }, { "epoch": 0.32505950357021424, "grad_norm": 2.325367093840662, "learning_rate": 7.887899486219304e-06, "loss": 0.783, "step": 956 }, { "epoch": 0.3253995239714383, "grad_norm": 2.1873077332560156, "learning_rate": 7.883401610574338e-06, "loss": 0.8885, "step": 957 }, { "epoch": 0.32573954437266234, "grad_norm": 2.177598891005239, "learning_rate": 7.878900236236693e-06, "loss": 0.763, "step": 958 }, { "epoch": 0.32607956477388644, "grad_norm": 1.9372352426800454, "learning_rate": 7.874395368668302e-06, "loss": 0.8097, "step": 959 }, { "epoch": 0.3264195851751105, "grad_norm": 1.8413146998215235, "learning_rate": 7.869887013335324e-06, "loss": 0.7083, "step": 960 }, { "epoch": 0.3267596055763346, "grad_norm": 1.704925469624369, "learning_rate": 7.865375175708158e-06, "loss": 0.6822, "step": 961 }, { "epoch": 0.32709962597755865, "grad_norm": 3.100437896445037, "learning_rate": 7.860859861261423e-06, "loss": 0.7932, "step": 962 }, { "epoch": 0.3274396463787827, "grad_norm": 1.6975108379354062, "learning_rate": 7.856341075473963e-06, "loss": 0.7636, "step": 963 }, { "epoch": 0.3277796667800068, "grad_norm": 1.7501297373086775, "learning_rate": 7.851818823828828e-06, "loss": 0.7754, "step": 964 }, { "epoch": 0.32811968718123086, "grad_norm": 2.545140448497676, "learning_rate": 7.847293111813276e-06, "loss": 0.9082, "step": 965 }, { "epoch": 0.32845970758245496, "grad_norm": 1.8008653754801884, "learning_rate": 7.842763944918766e-06, "loss": 0.83, "step": 966 }, { "epoch": 0.328799727983679, "grad_norm": 2.6708500385698484, "learning_rate": 7.838231328640945e-06, "loss": 0.8698, "step": 967 }, { "epoch": 0.3291397483849031, "grad_norm": 1.8002730480278057, "learning_rate": 7.83369526847965e-06, "loss": 0.8098, "step": 968 }, { "epoch": 0.32947976878612717, "grad_norm": 2.032934392213337, "learning_rate": 7.82915576993889e-06, "loss": 0.841, "step": 969 }, { "epoch": 0.3298197891873512, "grad_norm": 2.1268627260893473, "learning_rate": 7.824612838526853e-06, "loss": 0.8791, "step": 970 }, { "epoch": 0.3301598095885753, "grad_norm": 1.9943092833782559, "learning_rate": 7.82006647975589e-06, "loss": 0.8746, "step": 971 }, { "epoch": 0.3304998299897994, "grad_norm": 12.28140454443901, "learning_rate": 7.81551669914251e-06, "loss": 0.8952, "step": 972 }, { "epoch": 0.3308398503910235, "grad_norm": 2.0328755642218055, "learning_rate": 7.810963502207373e-06, "loss": 0.7673, "step": 973 }, { "epoch": 0.33117987079224753, "grad_norm": 1.7777944287029475, "learning_rate": 7.806406894475286e-06, "loss": 0.8826, "step": 974 }, { "epoch": 0.3315198911934716, "grad_norm": 1.89053310342559, "learning_rate": 7.801846881475199e-06, "loss": 0.8305, "step": 975 }, { "epoch": 0.3318599115946957, "grad_norm": 1.7916446125501533, "learning_rate": 7.797283468740184e-06, "loss": 0.7707, "step": 976 }, { "epoch": 0.33219993199591974, "grad_norm": 1.583383697703338, "learning_rate": 7.792716661807443e-06, "loss": 0.7796, "step": 977 }, { "epoch": 0.33253995239714385, "grad_norm": 2.4183910263443087, "learning_rate": 7.788146466218301e-06, "loss": 0.7304, "step": 978 }, { "epoch": 0.3328799727983679, "grad_norm": 2.3658727843683907, "learning_rate": 7.78357288751819e-06, "loss": 0.7426, "step": 979 }, { "epoch": 0.333219993199592, "grad_norm": 1.828116229828553, "learning_rate": 7.778995931256646e-06, "loss": 0.8078, "step": 980 }, { "epoch": 0.33356001360081605, "grad_norm": 3.597726067768484, "learning_rate": 7.774415602987304e-06, "loss": 0.6857, "step": 981 }, { "epoch": 0.3339000340020401, "grad_norm": 3.166303108924718, "learning_rate": 7.769831908267896e-06, "loss": 0.7904, "step": 982 }, { "epoch": 0.3342400544032642, "grad_norm": 2.8933855627551304, "learning_rate": 7.765244852660233e-06, "loss": 0.8998, "step": 983 }, { "epoch": 0.33458007480448826, "grad_norm": 1.9338110662323469, "learning_rate": 7.760654441730202e-06, "loss": 0.9007, "step": 984 }, { "epoch": 0.33492009520571236, "grad_norm": 1.6591607534919344, "learning_rate": 7.756060681047769e-06, "loss": 0.8238, "step": 985 }, { "epoch": 0.3352601156069364, "grad_norm": 3.243453226362474, "learning_rate": 7.751463576186957e-06, "loss": 0.7642, "step": 986 }, { "epoch": 0.33560013600816047, "grad_norm": 2.7428152535139607, "learning_rate": 7.746863132725856e-06, "loss": 0.7282, "step": 987 }, { "epoch": 0.33594015640938457, "grad_norm": 2.1236456873501144, "learning_rate": 7.742259356246594e-06, "loss": 0.7627, "step": 988 }, { "epoch": 0.3362801768106086, "grad_norm": 1.6743079956506057, "learning_rate": 7.737652252335356e-06, "loss": 0.8406, "step": 989 }, { "epoch": 0.33662019721183273, "grad_norm": 1.87268843836185, "learning_rate": 7.733041826582357e-06, "loss": 0.8455, "step": 990 }, { "epoch": 0.3369602176130568, "grad_norm": 2.030664659551619, "learning_rate": 7.728428084581844e-06, "loss": 0.7965, "step": 991 }, { "epoch": 0.3373002380142809, "grad_norm": 3.5752732157045215, "learning_rate": 7.72381103193209e-06, "loss": 0.7149, "step": 992 }, { "epoch": 0.33764025841550493, "grad_norm": 2.8550170098863323, "learning_rate": 7.719190674235383e-06, "loss": 0.8308, "step": 993 }, { "epoch": 0.337980278816729, "grad_norm": 2.590955515466377, "learning_rate": 7.714567017098023e-06, "loss": 0.902, "step": 994 }, { "epoch": 0.3383202992179531, "grad_norm": 2.0307936229873467, "learning_rate": 7.709940066130312e-06, "loss": 0.8208, "step": 995 }, { "epoch": 0.33866031961917714, "grad_norm": 1.4578787186534492, "learning_rate": 7.705309826946547e-06, "loss": 0.8051, "step": 996 }, { "epoch": 0.33900034002040125, "grad_norm": 2.07437464133569, "learning_rate": 7.70067630516502e-06, "loss": 0.7707, "step": 997 }, { "epoch": 0.3393403604216253, "grad_norm": 2.15530600462403, "learning_rate": 7.696039506408001e-06, "loss": 0.7745, "step": 998 }, { "epoch": 0.33968038082284935, "grad_norm": 1.9609888444625139, "learning_rate": 7.691399436301743e-06, "loss": 0.7726, "step": 999 }, { "epoch": 0.34002040122407345, "grad_norm": 1.7131939444884539, "learning_rate": 7.686756100476458e-06, "loss": 0.8546, "step": 1000 }, { "epoch": 0.3403604216252975, "grad_norm": 2.0565210562712397, "learning_rate": 7.68210950456633e-06, "loss": 0.7028, "step": 1001 }, { "epoch": 0.3407004420265216, "grad_norm": 2.1703917499812664, "learning_rate": 7.677459654209493e-06, "loss": 0.83, "step": 1002 }, { "epoch": 0.34104046242774566, "grad_norm": 2.047040344782625, "learning_rate": 7.672806555048034e-06, "loss": 0.949, "step": 1003 }, { "epoch": 0.3413804828289697, "grad_norm": 2.938822311667402, "learning_rate": 7.66815021272798e-06, "loss": 1.0131, "step": 1004 }, { "epoch": 0.3417205032301938, "grad_norm": 2.5006641954997257, "learning_rate": 7.663490632899293e-06, "loss": 0.7971, "step": 1005 }, { "epoch": 0.34206052363141787, "grad_norm": 2.1529578288278817, "learning_rate": 7.658827821215863e-06, "loss": 0.8715, "step": 1006 }, { "epoch": 0.342400544032642, "grad_norm": 3.1033304500720442, "learning_rate": 7.654161783335506e-06, "loss": 0.7939, "step": 1007 }, { "epoch": 0.342740564433866, "grad_norm": 2.484067747922095, "learning_rate": 7.649492524919944e-06, "loss": 0.8513, "step": 1008 }, { "epoch": 0.34308058483509013, "grad_norm": 2.1827092693175256, "learning_rate": 7.644820051634813e-06, "loss": 0.8447, "step": 1009 }, { "epoch": 0.3434206052363142, "grad_norm": 1.9307893267754808, "learning_rate": 7.64014436914965e-06, "loss": 0.7384, "step": 1010 }, { "epoch": 0.34376062563753823, "grad_norm": 1.9144570087459527, "learning_rate": 7.635465483137885e-06, "loss": 0.8265, "step": 1011 }, { "epoch": 0.34410064603876234, "grad_norm": 1.9292779787587635, "learning_rate": 7.63078339927683e-06, "loss": 0.7725, "step": 1012 }, { "epoch": 0.3444406664399864, "grad_norm": 2.532262819572805, "learning_rate": 7.626098123247691e-06, "loss": 0.8484, "step": 1013 }, { "epoch": 0.3447806868412105, "grad_norm": 1.516985451302082, "learning_rate": 7.621409660735531e-06, "loss": 0.7265, "step": 1014 }, { "epoch": 0.34512070724243454, "grad_norm": 2.4387700274364335, "learning_rate": 7.616718017429288e-06, "loss": 0.8759, "step": 1015 }, { "epoch": 0.3454607276436586, "grad_norm": 1.7038256750677205, "learning_rate": 7.612023199021759e-06, "loss": 0.8983, "step": 1016 }, { "epoch": 0.3458007480448827, "grad_norm": 1.915028296278682, "learning_rate": 7.607325211209593e-06, "loss": 0.8207, "step": 1017 }, { "epoch": 0.34614076844610675, "grad_norm": 5.327535008237301, "learning_rate": 7.6026240596932854e-06, "loss": 0.8199, "step": 1018 }, { "epoch": 0.34648078884733086, "grad_norm": 2.0485629266374596, "learning_rate": 7.597919750177168e-06, "loss": 0.7972, "step": 1019 }, { "epoch": 0.3468208092485549, "grad_norm": 2.5900475236404215, "learning_rate": 7.593212288369408e-06, "loss": 0.8245, "step": 1020 }, { "epoch": 0.347160829649779, "grad_norm": 1.71454309402805, "learning_rate": 7.588501679981997e-06, "loss": 0.7877, "step": 1021 }, { "epoch": 0.34750085005100306, "grad_norm": 1.7173724399881714, "learning_rate": 7.583787930730737e-06, "loss": 0.8098, "step": 1022 }, { "epoch": 0.3478408704522271, "grad_norm": 1.3352281724662016, "learning_rate": 7.579071046335256e-06, "loss": 0.8151, "step": 1023 }, { "epoch": 0.3481808908534512, "grad_norm": 2.2809433135453565, "learning_rate": 7.57435103251897e-06, "loss": 0.8194, "step": 1024 }, { "epoch": 0.34852091125467527, "grad_norm": 1.776619177140561, "learning_rate": 7.569627895009104e-06, "loss": 0.7774, "step": 1025 }, { "epoch": 0.3488609316558994, "grad_norm": 1.8490485181067193, "learning_rate": 7.564901639536671e-06, "loss": 0.7279, "step": 1026 }, { "epoch": 0.3492009520571234, "grad_norm": 1.9994188958218952, "learning_rate": 7.560172271836459e-06, "loss": 0.8714, "step": 1027 }, { "epoch": 0.3495409724583475, "grad_norm": 2.4785018885474903, "learning_rate": 7.555439797647044e-06, "loss": 0.7691, "step": 1028 }, { "epoch": 0.3498809928595716, "grad_norm": 2.3412120228462427, "learning_rate": 7.5507042227107655e-06, "loss": 0.7584, "step": 1029 }, { "epoch": 0.35022101326079563, "grad_norm": 2.165022439020042, "learning_rate": 7.545965552773724e-06, "loss": 0.8901, "step": 1030 }, { "epoch": 0.35056103366201974, "grad_norm": 2.006216307009843, "learning_rate": 7.54122379358578e-06, "loss": 0.9007, "step": 1031 }, { "epoch": 0.3509010540632438, "grad_norm": 4.543392686075566, "learning_rate": 7.536478950900537e-06, "loss": 0.8423, "step": 1032 }, { "epoch": 0.3512410744644679, "grad_norm": 1.976663114216136, "learning_rate": 7.531731030475345e-06, "loss": 0.8405, "step": 1033 }, { "epoch": 0.35158109486569195, "grad_norm": 2.117430140673597, "learning_rate": 7.526980038071288e-06, "loss": 0.8223, "step": 1034 }, { "epoch": 0.351921115266916, "grad_norm": 1.9728797359425245, "learning_rate": 7.52222597945317e-06, "loss": 0.9271, "step": 1035 }, { "epoch": 0.3522611356681401, "grad_norm": 3.175594333388401, "learning_rate": 7.517468860389528e-06, "loss": 0.6723, "step": 1036 }, { "epoch": 0.35260115606936415, "grad_norm": 1.9729371741404098, "learning_rate": 7.512708686652603e-06, "loss": 0.8227, "step": 1037 }, { "epoch": 0.35294117647058826, "grad_norm": 1.4699410732834897, "learning_rate": 7.507945464018347e-06, "loss": 0.85, "step": 1038 }, { "epoch": 0.3532811968718123, "grad_norm": 2.4025141879021743, "learning_rate": 7.5031791982664085e-06, "loss": 0.8379, "step": 1039 }, { "epoch": 0.35362121727303636, "grad_norm": 1.5826438634608264, "learning_rate": 7.49840989518013e-06, "loss": 0.8426, "step": 1040 }, { "epoch": 0.35396123767426046, "grad_norm": 1.8143996630470258, "learning_rate": 7.493637560546539e-06, "loss": 0.8218, "step": 1041 }, { "epoch": 0.3543012580754845, "grad_norm": 2.9819746909856875, "learning_rate": 7.488862200156345e-06, "loss": 0.8116, "step": 1042 }, { "epoch": 0.3546412784767086, "grad_norm": 1.6025956087863886, "learning_rate": 7.484083819803922e-06, "loss": 0.8996, "step": 1043 }, { "epoch": 0.35498129887793267, "grad_norm": 2.2569463838656394, "learning_rate": 7.479302425287314e-06, "loss": 0.793, "step": 1044 }, { "epoch": 0.3553213192791567, "grad_norm": 1.7640317823457161, "learning_rate": 7.4745180224082205e-06, "loss": 0.9197, "step": 1045 }, { "epoch": 0.35566133968038083, "grad_norm": 2.0978448016759796, "learning_rate": 7.469730616971992e-06, "loss": 0.8864, "step": 1046 }, { "epoch": 0.3560013600816049, "grad_norm": 1.815417919798777, "learning_rate": 7.464940214787622e-06, "loss": 0.8489, "step": 1047 }, { "epoch": 0.356341380482829, "grad_norm": 2.5179528675012306, "learning_rate": 7.4601468216677375e-06, "loss": 0.7985, "step": 1048 }, { "epoch": 0.35668140088405303, "grad_norm": 2.0737400366436978, "learning_rate": 7.455350443428598e-06, "loss": 0.8621, "step": 1049 }, { "epoch": 0.35702142128527714, "grad_norm": 1.9500143741002205, "learning_rate": 7.450551085890087e-06, "loss": 0.7252, "step": 1050 }, { "epoch": 0.3573614416865012, "grad_norm": 2.487299951341808, "learning_rate": 7.445748754875697e-06, "loss": 0.8219, "step": 1051 }, { "epoch": 0.35770146208772524, "grad_norm": 1.9293676887234388, "learning_rate": 7.440943456212533e-06, "loss": 0.8055, "step": 1052 }, { "epoch": 0.35804148248894935, "grad_norm": 1.630631399721771, "learning_rate": 7.4361351957313e-06, "loss": 0.8086, "step": 1053 }, { "epoch": 0.3583815028901734, "grad_norm": 1.993396157634338, "learning_rate": 7.431323979266296e-06, "loss": 0.8358, "step": 1054 }, { "epoch": 0.3587215232913975, "grad_norm": 2.1096399659613434, "learning_rate": 7.4265098126554065e-06, "loss": 0.6622, "step": 1055 }, { "epoch": 0.35906154369262155, "grad_norm": 2.0957274147043568, "learning_rate": 7.421692701740095e-06, "loss": 0.8535, "step": 1056 }, { "epoch": 0.3594015640938456, "grad_norm": 2.280331567819238, "learning_rate": 7.416872652365401e-06, "loss": 0.7613, "step": 1057 }, { "epoch": 0.3597415844950697, "grad_norm": 1.7088762504236967, "learning_rate": 7.412049670379927e-06, "loss": 0.8598, "step": 1058 }, { "epoch": 0.36008160489629376, "grad_norm": 1.8455695752782821, "learning_rate": 7.4072237616358356e-06, "loss": 0.9097, "step": 1059 }, { "epoch": 0.36042162529751787, "grad_norm": 2.4513346866686483, "learning_rate": 7.402394931988836e-06, "loss": 0.8521, "step": 1060 }, { "epoch": 0.3607616456987419, "grad_norm": 1.7993496451007047, "learning_rate": 7.397563187298189e-06, "loss": 0.8043, "step": 1061 }, { "epoch": 0.361101666099966, "grad_norm": 2.5033019796675777, "learning_rate": 7.392728533426687e-06, "loss": 0.6882, "step": 1062 }, { "epoch": 0.3614416865011901, "grad_norm": 1.6959114046452552, "learning_rate": 7.387890976240655e-06, "loss": 0.7466, "step": 1063 }, { "epoch": 0.3617817069024141, "grad_norm": 1.7439198675611804, "learning_rate": 7.383050521609938e-06, "loss": 0.8818, "step": 1064 }, { "epoch": 0.36212172730363823, "grad_norm": 2.053598063714424, "learning_rate": 7.378207175407899e-06, "loss": 0.7483, "step": 1065 }, { "epoch": 0.3624617477048623, "grad_norm": 2.781756547823472, "learning_rate": 7.3733609435114096e-06, "loss": 0.8445, "step": 1066 }, { "epoch": 0.3628017681060864, "grad_norm": 2.09779855425177, "learning_rate": 7.368511831800844e-06, "loss": 0.851, "step": 1067 }, { "epoch": 0.36314178850731044, "grad_norm": 3.020910053469312, "learning_rate": 7.363659846160066e-06, "loss": 0.8892, "step": 1068 }, { "epoch": 0.3634818089085345, "grad_norm": 2.4787490690903944, "learning_rate": 7.358804992476432e-06, "loss": 0.8432, "step": 1069 }, { "epoch": 0.3638218293097586, "grad_norm": 2.8989448273348573, "learning_rate": 7.353947276640776e-06, "loss": 0.6853, "step": 1070 }, { "epoch": 0.36416184971098264, "grad_norm": 1.726759255900026, "learning_rate": 7.349086704547408e-06, "loss": 0.8831, "step": 1071 }, { "epoch": 0.36450187011220675, "grad_norm": 1.8399420040429915, "learning_rate": 7.344223282094095e-06, "loss": 0.7882, "step": 1072 }, { "epoch": 0.3648418905134308, "grad_norm": 1.806073228885492, "learning_rate": 7.3393570151820714e-06, "loss": 0.8412, "step": 1073 }, { "epoch": 0.3651819109146549, "grad_norm": 1.6710296660184638, "learning_rate": 7.334487909716021e-06, "loss": 0.9138, "step": 1074 }, { "epoch": 0.36552193131587896, "grad_norm": 2.1624300932227682, "learning_rate": 7.329615971604071e-06, "loss": 0.8227, "step": 1075 }, { "epoch": 0.365861951717103, "grad_norm": 1.7009224007949533, "learning_rate": 7.324741206757785e-06, "loss": 0.8129, "step": 1076 }, { "epoch": 0.3662019721183271, "grad_norm": 2.408114357062906, "learning_rate": 7.3198636210921556e-06, "loss": 0.8746, "step": 1077 }, { "epoch": 0.36654199251955116, "grad_norm": 1.963920816993082, "learning_rate": 7.314983220525604e-06, "loss": 0.8986, "step": 1078 }, { "epoch": 0.36688201292077527, "grad_norm": 2.625997972868134, "learning_rate": 7.3101000109799616e-06, "loss": 0.8639, "step": 1079 }, { "epoch": 0.3672220333219993, "grad_norm": 2.089484971251662, "learning_rate": 7.305213998380466e-06, "loss": 0.9753, "step": 1080 }, { "epoch": 0.36756205372322337, "grad_norm": 1.879765672023159, "learning_rate": 7.300325188655762e-06, "loss": 0.8868, "step": 1081 }, { "epoch": 0.3679020741244475, "grad_norm": 2.742242310525079, "learning_rate": 7.295433587737885e-06, "loss": 0.8266, "step": 1082 }, { "epoch": 0.3682420945256715, "grad_norm": 2.110886719188207, "learning_rate": 7.29053920156226e-06, "loss": 0.8568, "step": 1083 }, { "epoch": 0.36858211492689563, "grad_norm": 1.6424790092242403, "learning_rate": 7.285642036067687e-06, "loss": 0.803, "step": 1084 }, { "epoch": 0.3689221353281197, "grad_norm": 1.6786948404232462, "learning_rate": 7.280742097196342e-06, "loss": 0.7703, "step": 1085 }, { "epoch": 0.36926215572934373, "grad_norm": 3.0606693350336154, "learning_rate": 7.275839390893766e-06, "loss": 0.7358, "step": 1086 }, { "epoch": 0.36960217613056784, "grad_norm": 2.1162203963053146, "learning_rate": 7.270933923108857e-06, "loss": 0.7855, "step": 1087 }, { "epoch": 0.3699421965317919, "grad_norm": 1.8016862422684354, "learning_rate": 7.2660256997938635e-06, "loss": 0.8466, "step": 1088 }, { "epoch": 0.370282216933016, "grad_norm": 1.6606430113607151, "learning_rate": 7.261114726904379e-06, "loss": 0.8152, "step": 1089 }, { "epoch": 0.37062223733424005, "grad_norm": 2.761851717407974, "learning_rate": 7.25620101039933e-06, "loss": 0.9042, "step": 1090 }, { "epoch": 0.37096225773546415, "grad_norm": 1.9771042284445832, "learning_rate": 7.2512845562409764e-06, "loss": 0.7056, "step": 1091 }, { "epoch": 0.3713022781366882, "grad_norm": 1.653746373879253, "learning_rate": 7.246365370394896e-06, "loss": 0.8067, "step": 1092 }, { "epoch": 0.37164229853791225, "grad_norm": 1.8811326242800515, "learning_rate": 7.241443458829985e-06, "loss": 0.9338, "step": 1093 }, { "epoch": 0.37198231893913636, "grad_norm": 2.7501158364403637, "learning_rate": 7.236518827518444e-06, "loss": 0.8809, "step": 1094 }, { "epoch": 0.3723223393403604, "grad_norm": 2.5384080733406074, "learning_rate": 7.231591482435777e-06, "loss": 0.7763, "step": 1095 }, { "epoch": 0.3726623597415845, "grad_norm": 2.228534473485439, "learning_rate": 7.226661429560776e-06, "loss": 0.7819, "step": 1096 }, { "epoch": 0.37300238014280857, "grad_norm": 1.9926490104103836, "learning_rate": 7.221728674875522e-06, "loss": 0.886, "step": 1097 }, { "epoch": 0.3733424005440326, "grad_norm": 2.362100344429819, "learning_rate": 7.216793224365373e-06, "loss": 0.8621, "step": 1098 }, { "epoch": 0.3736824209452567, "grad_norm": 2.0797006061886103, "learning_rate": 7.2118550840189605e-06, "loss": 0.8351, "step": 1099 }, { "epoch": 0.37402244134648077, "grad_norm": 1.9013645029122852, "learning_rate": 7.206914259828177e-06, "loss": 0.7349, "step": 1100 }, { "epoch": 0.3743624617477049, "grad_norm": 2.096526511001182, "learning_rate": 7.201970757788172e-06, "loss": 0.8096, "step": 1101 }, { "epoch": 0.37470248214892893, "grad_norm": 1.9487792698034627, "learning_rate": 7.197024583897345e-06, "loss": 0.7688, "step": 1102 }, { "epoch": 0.37504250255015303, "grad_norm": 1.7616136379351248, "learning_rate": 7.19207574415734e-06, "loss": 0.877, "step": 1103 }, { "epoch": 0.3753825229513771, "grad_norm": 2.216518466261683, "learning_rate": 7.187124244573029e-06, "loss": 0.7425, "step": 1104 }, { "epoch": 0.37572254335260113, "grad_norm": 1.9420595220834904, "learning_rate": 7.182170091152518e-06, "loss": 0.8859, "step": 1105 }, { "epoch": 0.37606256375382524, "grad_norm": 3.1843147726618892, "learning_rate": 7.17721328990713e-06, "loss": 0.7846, "step": 1106 }, { "epoch": 0.3764025841550493, "grad_norm": 3.5406950144124134, "learning_rate": 7.1722538468514015e-06, "loss": 0.7288, "step": 1107 }, { "epoch": 0.3767426045562734, "grad_norm": 1.9027244103388907, "learning_rate": 7.167291768003075e-06, "loss": 0.8369, "step": 1108 }, { "epoch": 0.37708262495749745, "grad_norm": 2.229506384560749, "learning_rate": 7.162327059383089e-06, "loss": 0.8006, "step": 1109 }, { "epoch": 0.3774226453587215, "grad_norm": 2.483789935208878, "learning_rate": 7.157359727015578e-06, "loss": 0.8531, "step": 1110 }, { "epoch": 0.3777626657599456, "grad_norm": 2.0952967393905597, "learning_rate": 7.152389776927855e-06, "loss": 0.7728, "step": 1111 }, { "epoch": 0.37810268616116965, "grad_norm": 2.8696175226987135, "learning_rate": 7.147417215150411e-06, "loss": 0.8312, "step": 1112 }, { "epoch": 0.37844270656239376, "grad_norm": 2.0770110520679115, "learning_rate": 7.142442047716905e-06, "loss": 0.7291, "step": 1113 }, { "epoch": 0.3787827269636178, "grad_norm": 4.9493875756484345, "learning_rate": 7.13746428066416e-06, "loss": 0.8679, "step": 1114 }, { "epoch": 0.3791227473648419, "grad_norm": 1.4736356648123277, "learning_rate": 7.132483920032154e-06, "loss": 0.8668, "step": 1115 }, { "epoch": 0.37946276776606597, "grad_norm": 2.1871010916988407, "learning_rate": 7.127500971864008e-06, "loss": 0.8253, "step": 1116 }, { "epoch": 0.37980278816729, "grad_norm": 2.7048260870598395, "learning_rate": 7.122515442205985e-06, "loss": 0.8072, "step": 1117 }, { "epoch": 0.3801428085685141, "grad_norm": 1.6568599508148707, "learning_rate": 7.117527337107481e-06, "loss": 0.883, "step": 1118 }, { "epoch": 0.3804828289697382, "grad_norm": 1.7744379748328984, "learning_rate": 7.112536662621017e-06, "loss": 0.8028, "step": 1119 }, { "epoch": 0.3808228493709623, "grad_norm": 1.9075136504794952, "learning_rate": 7.10754342480223e-06, "loss": 0.7285, "step": 1120 }, { "epoch": 0.38116286977218633, "grad_norm": 1.9011664282216376, "learning_rate": 7.102547629709867e-06, "loss": 0.8961, "step": 1121 }, { "epoch": 0.3815028901734104, "grad_norm": 3.3324355205350096, "learning_rate": 7.097549283405782e-06, "loss": 0.8518, "step": 1122 }, { "epoch": 0.3818429105746345, "grad_norm": 1.7672003604856446, "learning_rate": 7.092548391954919e-06, "loss": 0.8808, "step": 1123 }, { "epoch": 0.38218293097585854, "grad_norm": 2.013141247061694, "learning_rate": 7.087544961425317e-06, "loss": 0.725, "step": 1124 }, { "epoch": 0.38252295137708264, "grad_norm": 1.985634648817913, "learning_rate": 7.082538997888087e-06, "loss": 0.75, "step": 1125 }, { "epoch": 0.3828629717783067, "grad_norm": 1.9538380256881642, "learning_rate": 7.077530507417423e-06, "loss": 0.7453, "step": 1126 }, { "epoch": 0.3832029921795308, "grad_norm": 1.4275684400693776, "learning_rate": 7.07251949609058e-06, "loss": 0.8418, "step": 1127 }, { "epoch": 0.38354301258075485, "grad_norm": 1.876464428838775, "learning_rate": 7.067505969987869e-06, "loss": 0.8856, "step": 1128 }, { "epoch": 0.3838830329819789, "grad_norm": 2.4676650596958134, "learning_rate": 7.06248993519266e-06, "loss": 0.7442, "step": 1129 }, { "epoch": 0.384223053383203, "grad_norm": 1.9772400387901186, "learning_rate": 7.05747139779136e-06, "loss": 0.7476, "step": 1130 }, { "epoch": 0.38456307378442706, "grad_norm": 1.7114896839198552, "learning_rate": 7.0524503638734175e-06, "loss": 0.7586, "step": 1131 }, { "epoch": 0.38490309418565116, "grad_norm": 1.979719268713037, "learning_rate": 7.047426839531308e-06, "loss": 0.7806, "step": 1132 }, { "epoch": 0.3852431145868752, "grad_norm": 2.573830551015435, "learning_rate": 7.04240083086053e-06, "loss": 0.804, "step": 1133 }, { "epoch": 0.38558313498809926, "grad_norm": 2.0092570242754286, "learning_rate": 7.037372343959592e-06, "loss": 0.784, "step": 1134 }, { "epoch": 0.38592315538932337, "grad_norm": 1.7021704495221963, "learning_rate": 7.032341384930018e-06, "loss": 0.8437, "step": 1135 }, { "epoch": 0.3862631757905474, "grad_norm": 2.005711746099124, "learning_rate": 7.0273079598763236e-06, "loss": 0.8695, "step": 1136 }, { "epoch": 0.3866031961917715, "grad_norm": 3.7689109362135897, "learning_rate": 7.022272074906021e-06, "loss": 0.8509, "step": 1137 }, { "epoch": 0.3869432165929956, "grad_norm": 2.040973796481224, "learning_rate": 7.017233736129606e-06, "loss": 0.7938, "step": 1138 }, { "epoch": 0.3872832369942196, "grad_norm": 1.6275779088061617, "learning_rate": 7.012192949660552e-06, "loss": 0.7431, "step": 1139 }, { "epoch": 0.38762325739544373, "grad_norm": 3.289438767702211, "learning_rate": 7.007149721615303e-06, "loss": 0.8054, "step": 1140 }, { "epoch": 0.3879632777966678, "grad_norm": 1.5783269725777442, "learning_rate": 7.002104058113264e-06, "loss": 0.7602, "step": 1141 }, { "epoch": 0.3883032981978919, "grad_norm": 2.401935957785332, "learning_rate": 6.997055965276796e-06, "loss": 0.8494, "step": 1142 }, { "epoch": 0.38864331859911594, "grad_norm": 2.2155438256160447, "learning_rate": 6.9920054492312086e-06, "loss": 0.8322, "step": 1143 }, { "epoch": 0.38898333900034004, "grad_norm": 2.5303097895478426, "learning_rate": 6.98695251610475e-06, "loss": 0.7219, "step": 1144 }, { "epoch": 0.3893233594015641, "grad_norm": 1.9492455847007166, "learning_rate": 6.981897172028605e-06, "loss": 0.7452, "step": 1145 }, { "epoch": 0.38966337980278815, "grad_norm": 1.8888613087300397, "learning_rate": 6.9768394231368765e-06, "loss": 0.7079, "step": 1146 }, { "epoch": 0.39000340020401225, "grad_norm": 1.8614707671392436, "learning_rate": 6.971779275566593e-06, "loss": 0.8869, "step": 1147 }, { "epoch": 0.3903434206052363, "grad_norm": 2.196139754184654, "learning_rate": 6.96671673545769e-06, "loss": 0.8182, "step": 1148 }, { "epoch": 0.3906834410064604, "grad_norm": 2.015645605594665, "learning_rate": 6.961651808953008e-06, "loss": 0.788, "step": 1149 }, { "epoch": 0.39102346140768446, "grad_norm": 2.118358018353812, "learning_rate": 6.956584502198278e-06, "loss": 0.7944, "step": 1150 }, { "epoch": 0.3913634818089085, "grad_norm": 2.5297802062767505, "learning_rate": 6.9515148213421265e-06, "loss": 0.7594, "step": 1151 }, { "epoch": 0.3917035022101326, "grad_norm": 1.7235690561643628, "learning_rate": 6.946442772536055e-06, "loss": 0.8006, "step": 1152 }, { "epoch": 0.39204352261135667, "grad_norm": 2.0605815364417723, "learning_rate": 6.941368361934442e-06, "loss": 0.7571, "step": 1153 }, { "epoch": 0.39238354301258077, "grad_norm": 1.802107862787346, "learning_rate": 6.9362915956945264e-06, "loss": 0.7694, "step": 1154 }, { "epoch": 0.3927235634138048, "grad_norm": 3.0394698418460373, "learning_rate": 6.931212479976413e-06, "loss": 0.8776, "step": 1155 }, { "epoch": 0.3930635838150289, "grad_norm": 2.344468637665646, "learning_rate": 6.9261310209430525e-06, "loss": 0.7557, "step": 1156 }, { "epoch": 0.393403604216253, "grad_norm": 2.1496119244458693, "learning_rate": 6.921047224760239e-06, "loss": 0.8703, "step": 1157 }, { "epoch": 0.39374362461747703, "grad_norm": 1.8022051744909748, "learning_rate": 6.9159610975966044e-06, "loss": 0.9033, "step": 1158 }, { "epoch": 0.39408364501870113, "grad_norm": 1.7062404002111817, "learning_rate": 6.910872645623608e-06, "loss": 0.8358, "step": 1159 }, { "epoch": 0.3944236654199252, "grad_norm": 2.807991980988546, "learning_rate": 6.905781875015529e-06, "loss": 0.732, "step": 1160 }, { "epoch": 0.3947636858211493, "grad_norm": 2.010080594688907, "learning_rate": 6.900688791949463e-06, "loss": 0.8275, "step": 1161 }, { "epoch": 0.39510370622237334, "grad_norm": 2.543059620006072, "learning_rate": 6.895593402605308e-06, "loss": 0.8672, "step": 1162 }, { "epoch": 0.3954437266235974, "grad_norm": 2.323489162154479, "learning_rate": 6.890495713165761e-06, "loss": 0.8112, "step": 1163 }, { "epoch": 0.3957837470248215, "grad_norm": 2.181543784406478, "learning_rate": 6.885395729816313e-06, "loss": 0.9508, "step": 1164 }, { "epoch": 0.39612376742604555, "grad_norm": 2.226593260830025, "learning_rate": 6.880293458745237e-06, "loss": 0.6444, "step": 1165 }, { "epoch": 0.39646378782726965, "grad_norm": 1.634156363859306, "learning_rate": 6.87518890614358e-06, "loss": 0.8444, "step": 1166 }, { "epoch": 0.3968038082284937, "grad_norm": 2.5459595291596493, "learning_rate": 6.870082078205158e-06, "loss": 0.8996, "step": 1167 }, { "epoch": 0.3971438286297178, "grad_norm": 3.9234543111468567, "learning_rate": 6.86497298112655e-06, "loss": 0.9022, "step": 1168 }, { "epoch": 0.39748384903094186, "grad_norm": 1.9173410435574607, "learning_rate": 6.859861621107084e-06, "loss": 0.8068, "step": 1169 }, { "epoch": 0.3978238694321659, "grad_norm": 3.035334796056879, "learning_rate": 6.85474800434884e-06, "loss": 0.8787, "step": 1170 }, { "epoch": 0.39816388983339, "grad_norm": 1.996336800913808, "learning_rate": 6.849632137056631e-06, "loss": 0.8218, "step": 1171 }, { "epoch": 0.39850391023461407, "grad_norm": 2.4997036230867877, "learning_rate": 6.844514025438003e-06, "loss": 0.8944, "step": 1172 }, { "epoch": 0.3988439306358382, "grad_norm": 2.058620980233996, "learning_rate": 6.8393936757032255e-06, "loss": 0.8202, "step": 1173 }, { "epoch": 0.3991839510370622, "grad_norm": 2.0680642784567764, "learning_rate": 6.834271094065284e-06, "loss": 0.8465, "step": 1174 }, { "epoch": 0.3995239714382863, "grad_norm": 1.8727199079644916, "learning_rate": 6.82914628673987e-06, "loss": 0.8527, "step": 1175 }, { "epoch": 0.3998639918395104, "grad_norm": 1.590081791651527, "learning_rate": 6.824019259945376e-06, "loss": 0.8613, "step": 1176 }, { "epoch": 0.40020401224073443, "grad_norm": 2.0260284044840278, "learning_rate": 6.818890019902891e-06, "loss": 0.7465, "step": 1177 }, { "epoch": 0.40054403264195854, "grad_norm": 1.8398479387280182, "learning_rate": 6.813758572836187e-06, "loss": 0.7806, "step": 1178 }, { "epoch": 0.4008840530431826, "grad_norm": 2.105917280765373, "learning_rate": 6.808624924971711e-06, "loss": 0.7141, "step": 1179 }, { "epoch": 0.40122407344440664, "grad_norm": 1.9634355488466153, "learning_rate": 6.803489082538586e-06, "loss": 0.8055, "step": 1180 }, { "epoch": 0.40156409384563074, "grad_norm": 1.8526489425120056, "learning_rate": 6.798351051768597e-06, "loss": 0.8832, "step": 1181 }, { "epoch": 0.4019041142468548, "grad_norm": 2.3356716947930316, "learning_rate": 6.79321083889618e-06, "loss": 0.7484, "step": 1182 }, { "epoch": 0.4022441346480789, "grad_norm": 2.0837152056282964, "learning_rate": 6.788068450158422e-06, "loss": 0.718, "step": 1183 }, { "epoch": 0.40258415504930295, "grad_norm": 1.8999186943331179, "learning_rate": 6.78292389179505e-06, "loss": 0.7811, "step": 1184 }, { "epoch": 0.40292417545052706, "grad_norm": 1.6658111499434904, "learning_rate": 6.777777170048423e-06, "loss": 0.8201, "step": 1185 }, { "epoch": 0.4032641958517511, "grad_norm": 1.680917916696707, "learning_rate": 6.772628291163527e-06, "loss": 0.807, "step": 1186 }, { "epoch": 0.40360421625297516, "grad_norm": 1.7407790818800217, "learning_rate": 6.76747726138796e-06, "loss": 0.8313, "step": 1187 }, { "epoch": 0.40394423665419926, "grad_norm": 1.5617043074412897, "learning_rate": 6.762324086971936e-06, "loss": 0.9455, "step": 1188 }, { "epoch": 0.4042842570554233, "grad_norm": 4.146596153131383, "learning_rate": 6.75716877416827e-06, "loss": 0.7997, "step": 1189 }, { "epoch": 0.4046242774566474, "grad_norm": 2.0617653843196884, "learning_rate": 6.752011329232369e-06, "loss": 0.8153, "step": 1190 }, { "epoch": 0.40496429785787147, "grad_norm": 2.076737017009885, "learning_rate": 6.746851758422228e-06, "loss": 0.8002, "step": 1191 }, { "epoch": 0.4053043182590955, "grad_norm": 1.813071998464279, "learning_rate": 6.741690067998423e-06, "loss": 0.8347, "step": 1192 }, { "epoch": 0.4056443386603196, "grad_norm": 1.9981326256931067, "learning_rate": 6.736526264224101e-06, "loss": 0.9294, "step": 1193 }, { "epoch": 0.4059843590615437, "grad_norm": 1.8827868546011934, "learning_rate": 6.731360353364975e-06, "loss": 0.867, "step": 1194 }, { "epoch": 0.4063243794627678, "grad_norm": 1.8243418052617972, "learning_rate": 6.726192341689311e-06, "loss": 0.8223, "step": 1195 }, { "epoch": 0.40666439986399183, "grad_norm": 1.770350872789149, "learning_rate": 6.721022235467926e-06, "loss": 0.8619, "step": 1196 }, { "epoch": 0.40700442026521594, "grad_norm": 1.8639174786964454, "learning_rate": 6.7158500409741815e-06, "loss": 0.9201, "step": 1197 }, { "epoch": 0.40734444066644, "grad_norm": 2.0254058760681803, "learning_rate": 6.710675764483968e-06, "loss": 0.7695, "step": 1198 }, { "epoch": 0.40768446106766404, "grad_norm": 1.760831957768078, "learning_rate": 6.7054994122757046e-06, "loss": 0.819, "step": 1199 }, { "epoch": 0.40802448146888814, "grad_norm": 2.3849603429891997, "learning_rate": 6.700320990630329e-06, "loss": 0.8816, "step": 1200 }, { "epoch": 0.4083645018701122, "grad_norm": 3.24791611718312, "learning_rate": 6.69514050583129e-06, "loss": 0.9234, "step": 1201 }, { "epoch": 0.4087045222713363, "grad_norm": 1.7471455004845486, "learning_rate": 6.689957964164539e-06, "loss": 0.7623, "step": 1202 }, { "epoch": 0.40904454267256035, "grad_norm": 2.785514967554598, "learning_rate": 6.684773371918526e-06, "loss": 0.7937, "step": 1203 }, { "epoch": 0.4093845630737844, "grad_norm": 1.8030855427647954, "learning_rate": 6.679586735384184e-06, "loss": 0.8442, "step": 1204 }, { "epoch": 0.4097245834750085, "grad_norm": 3.721825112571208, "learning_rate": 6.674398060854931e-06, "loss": 0.7539, "step": 1205 }, { "epoch": 0.41006460387623256, "grad_norm": 1.80798373103189, "learning_rate": 6.669207354626657e-06, "loss": 0.8992, "step": 1206 }, { "epoch": 0.41040462427745666, "grad_norm": 2.0935781969101352, "learning_rate": 6.664014622997717e-06, "loss": 0.8665, "step": 1207 }, { "epoch": 0.4107446446786807, "grad_norm": 1.77781409293517, "learning_rate": 6.65881987226892e-06, "loss": 0.9314, "step": 1208 }, { "epoch": 0.4110846650799048, "grad_norm": 2.0253755293143105, "learning_rate": 6.65362310874353e-06, "loss": 0.8807, "step": 1209 }, { "epoch": 0.41142468548112887, "grad_norm": 3.8985316382602444, "learning_rate": 6.648424338727254e-06, "loss": 0.7557, "step": 1210 }, { "epoch": 0.4117647058823529, "grad_norm": 2.9177556160015317, "learning_rate": 6.643223568528228e-06, "loss": 0.7773, "step": 1211 }, { "epoch": 0.412104726283577, "grad_norm": 1.7364907122378959, "learning_rate": 6.638020804457017e-06, "loss": 0.7708, "step": 1212 }, { "epoch": 0.4124447466848011, "grad_norm": 2.5902233868083107, "learning_rate": 6.632816052826611e-06, "loss": 0.8803, "step": 1213 }, { "epoch": 0.4127847670860252, "grad_norm": 2.196962080702439, "learning_rate": 6.627609319952404e-06, "loss": 0.8413, "step": 1214 }, { "epoch": 0.41312478748724923, "grad_norm": 1.7517859998624659, "learning_rate": 6.622400612152199e-06, "loss": 0.7581, "step": 1215 }, { "epoch": 0.4134648078884733, "grad_norm": 2.725187748694304, "learning_rate": 6.617189935746191e-06, "loss": 0.8616, "step": 1216 }, { "epoch": 0.4138048282896974, "grad_norm": 2.186121004596113, "learning_rate": 6.6119772970569686e-06, "loss": 0.867, "step": 1217 }, { "epoch": 0.41414484869092144, "grad_norm": 1.7550877153581408, "learning_rate": 6.606762702409499e-06, "loss": 0.8189, "step": 1218 }, { "epoch": 0.41448486909214555, "grad_norm": 1.911639257053032, "learning_rate": 6.60154615813112e-06, "loss": 0.8447, "step": 1219 }, { "epoch": 0.4148248894933696, "grad_norm": 2.2359482219667566, "learning_rate": 6.596327670551541e-06, "loss": 0.7098, "step": 1220 }, { "epoch": 0.4151649098945937, "grad_norm": 1.8270789562811462, "learning_rate": 6.591107246002825e-06, "loss": 0.8242, "step": 1221 }, { "epoch": 0.41550493029581775, "grad_norm": 1.4119348213396106, "learning_rate": 6.585884890819388e-06, "loss": 0.9456, "step": 1222 }, { "epoch": 0.4158449506970418, "grad_norm": 2.4831333073302115, "learning_rate": 6.5806606113379855e-06, "loss": 0.843, "step": 1223 }, { "epoch": 0.4161849710982659, "grad_norm": 2.0195834149272907, "learning_rate": 6.57543441389771e-06, "loss": 0.7616, "step": 1224 }, { "epoch": 0.41652499149948996, "grad_norm": 1.8560990970772, "learning_rate": 6.570206304839979e-06, "loss": 0.772, "step": 1225 }, { "epoch": 0.41686501190071407, "grad_norm": 3.4777502532844364, "learning_rate": 6.564976290508535e-06, "loss": 0.7694, "step": 1226 }, { "epoch": 0.4172050323019381, "grad_norm": 2.1141524879270337, "learning_rate": 6.559744377249426e-06, "loss": 0.8212, "step": 1227 }, { "epoch": 0.41754505270316217, "grad_norm": 1.9995986640884873, "learning_rate": 6.554510571411009e-06, "loss": 0.7696, "step": 1228 }, { "epoch": 0.4178850731043863, "grad_norm": 4.388048606323549, "learning_rate": 6.549274879343932e-06, "loss": 0.8521, "step": 1229 }, { "epoch": 0.4182250935056103, "grad_norm": 2.512566436457032, "learning_rate": 6.54403730740114e-06, "loss": 0.8375, "step": 1230 }, { "epoch": 0.41856511390683443, "grad_norm": 2.022875278093896, "learning_rate": 6.53879786193785e-06, "loss": 0.8287, "step": 1231 }, { "epoch": 0.4189051343080585, "grad_norm": 2.1514715191325338, "learning_rate": 6.533556549311557e-06, "loss": 0.8248, "step": 1232 }, { "epoch": 0.41924515470928253, "grad_norm": 1.903140049004442, "learning_rate": 6.52831337588202e-06, "loss": 0.8134, "step": 1233 }, { "epoch": 0.41958517511050664, "grad_norm": 3.9459515916276815, "learning_rate": 6.52306834801126e-06, "loss": 0.8674, "step": 1234 }, { "epoch": 0.4199251955117307, "grad_norm": 1.9243723260268402, "learning_rate": 6.517821472063543e-06, "loss": 0.8009, "step": 1235 }, { "epoch": 0.4202652159129548, "grad_norm": 2.1622315952308484, "learning_rate": 6.51257275440538e-06, "loss": 0.8302, "step": 1236 }, { "epoch": 0.42060523631417884, "grad_norm": 2.246688116810149, "learning_rate": 6.507322201405515e-06, "loss": 0.8518, "step": 1237 }, { "epoch": 0.42094525671540295, "grad_norm": 1.7866795660553714, "learning_rate": 6.502069819434921e-06, "loss": 0.7996, "step": 1238 }, { "epoch": 0.421285277116627, "grad_norm": 2.483859305848629, "learning_rate": 6.496815614866792e-06, "loss": 0.798, "step": 1239 }, { "epoch": 0.42162529751785105, "grad_norm": 2.6273097801827743, "learning_rate": 6.491559594076526e-06, "loss": 0.7717, "step": 1240 }, { "epoch": 0.42196531791907516, "grad_norm": 1.9705933045927748, "learning_rate": 6.486301763441732e-06, "loss": 0.8437, "step": 1241 }, { "epoch": 0.4223053383202992, "grad_norm": 1.6987794156789002, "learning_rate": 6.4810421293422124e-06, "loss": 0.7846, "step": 1242 }, { "epoch": 0.4226453587215233, "grad_norm": 1.7952873310252566, "learning_rate": 6.475780698159959e-06, "loss": 0.8228, "step": 1243 }, { "epoch": 0.42298537912274736, "grad_norm": 2.3781383897255357, "learning_rate": 6.470517476279143e-06, "loss": 0.9275, "step": 1244 }, { "epoch": 0.4233253995239714, "grad_norm": 1.7388598039077234, "learning_rate": 6.465252470086109e-06, "loss": 0.7543, "step": 1245 }, { "epoch": 0.4236654199251955, "grad_norm": 3.847734904824095, "learning_rate": 6.459985685969365e-06, "loss": 0.7801, "step": 1246 }, { "epoch": 0.42400544032641957, "grad_norm": 2.0015735326442123, "learning_rate": 6.454717130319583e-06, "loss": 0.8404, "step": 1247 }, { "epoch": 0.4243454607276437, "grad_norm": 1.676873018907222, "learning_rate": 6.449446809529573e-06, "loss": 0.7616, "step": 1248 }, { "epoch": 0.4246854811288677, "grad_norm": 1.9476313763300463, "learning_rate": 6.444174729994295e-06, "loss": 0.8572, "step": 1249 }, { "epoch": 0.42502550153009183, "grad_norm": 3.039778353662543, "learning_rate": 6.438900898110843e-06, "loss": 0.6842, "step": 1250 }, { "epoch": 0.4253655219313159, "grad_norm": 2.212503519532677, "learning_rate": 6.433625320278435e-06, "loss": 0.7895, "step": 1251 }, { "epoch": 0.42570554233253993, "grad_norm": 3.696419960495062, "learning_rate": 6.4283480028984065e-06, "loss": 0.7889, "step": 1252 }, { "epoch": 0.42604556273376404, "grad_norm": 1.8504613276269528, "learning_rate": 6.423068952374208e-06, "loss": 0.6952, "step": 1253 }, { "epoch": 0.4263855831349881, "grad_norm": 2.011583690777513, "learning_rate": 6.4177881751113854e-06, "loss": 0.7343, "step": 1254 }, { "epoch": 0.4267256035362122, "grad_norm": 1.9658764564932893, "learning_rate": 6.412505677517592e-06, "loss": 0.8955, "step": 1255 }, { "epoch": 0.42706562393743625, "grad_norm": 1.7771550054870846, "learning_rate": 6.4072214660025555e-06, "loss": 0.794, "step": 1256 }, { "epoch": 0.4274056443386603, "grad_norm": 2.170566029231825, "learning_rate": 6.401935546978091e-06, "loss": 0.8307, "step": 1257 }, { "epoch": 0.4277456647398844, "grad_norm": 2.795606918335267, "learning_rate": 6.396647926858082e-06, "loss": 0.7408, "step": 1258 }, { "epoch": 0.42808568514110845, "grad_norm": 1.9857919239067945, "learning_rate": 6.391358612058479e-06, "loss": 0.7435, "step": 1259 }, { "epoch": 0.42842570554233256, "grad_norm": 4.183650214686305, "learning_rate": 6.386067608997286e-06, "loss": 0.8171, "step": 1260 }, { "epoch": 0.4287657259435566, "grad_norm": 2.2866953692840517, "learning_rate": 6.3807749240945594e-06, "loss": 0.9282, "step": 1261 }, { "epoch": 0.4291057463447807, "grad_norm": 1.723522832107471, "learning_rate": 6.375480563772391e-06, "loss": 0.8644, "step": 1262 }, { "epoch": 0.42944576674600476, "grad_norm": 1.9376266728840439, "learning_rate": 6.3701845344549105e-06, "loss": 0.7975, "step": 1263 }, { "epoch": 0.4297857871472288, "grad_norm": 2.0320058786563884, "learning_rate": 6.3648868425682695e-06, "loss": 0.8404, "step": 1264 }, { "epoch": 0.4301258075484529, "grad_norm": 1.833242518517846, "learning_rate": 6.359587494540638e-06, "loss": 0.9727, "step": 1265 }, { "epoch": 0.43046582794967697, "grad_norm": 2.0656866508193237, "learning_rate": 6.354286496802195e-06, "loss": 0.7088, "step": 1266 }, { "epoch": 0.4308058483509011, "grad_norm": 2.333059652715563, "learning_rate": 6.348983855785122e-06, "loss": 0.7784, "step": 1267 }, { "epoch": 0.4311458687521251, "grad_norm": 1.8812951830256721, "learning_rate": 6.343679577923596e-06, "loss": 0.8082, "step": 1268 }, { "epoch": 0.4314858891533492, "grad_norm": 2.2757541276249103, "learning_rate": 6.338373669653777e-06, "loss": 0.8048, "step": 1269 }, { "epoch": 0.4318259095545733, "grad_norm": 1.7616508457249394, "learning_rate": 6.333066137413803e-06, "loss": 0.7967, "step": 1270 }, { "epoch": 0.43216592995579733, "grad_norm": 2.5839639191285912, "learning_rate": 6.327756987643788e-06, "loss": 0.8475, "step": 1271 }, { "epoch": 0.43250595035702144, "grad_norm": 1.983977392970035, "learning_rate": 6.322446226785803e-06, "loss": 0.7688, "step": 1272 }, { "epoch": 0.4328459707582455, "grad_norm": 1.920278640728409, "learning_rate": 6.317133861283876e-06, "loss": 0.8112, "step": 1273 }, { "epoch": 0.43318599115946954, "grad_norm": 2.175785687192607, "learning_rate": 6.311819897583981e-06, "loss": 0.8807, "step": 1274 }, { "epoch": 0.43352601156069365, "grad_norm": 2.1329693868490156, "learning_rate": 6.306504342134032e-06, "loss": 0.7646, "step": 1275 }, { "epoch": 0.4338660319619177, "grad_norm": 2.7197062834600643, "learning_rate": 6.301187201383876e-06, "loss": 0.8924, "step": 1276 }, { "epoch": 0.4342060523631418, "grad_norm": 2.0900606207430976, "learning_rate": 6.295868481785281e-06, "loss": 0.8063, "step": 1277 }, { "epoch": 0.43454607276436585, "grad_norm": 1.7320405000379613, "learning_rate": 6.290548189791932e-06, "loss": 0.7871, "step": 1278 }, { "epoch": 0.43488609316558996, "grad_norm": 3.246424780026875, "learning_rate": 6.285226331859423e-06, "loss": 0.7022, "step": 1279 }, { "epoch": 0.435226113566814, "grad_norm": 3.2299354049530558, "learning_rate": 6.279902914445246e-06, "loss": 0.8512, "step": 1280 }, { "epoch": 0.43556613396803806, "grad_norm": 1.7974856940773503, "learning_rate": 6.274577944008785e-06, "loss": 0.7445, "step": 1281 }, { "epoch": 0.43590615436926217, "grad_norm": 1.7902884652613178, "learning_rate": 6.26925142701131e-06, "loss": 0.7549, "step": 1282 }, { "epoch": 0.4362461747704862, "grad_norm": 1.7575937886544872, "learning_rate": 6.263923369915968e-06, "loss": 0.7033, "step": 1283 }, { "epoch": 0.4365861951717103, "grad_norm": 2.003652418101978, "learning_rate": 6.258593779187774e-06, "loss": 0.7226, "step": 1284 }, { "epoch": 0.4369262155729344, "grad_norm": 1.586460473657157, "learning_rate": 6.2532626612936035e-06, "loss": 0.7918, "step": 1285 }, { "epoch": 0.4372662359741584, "grad_norm": 1.7790120795884707, "learning_rate": 6.247930022702184e-06, "loss": 0.7426, "step": 1286 }, { "epoch": 0.43760625637538253, "grad_norm": 1.4845861438510766, "learning_rate": 6.242595869884093e-06, "loss": 0.75, "step": 1287 }, { "epoch": 0.4379462767766066, "grad_norm": 2.586395804452128, "learning_rate": 6.237260209311738e-06, "loss": 0.7247, "step": 1288 }, { "epoch": 0.4382862971778307, "grad_norm": 1.7488826727955935, "learning_rate": 6.231923047459362e-06, "loss": 0.7819, "step": 1289 }, { "epoch": 0.43862631757905474, "grad_norm": 2.7476753864785306, "learning_rate": 6.2265843908030255e-06, "loss": 0.8755, "step": 1290 }, { "epoch": 0.43896633798027884, "grad_norm": 2.04843978154324, "learning_rate": 6.2212442458206065e-06, "loss": 0.845, "step": 1291 }, { "epoch": 0.4393063583815029, "grad_norm": 2.056402370171357, "learning_rate": 6.215902618991789e-06, "loss": 0.6932, "step": 1292 }, { "epoch": 0.43964637878272694, "grad_norm": 2.3809614347105814, "learning_rate": 6.21055951679805e-06, "loss": 0.8301, "step": 1293 }, { "epoch": 0.43998639918395105, "grad_norm": 2.216045492126213, "learning_rate": 6.20521494572266e-06, "loss": 0.8544, "step": 1294 }, { "epoch": 0.4403264195851751, "grad_norm": 1.7506779190930466, "learning_rate": 6.1998689122506765e-06, "loss": 0.8289, "step": 1295 }, { "epoch": 0.4406664399863992, "grad_norm": 1.904093376273434, "learning_rate": 6.19452142286892e-06, "loss": 0.7709, "step": 1296 }, { "epoch": 0.44100646038762326, "grad_norm": 2.7089730576700664, "learning_rate": 6.1891724840659895e-06, "loss": 0.8263, "step": 1297 }, { "epoch": 0.4413464807888473, "grad_norm": 1.7011604570765408, "learning_rate": 6.183822102332234e-06, "loss": 0.7318, "step": 1298 }, { "epoch": 0.4416865011900714, "grad_norm": 1.7063499626312666, "learning_rate": 6.17847028415976e-06, "loss": 0.7845, "step": 1299 }, { "epoch": 0.44202652159129546, "grad_norm": 2.5512420126857998, "learning_rate": 6.1731170360424116e-06, "loss": 0.8297, "step": 1300 }, { "epoch": 0.44236654199251957, "grad_norm": 2.3308930708868614, "learning_rate": 6.1677623644757715e-06, "loss": 0.7281, "step": 1301 }, { "epoch": 0.4427065623937436, "grad_norm": 1.9814345181679105, "learning_rate": 6.162406275957147e-06, "loss": 0.6841, "step": 1302 }, { "epoch": 0.4430465827949677, "grad_norm": 1.8308664790589082, "learning_rate": 6.157048776985568e-06, "loss": 0.7597, "step": 1303 }, { "epoch": 0.4433866031961918, "grad_norm": 1.9450975948070095, "learning_rate": 6.151689874061773e-06, "loss": 0.8809, "step": 1304 }, { "epoch": 0.4437266235974158, "grad_norm": 2.783986050727857, "learning_rate": 6.1463295736882045e-06, "loss": 0.7678, "step": 1305 }, { "epoch": 0.44406664399863993, "grad_norm": 2.654115532064757, "learning_rate": 6.140967882369001e-06, "loss": 0.7656, "step": 1306 }, { "epoch": 0.444406664399864, "grad_norm": 2.1154238353595938, "learning_rate": 6.135604806609988e-06, "loss": 0.7393, "step": 1307 }, { "epoch": 0.4447466848010881, "grad_norm": 1.8316694323729121, "learning_rate": 6.130240352918675e-06, "loss": 0.7955, "step": 1308 }, { "epoch": 0.44508670520231214, "grad_norm": 1.7923597848780743, "learning_rate": 6.1248745278042375e-06, "loss": 0.7902, "step": 1309 }, { "epoch": 0.4454267256035362, "grad_norm": 2.1469603822475882, "learning_rate": 6.119507337777517e-06, "loss": 0.8111, "step": 1310 }, { "epoch": 0.4457667460047603, "grad_norm": 3.204925579816377, "learning_rate": 6.114138789351015e-06, "loss": 0.898, "step": 1311 }, { "epoch": 0.44610676640598435, "grad_norm": 2.4583222181061735, "learning_rate": 6.108768889038875e-06, "loss": 0.8401, "step": 1312 }, { "epoch": 0.44644678680720845, "grad_norm": 1.6306721028852273, "learning_rate": 6.103397643356888e-06, "loss": 0.8261, "step": 1313 }, { "epoch": 0.4467868072084325, "grad_norm": 1.6997011922153806, "learning_rate": 6.098025058822467e-06, "loss": 0.8157, "step": 1314 }, { "epoch": 0.44712682760965655, "grad_norm": 1.894873083473829, "learning_rate": 6.092651141954663e-06, "loss": 0.818, "step": 1315 }, { "epoch": 0.44746684801088066, "grad_norm": 1.8152984091344468, "learning_rate": 6.087275899274132e-06, "loss": 0.846, "step": 1316 }, { "epoch": 0.4478068684121047, "grad_norm": 1.8804247156065967, "learning_rate": 6.081899337303148e-06, "loss": 0.8775, "step": 1317 }, { "epoch": 0.4481468888133288, "grad_norm": 2.2811320840226874, "learning_rate": 6.076521462565575e-06, "loss": 0.8405, "step": 1318 }, { "epoch": 0.44848690921455286, "grad_norm": 1.9014030398430317, "learning_rate": 6.071142281586883e-06, "loss": 0.6665, "step": 1319 }, { "epoch": 0.44882692961577697, "grad_norm": 1.7598473537668629, "learning_rate": 6.0657618008941135e-06, "loss": 0.8114, "step": 1320 }, { "epoch": 0.449166950017001, "grad_norm": 1.701348824209668, "learning_rate": 6.060380027015897e-06, "loss": 0.9063, "step": 1321 }, { "epoch": 0.44950697041822507, "grad_norm": 2.0151604743954192, "learning_rate": 6.054996966482425e-06, "loss": 0.7727, "step": 1322 }, { "epoch": 0.4498469908194492, "grad_norm": 1.8240997834179458, "learning_rate": 6.049612625825454e-06, "loss": 0.6151, "step": 1323 }, { "epoch": 0.4501870112206732, "grad_norm": 1.7621003478299089, "learning_rate": 6.044227011578292e-06, "loss": 0.8248, "step": 1324 }, { "epoch": 0.45052703162189733, "grad_norm": 2.3449253350037647, "learning_rate": 6.038840130275795e-06, "loss": 0.8094, "step": 1325 }, { "epoch": 0.4508670520231214, "grad_norm": 2.186940711291871, "learning_rate": 6.033451988454352e-06, "loss": 0.8526, "step": 1326 }, { "epoch": 0.45120707242434543, "grad_norm": 1.8098545794134784, "learning_rate": 6.0280625926518865e-06, "loss": 0.8167, "step": 1327 }, { "epoch": 0.45154709282556954, "grad_norm": 2.11938751373049, "learning_rate": 6.02267194940784e-06, "loss": 0.8615, "step": 1328 }, { "epoch": 0.4518871132267936, "grad_norm": 5.798776280397436, "learning_rate": 6.0172800652631706e-06, "loss": 0.8126, "step": 1329 }, { "epoch": 0.4522271336280177, "grad_norm": 1.9362659319180398, "learning_rate": 6.011886946760337e-06, "loss": 0.8515, "step": 1330 }, { "epoch": 0.45256715402924175, "grad_norm": 1.9240359926336854, "learning_rate": 6.006492600443301e-06, "loss": 0.795, "step": 1331 }, { "epoch": 0.45290717443046585, "grad_norm": 2.6312493040402223, "learning_rate": 6.001097032857513e-06, "loss": 0.9005, "step": 1332 }, { "epoch": 0.4532471948316899, "grad_norm": 2.4588002159411975, "learning_rate": 5.995700250549903e-06, "loss": 0.9122, "step": 1333 }, { "epoch": 0.45358721523291395, "grad_norm": 1.8102464748281866, "learning_rate": 5.990302260068877e-06, "loss": 0.7861, "step": 1334 }, { "epoch": 0.45392723563413806, "grad_norm": 2.4499379979673654, "learning_rate": 5.9849030679643075e-06, "loss": 0.8793, "step": 1335 }, { "epoch": 0.4542672560353621, "grad_norm": 1.643369045844105, "learning_rate": 5.97950268078752e-06, "loss": 0.9176, "step": 1336 }, { "epoch": 0.4546072764365862, "grad_norm": 2.94241862218008, "learning_rate": 5.9741011050913e-06, "loss": 0.7631, "step": 1337 }, { "epoch": 0.45494729683781027, "grad_norm": 2.260745557245212, "learning_rate": 5.968698347429864e-06, "loss": 0.8574, "step": 1338 }, { "epoch": 0.4552873172390343, "grad_norm": 1.9189222697412902, "learning_rate": 5.96329441435887e-06, "loss": 0.8627, "step": 1339 }, { "epoch": 0.4556273376402584, "grad_norm": 2.3145970729902725, "learning_rate": 5.9578893124354e-06, "loss": 0.8203, "step": 1340 }, { "epoch": 0.4559673580414825, "grad_norm": 1.7097699263826742, "learning_rate": 5.9524830482179565e-06, "loss": 0.8143, "step": 1341 }, { "epoch": 0.4563073784427066, "grad_norm": 1.7644511385918709, "learning_rate": 5.9470756282664455e-06, "loss": 0.8428, "step": 1342 }, { "epoch": 0.45664739884393063, "grad_norm": 2.0834007860288835, "learning_rate": 5.941667059142184e-06, "loss": 0.8975, "step": 1343 }, { "epoch": 0.45698741924515474, "grad_norm": 1.9264637214129008, "learning_rate": 5.936257347407877e-06, "loss": 0.7147, "step": 1344 }, { "epoch": 0.4573274396463788, "grad_norm": 1.7108620469281093, "learning_rate": 5.9308464996276195e-06, "loss": 0.8773, "step": 1345 }, { "epoch": 0.45766746004760284, "grad_norm": 1.8740444271707064, "learning_rate": 5.925434522366884e-06, "loss": 0.8765, "step": 1346 }, { "epoch": 0.45800748044882694, "grad_norm": 2.9159889860260706, "learning_rate": 5.920021422192512e-06, "loss": 0.7429, "step": 1347 }, { "epoch": 0.458347500850051, "grad_norm": 2.2683984119230964, "learning_rate": 5.914607205672711e-06, "loss": 0.8265, "step": 1348 }, { "epoch": 0.4586875212512751, "grad_norm": 2.229706808343337, "learning_rate": 5.909191879377041e-06, "loss": 0.8355, "step": 1349 }, { "epoch": 0.45902754165249915, "grad_norm": 2.284917272323551, "learning_rate": 5.903775449876406e-06, "loss": 0.706, "step": 1350 }, { "epoch": 0.4593675620537232, "grad_norm": 1.9399879628948615, "learning_rate": 5.898357923743052e-06, "loss": 0.6978, "step": 1351 }, { "epoch": 0.4597075824549473, "grad_norm": 2.0206889656325777, "learning_rate": 5.892939307550556e-06, "loss": 0.7937, "step": 1352 }, { "epoch": 0.46004760285617136, "grad_norm": 2.0270151745480107, "learning_rate": 5.887519607873815e-06, "loss": 0.801, "step": 1353 }, { "epoch": 0.46038762325739546, "grad_norm": 1.8581224024582177, "learning_rate": 5.882098831289044e-06, "loss": 0.8618, "step": 1354 }, { "epoch": 0.4607276436586195, "grad_norm": 1.9755282021333846, "learning_rate": 5.8766769843737604e-06, "loss": 0.7721, "step": 1355 }, { "epoch": 0.4610676640598436, "grad_norm": 1.8373059374517786, "learning_rate": 5.8712540737067835e-06, "loss": 0.7952, "step": 1356 }, { "epoch": 0.46140768446106767, "grad_norm": 1.5959326369230662, "learning_rate": 5.865830105868226e-06, "loss": 0.7782, "step": 1357 }, { "epoch": 0.4617477048622917, "grad_norm": 1.9988446870321313, "learning_rate": 5.860405087439475e-06, "loss": 0.8748, "step": 1358 }, { "epoch": 0.4620877252635158, "grad_norm": 4.024311573353348, "learning_rate": 5.8549790250032e-06, "loss": 0.7804, "step": 1359 }, { "epoch": 0.4624277456647399, "grad_norm": 1.7924867446440473, "learning_rate": 5.849551925143334e-06, "loss": 0.7366, "step": 1360 }, { "epoch": 0.462767766065964, "grad_norm": 1.9333933529106293, "learning_rate": 5.84412379444507e-06, "loss": 0.7634, "step": 1361 }, { "epoch": 0.46310778646718803, "grad_norm": 2.239027398676504, "learning_rate": 5.838694639494852e-06, "loss": 0.7516, "step": 1362 }, { "epoch": 0.4634478068684121, "grad_norm": 2.564558822755495, "learning_rate": 5.833264466880363e-06, "loss": 0.7493, "step": 1363 }, { "epoch": 0.4637878272696362, "grad_norm": 2.406933971641201, "learning_rate": 5.827833283190527e-06, "loss": 0.7643, "step": 1364 }, { "epoch": 0.46412784767086024, "grad_norm": 1.6911599873633802, "learning_rate": 5.8224010950154895e-06, "loss": 0.8361, "step": 1365 }, { "epoch": 0.46446786807208434, "grad_norm": 1.8143704851616336, "learning_rate": 5.81696790894662e-06, "loss": 0.8781, "step": 1366 }, { "epoch": 0.4648078884733084, "grad_norm": 1.9049878875916997, "learning_rate": 5.811533731576494e-06, "loss": 0.883, "step": 1367 }, { "epoch": 0.46514790887453245, "grad_norm": 1.6427893085515717, "learning_rate": 5.806098569498892e-06, "loss": 0.7631, "step": 1368 }, { "epoch": 0.46548792927575655, "grad_norm": 1.950478915992292, "learning_rate": 5.800662429308787e-06, "loss": 0.7777, "step": 1369 }, { "epoch": 0.4658279496769806, "grad_norm": 2.3387814639730995, "learning_rate": 5.795225317602344e-06, "loss": 0.7839, "step": 1370 }, { "epoch": 0.4661679700782047, "grad_norm": 2.3136990487853306, "learning_rate": 5.789787240976903e-06, "loss": 0.8801, "step": 1371 }, { "epoch": 0.46650799047942876, "grad_norm": 1.883331409095713, "learning_rate": 5.784348206030974e-06, "loss": 0.7718, "step": 1372 }, { "epoch": 0.46684801088065286, "grad_norm": 1.5552364347893213, "learning_rate": 5.778908219364234e-06, "loss": 0.7953, "step": 1373 }, { "epoch": 0.4671880312818769, "grad_norm": 1.8758755215294272, "learning_rate": 5.77346728757751e-06, "loss": 0.9304, "step": 1374 }, { "epoch": 0.46752805168310096, "grad_norm": 2.8864202166172857, "learning_rate": 5.768025417272779e-06, "loss": 0.8601, "step": 1375 }, { "epoch": 0.46786807208432507, "grad_norm": 1.7477299549548073, "learning_rate": 5.762582615053155e-06, "loss": 0.8618, "step": 1376 }, { "epoch": 0.4682080924855491, "grad_norm": 1.6359643840822298, "learning_rate": 5.757138887522884e-06, "loss": 0.8735, "step": 1377 }, { "epoch": 0.4685481128867732, "grad_norm": 1.9885997278079388, "learning_rate": 5.751694241287336e-06, "loss": 0.7201, "step": 1378 }, { "epoch": 0.4688881332879973, "grad_norm": 2.024775175272147, "learning_rate": 5.7462486829529895e-06, "loss": 0.9019, "step": 1379 }, { "epoch": 0.46922815368922133, "grad_norm": 2.1517611217388164, "learning_rate": 5.7408022191274385e-06, "loss": 0.7558, "step": 1380 }, { "epoch": 0.46956817409044543, "grad_norm": 1.6071554576538785, "learning_rate": 5.735354856419371e-06, "loss": 0.7544, "step": 1381 }, { "epoch": 0.4699081944916695, "grad_norm": 3.0679645169810588, "learning_rate": 5.729906601438564e-06, "loss": 0.6876, "step": 1382 }, { "epoch": 0.4702482148928936, "grad_norm": 1.7186871226619356, "learning_rate": 5.724457460795883e-06, "loss": 0.9415, "step": 1383 }, { "epoch": 0.47058823529411764, "grad_norm": 2.8408348818159648, "learning_rate": 5.71900744110326e-06, "loss": 0.7498, "step": 1384 }, { "epoch": 0.47092825569534175, "grad_norm": 1.5730885516835262, "learning_rate": 5.713556548973701e-06, "loss": 0.8499, "step": 1385 }, { "epoch": 0.4712682760965658, "grad_norm": 2.4054974564917146, "learning_rate": 5.708104791021267e-06, "loss": 0.7346, "step": 1386 }, { "epoch": 0.47160829649778985, "grad_norm": 7.6135309739245605, "learning_rate": 5.702652173861073e-06, "loss": 0.9721, "step": 1387 }, { "epoch": 0.47194831689901395, "grad_norm": 2.157238376626225, "learning_rate": 5.697198704109269e-06, "loss": 0.8068, "step": 1388 }, { "epoch": 0.472288337300238, "grad_norm": 2.0366968869428206, "learning_rate": 5.691744388383047e-06, "loss": 0.8825, "step": 1389 }, { "epoch": 0.4726283577014621, "grad_norm": 3.332007974752036, "learning_rate": 5.686289233300625e-06, "loss": 0.8573, "step": 1390 }, { "epoch": 0.47296837810268616, "grad_norm": 2.996038777755443, "learning_rate": 5.680833245481234e-06, "loss": 0.7937, "step": 1391 }, { "epoch": 0.4733083985039102, "grad_norm": 1.876848717220222, "learning_rate": 5.6753764315451196e-06, "loss": 0.8256, "step": 1392 }, { "epoch": 0.4736484189051343, "grad_norm": 1.7456685592190304, "learning_rate": 5.669918798113531e-06, "loss": 0.781, "step": 1393 }, { "epoch": 0.47398843930635837, "grad_norm": 2.1150837869236243, "learning_rate": 5.664460351808706e-06, "loss": 0.7863, "step": 1394 }, { "epoch": 0.4743284597075825, "grad_norm": 2.106099742367351, "learning_rate": 5.659001099253878e-06, "loss": 0.8522, "step": 1395 }, { "epoch": 0.4746684801088065, "grad_norm": 1.6430769745388225, "learning_rate": 5.653541047073248e-06, "loss": 0.8509, "step": 1396 }, { "epoch": 0.47500850051003063, "grad_norm": 2.7934049226507893, "learning_rate": 5.648080201891994e-06, "loss": 0.6624, "step": 1397 }, { "epoch": 0.4753485209112547, "grad_norm": 1.8064873975135194, "learning_rate": 5.642618570336258e-06, "loss": 0.7733, "step": 1398 }, { "epoch": 0.47568854131247873, "grad_norm": 1.7201690606896272, "learning_rate": 5.637156159033129e-06, "loss": 0.7874, "step": 1399 }, { "epoch": 0.47602856171370284, "grad_norm": 2.377775914178791, "learning_rate": 5.631692974610647e-06, "loss": 0.9048, "step": 1400 }, { "epoch": 0.4763685821149269, "grad_norm": 1.9190322261379347, "learning_rate": 5.626229023697789e-06, "loss": 0.8154, "step": 1401 }, { "epoch": 0.476708602516151, "grad_norm": 1.846727656609046, "learning_rate": 5.6207643129244625e-06, "loss": 0.7141, "step": 1402 }, { "epoch": 0.47704862291737504, "grad_norm": 1.6992506612080924, "learning_rate": 5.6152988489214985e-06, "loss": 0.8167, "step": 1403 }, { "epoch": 0.4773886433185991, "grad_norm": 1.8237274064366962, "learning_rate": 5.609832638320637e-06, "loss": 0.8415, "step": 1404 }, { "epoch": 0.4777286637198232, "grad_norm": 2.046569580094362, "learning_rate": 5.604365687754528e-06, "loss": 0.8745, "step": 1405 }, { "epoch": 0.47806868412104725, "grad_norm": 2.576331253597654, "learning_rate": 5.59889800385672e-06, "loss": 0.7876, "step": 1406 }, { "epoch": 0.47840870452227136, "grad_norm": 1.696243517927065, "learning_rate": 5.59342959326165e-06, "loss": 0.6787, "step": 1407 }, { "epoch": 0.4787487249234954, "grad_norm": 1.7387459270264907, "learning_rate": 5.587960462604634e-06, "loss": 0.7973, "step": 1408 }, { "epoch": 0.47908874532471946, "grad_norm": 2.1302859398907934, "learning_rate": 5.582490618521864e-06, "loss": 0.7099, "step": 1409 }, { "epoch": 0.47942876572594356, "grad_norm": 1.7380121057600302, "learning_rate": 5.5770200676504e-06, "loss": 0.877, "step": 1410 }, { "epoch": 0.4797687861271676, "grad_norm": 2.327507521749447, "learning_rate": 5.571548816628159e-06, "loss": 0.7612, "step": 1411 }, { "epoch": 0.4801088065283917, "grad_norm": 1.9212379412038696, "learning_rate": 5.5660768720939e-06, "loss": 0.8138, "step": 1412 }, { "epoch": 0.48044882692961577, "grad_norm": 1.6433392321151539, "learning_rate": 5.560604240687235e-06, "loss": 0.8439, "step": 1413 }, { "epoch": 0.4807888473308399, "grad_norm": 1.6964960554479955, "learning_rate": 5.555130929048603e-06, "loss": 0.8821, "step": 1414 }, { "epoch": 0.4811288677320639, "grad_norm": 1.8194872677059573, "learning_rate": 5.5496569438192695e-06, "loss": 0.7899, "step": 1415 }, { "epoch": 0.481468888133288, "grad_norm": 1.8072104696328097, "learning_rate": 5.544182291641317e-06, "loss": 0.7687, "step": 1416 }, { "epoch": 0.4818089085345121, "grad_norm": 1.7790457934236203, "learning_rate": 5.538706979157635e-06, "loss": 0.7862, "step": 1417 }, { "epoch": 0.48214892893573613, "grad_norm": 1.7951850721079083, "learning_rate": 5.533231013011919e-06, "loss": 0.7515, "step": 1418 }, { "epoch": 0.48248894933696024, "grad_norm": 1.5561662456874903, "learning_rate": 5.527754399848657e-06, "loss": 0.8133, "step": 1419 }, { "epoch": 0.4828289697381843, "grad_norm": 2.157936588540733, "learning_rate": 5.522277146313117e-06, "loss": 0.7538, "step": 1420 }, { "epoch": 0.48316899013940834, "grad_norm": 2.4290999302724288, "learning_rate": 5.51679925905135e-06, "loss": 0.7814, "step": 1421 }, { "epoch": 0.48350901054063244, "grad_norm": 1.841958993342082, "learning_rate": 5.511320744710171e-06, "loss": 0.8118, "step": 1422 }, { "epoch": 0.4838490309418565, "grad_norm": 2.09445138995467, "learning_rate": 5.505841609937162e-06, "loss": 0.771, "step": 1423 }, { "epoch": 0.4841890513430806, "grad_norm": 1.9025737655981094, "learning_rate": 5.500361861380651e-06, "loss": 0.8158, "step": 1424 }, { "epoch": 0.48452907174430465, "grad_norm": 1.593238831589755, "learning_rate": 5.494881505689714e-06, "loss": 0.845, "step": 1425 }, { "epoch": 0.48486909214552876, "grad_norm": 2.1406933443102902, "learning_rate": 5.489400549514165e-06, "loss": 0.7092, "step": 1426 }, { "epoch": 0.4852091125467528, "grad_norm": 2.5531087262152354, "learning_rate": 5.483918999504544e-06, "loss": 0.7776, "step": 1427 }, { "epoch": 0.48554913294797686, "grad_norm": 1.919105692583851, "learning_rate": 5.478436862312113e-06, "loss": 0.719, "step": 1428 }, { "epoch": 0.48588915334920096, "grad_norm": 1.8829313213513676, "learning_rate": 5.472954144588847e-06, "loss": 0.7841, "step": 1429 }, { "epoch": 0.486229173750425, "grad_norm": 1.4881374527192293, "learning_rate": 5.467470852987424e-06, "loss": 0.7724, "step": 1430 }, { "epoch": 0.4865691941516491, "grad_norm": 1.924768602698392, "learning_rate": 5.4619869941612204e-06, "loss": 0.7726, "step": 1431 }, { "epoch": 0.48690921455287317, "grad_norm": 1.5858390673137608, "learning_rate": 5.456502574764299e-06, "loss": 0.8339, "step": 1432 }, { "epoch": 0.4872492349540972, "grad_norm": 2.009244140179676, "learning_rate": 5.4510176014514e-06, "loss": 0.8099, "step": 1433 }, { "epoch": 0.4875892553553213, "grad_norm": 1.7556534967172963, "learning_rate": 5.445532080877942e-06, "loss": 0.8161, "step": 1434 }, { "epoch": 0.4879292757565454, "grad_norm": 2.1544729558220315, "learning_rate": 5.440046019700004e-06, "loss": 0.822, "step": 1435 }, { "epoch": 0.4882692961577695, "grad_norm": 2.050182429485166, "learning_rate": 5.434559424574323e-06, "loss": 0.7798, "step": 1436 }, { "epoch": 0.48860931655899353, "grad_norm": 2.250279830505212, "learning_rate": 5.429072302158279e-06, "loss": 0.772, "step": 1437 }, { "epoch": 0.48894933696021764, "grad_norm": 1.7152397535373827, "learning_rate": 5.4235846591098995e-06, "loss": 0.7366, "step": 1438 }, { "epoch": 0.4892893573614417, "grad_norm": 1.99011884513494, "learning_rate": 5.4180965020878365e-06, "loss": 0.7173, "step": 1439 }, { "epoch": 0.48962937776266574, "grad_norm": 2.173655615360162, "learning_rate": 5.41260783775137e-06, "loss": 0.7406, "step": 1440 }, { "epoch": 0.48996939816388985, "grad_norm": 2.2971002978470576, "learning_rate": 5.407118672760393e-06, "loss": 0.9206, "step": 1441 }, { "epoch": 0.4903094185651139, "grad_norm": 1.5023371228055133, "learning_rate": 5.401629013775408e-06, "loss": 0.8379, "step": 1442 }, { "epoch": 0.490649438966338, "grad_norm": 2.0910378047987375, "learning_rate": 5.396138867457517e-06, "loss": 0.829, "step": 1443 }, { "epoch": 0.49098945936756205, "grad_norm": 3.7490164898531675, "learning_rate": 5.39064824046841e-06, "loss": 0.9118, "step": 1444 }, { "epoch": 0.4913294797687861, "grad_norm": 2.126644865691582, "learning_rate": 5.385157139470365e-06, "loss": 0.865, "step": 1445 }, { "epoch": 0.4916695001700102, "grad_norm": 1.8516246611445681, "learning_rate": 5.379665571126232e-06, "loss": 0.8226, "step": 1446 }, { "epoch": 0.49200952057123426, "grad_norm": 2.3484056426904467, "learning_rate": 5.374173542099429e-06, "loss": 0.7315, "step": 1447 }, { "epoch": 0.49234954097245837, "grad_norm": 1.5288170614125434, "learning_rate": 5.368681059053934e-06, "loss": 0.791, "step": 1448 }, { "epoch": 0.4926895613736824, "grad_norm": 1.9647101647201137, "learning_rate": 5.363188128654272e-06, "loss": 0.7512, "step": 1449 }, { "epoch": 0.49302958177490647, "grad_norm": 1.5696810158458874, "learning_rate": 5.357694757565515e-06, "loss": 0.8299, "step": 1450 }, { "epoch": 0.4933696021761306, "grad_norm": 2.0603657799295165, "learning_rate": 5.352200952453268e-06, "loss": 0.8204, "step": 1451 }, { "epoch": 0.4937096225773546, "grad_norm": 1.9159683428169025, "learning_rate": 5.3467067199836665e-06, "loss": 0.832, "step": 1452 }, { "epoch": 0.49404964297857873, "grad_norm": 2.2261677908581343, "learning_rate": 5.341212066823356e-06, "loss": 0.8217, "step": 1453 }, { "epoch": 0.4943896633798028, "grad_norm": 1.81536044760645, "learning_rate": 5.335716999639499e-06, "loss": 0.7984, "step": 1454 }, { "epoch": 0.4947296837810269, "grad_norm": 2.9109596077188447, "learning_rate": 5.330221525099761e-06, "loss": 0.7177, "step": 1455 }, { "epoch": 0.49506970418225094, "grad_norm": 2.0524828104111554, "learning_rate": 5.3247256498722985e-06, "loss": 0.8618, "step": 1456 }, { "epoch": 0.495409724583475, "grad_norm": 1.7519967928504512, "learning_rate": 5.319229380625754e-06, "loss": 0.8823, "step": 1457 }, { "epoch": 0.4957497449846991, "grad_norm": 1.8637116727130303, "learning_rate": 5.31373272402925e-06, "loss": 0.7384, "step": 1458 }, { "epoch": 0.49608976538592314, "grad_norm": 2.152147896428193, "learning_rate": 5.308235686752379e-06, "loss": 0.8812, "step": 1459 }, { "epoch": 0.49642978578714725, "grad_norm": 2.078235845036691, "learning_rate": 5.302738275465196e-06, "loss": 0.7727, "step": 1460 }, { "epoch": 0.4967698061883713, "grad_norm": 2.102266420017895, "learning_rate": 5.297240496838206e-06, "loss": 0.8562, "step": 1461 }, { "epoch": 0.49710982658959535, "grad_norm": 1.8174835697796294, "learning_rate": 5.291742357542364e-06, "loss": 0.8048, "step": 1462 }, { "epoch": 0.49744984699081946, "grad_norm": 1.754386383349789, "learning_rate": 5.2862438642490634e-06, "loss": 0.7872, "step": 1463 }, { "epoch": 0.4977898673920435, "grad_norm": 4.158515391970638, "learning_rate": 5.280745023630119e-06, "loss": 0.7779, "step": 1464 }, { "epoch": 0.4981298877932676, "grad_norm": 2.4054339081607767, "learning_rate": 5.275245842357778e-06, "loss": 0.7462, "step": 1465 }, { "epoch": 0.49846990819449166, "grad_norm": 2.689734918133284, "learning_rate": 5.269746327104693e-06, "loss": 0.8174, "step": 1466 }, { "epoch": 0.49880992859571577, "grad_norm": 2.295691045707937, "learning_rate": 5.264246484543926e-06, "loss": 0.7969, "step": 1467 }, { "epoch": 0.4991499489969398, "grad_norm": 2.107973616659225, "learning_rate": 5.258746321348934e-06, "loss": 0.7944, "step": 1468 }, { "epoch": 0.49948996939816387, "grad_norm": 1.930350616477822, "learning_rate": 5.253245844193564e-06, "loss": 0.8304, "step": 1469 }, { "epoch": 0.499829989799388, "grad_norm": 1.7525142127819853, "learning_rate": 5.247745059752044e-06, "loss": 0.7762, "step": 1470 }, { "epoch": 0.500170010200612, "grad_norm": 1.8529587101036085, "learning_rate": 5.242243974698975e-06, "loss": 0.8314, "step": 1471 }, { "epoch": 0.5005100306018361, "grad_norm": 2.3903901423865457, "learning_rate": 5.236742595709321e-06, "loss": 0.7822, "step": 1472 }, { "epoch": 0.5008500510030602, "grad_norm": 2.0348324811278777, "learning_rate": 5.231240929458406e-06, "loss": 0.7494, "step": 1473 }, { "epoch": 0.5011900714042843, "grad_norm": 1.7162050480913733, "learning_rate": 5.225738982621898e-06, "loss": 0.9737, "step": 1474 }, { "epoch": 0.5015300918055083, "grad_norm": 2.2064804559522093, "learning_rate": 5.220236761875811e-06, "loss": 0.7815, "step": 1475 }, { "epoch": 0.5018701122067324, "grad_norm": 2.2251124219486798, "learning_rate": 5.214734273896488e-06, "loss": 0.7881, "step": 1476 }, { "epoch": 0.5022101326079564, "grad_norm": 2.981440295612256, "learning_rate": 5.209231525360594e-06, "loss": 0.8, "step": 1477 }, { "epoch": 0.5025501530091806, "grad_norm": 7.45515078847437, "learning_rate": 5.203728522945115e-06, "loss": 0.7911, "step": 1478 }, { "epoch": 0.5028901734104047, "grad_norm": 1.92941189117845, "learning_rate": 5.198225273327343e-06, "loss": 0.7445, "step": 1479 }, { "epoch": 0.5032301938116287, "grad_norm": 1.8975113113950242, "learning_rate": 5.1927217831848685e-06, "loss": 0.843, "step": 1480 }, { "epoch": 0.5035702142128528, "grad_norm": 2.1094734040883893, "learning_rate": 5.187218059195578e-06, "loss": 0.8277, "step": 1481 }, { "epoch": 0.5039102346140768, "grad_norm": 2.1248192008186964, "learning_rate": 5.181714108037635e-06, "loss": 0.7933, "step": 1482 }, { "epoch": 0.504250255015301, "grad_norm": 1.8280812151104824, "learning_rate": 5.176209936389485e-06, "loss": 0.7447, "step": 1483 }, { "epoch": 0.504590275416525, "grad_norm": 3.305850569207107, "learning_rate": 5.17070555092984e-06, "loss": 0.7644, "step": 1484 }, { "epoch": 0.5049302958177491, "grad_norm": 2.714270205426286, "learning_rate": 5.1652009583376676e-06, "loss": 0.7827, "step": 1485 }, { "epoch": 0.5052703162189731, "grad_norm": 3.091352082861896, "learning_rate": 5.159696165292189e-06, "loss": 0.8001, "step": 1486 }, { "epoch": 0.5056103366201972, "grad_norm": 1.593603634259395, "learning_rate": 5.154191178472873e-06, "loss": 0.8329, "step": 1487 }, { "epoch": 0.5059503570214213, "grad_norm": 2.1224743879515633, "learning_rate": 5.148686004559412e-06, "loss": 0.7409, "step": 1488 }, { "epoch": 0.5062903774226454, "grad_norm": 3.01066198517414, "learning_rate": 5.143180650231741e-06, "loss": 0.8388, "step": 1489 }, { "epoch": 0.5066303978238694, "grad_norm": 1.7136792637059917, "learning_rate": 5.13767512217e-06, "loss": 0.7341, "step": 1490 }, { "epoch": 0.5069704182250935, "grad_norm": 2.0631670268496096, "learning_rate": 5.1321694270545455e-06, "loss": 0.7773, "step": 1491 }, { "epoch": 0.5073104386263175, "grad_norm": 1.874567686624954, "learning_rate": 5.12666357156594e-06, "loss": 0.7862, "step": 1492 }, { "epoch": 0.5076504590275417, "grad_norm": 6.715516555383307, "learning_rate": 5.121157562384936e-06, "loss": 0.8309, "step": 1493 }, { "epoch": 0.5079904794287657, "grad_norm": 1.8974326591409743, "learning_rate": 5.115651406192473e-06, "loss": 0.8229, "step": 1494 }, { "epoch": 0.5083304998299898, "grad_norm": 1.7753191132738406, "learning_rate": 5.110145109669671e-06, "loss": 0.7212, "step": 1495 }, { "epoch": 0.5086705202312138, "grad_norm": 1.8871431993037888, "learning_rate": 5.104638679497818e-06, "loss": 0.7695, "step": 1496 }, { "epoch": 0.5090105406324379, "grad_norm": 2.1209005798272993, "learning_rate": 5.0991321223583655e-06, "loss": 0.8439, "step": 1497 }, { "epoch": 0.509350561033662, "grad_norm": 1.8469772744160644, "learning_rate": 5.093625444932917e-06, "loss": 0.806, "step": 1498 }, { "epoch": 0.5096905814348861, "grad_norm": 2.281929279661747, "learning_rate": 5.088118653903225e-06, "loss": 0.8326, "step": 1499 }, { "epoch": 0.5100306018361102, "grad_norm": 1.9613864164506285, "learning_rate": 5.08261175595118e-06, "loss": 0.6774, "step": 1500 }, { "epoch": 0.5103706222373342, "grad_norm": 3.9527172939988366, "learning_rate": 5.0771047577587995e-06, "loss": 0.7849, "step": 1501 }, { "epoch": 0.5107106426385584, "grad_norm": 3.9507534634382218, "learning_rate": 5.071597666008223e-06, "loss": 0.8327, "step": 1502 }, { "epoch": 0.5110506630397824, "grad_norm": 2.3477801057322587, "learning_rate": 5.066090487381705e-06, "loss": 0.7659, "step": 1503 }, { "epoch": 0.5113906834410065, "grad_norm": 1.6637921910839917, "learning_rate": 5.060583228561604e-06, "loss": 0.7807, "step": 1504 }, { "epoch": 0.5117307038422305, "grad_norm": 1.9869572742030395, "learning_rate": 5.055075896230379e-06, "loss": 0.8009, "step": 1505 }, { "epoch": 0.5120707242434546, "grad_norm": 2.7596350421531493, "learning_rate": 5.0495684970705725e-06, "loss": 0.8015, "step": 1506 }, { "epoch": 0.5124107446446787, "grad_norm": 2.5992292669330306, "learning_rate": 5.044061037764814e-06, "loss": 0.7465, "step": 1507 }, { "epoch": 0.5127507650459028, "grad_norm": 1.7505681170663439, "learning_rate": 5.0385535249958015e-06, "loss": 0.8182, "step": 1508 }, { "epoch": 0.5130907854471268, "grad_norm": 2.1632485122473404, "learning_rate": 5.033045965446303e-06, "loss": 0.7183, "step": 1509 }, { "epoch": 0.5134308058483509, "grad_norm": 1.753555699009244, "learning_rate": 5.027538365799135e-06, "loss": 0.7862, "step": 1510 }, { "epoch": 0.5137708262495749, "grad_norm": 1.9243297591580018, "learning_rate": 5.022030732737172e-06, "loss": 0.8458, "step": 1511 }, { "epoch": 0.5141108466507991, "grad_norm": 2.177617314164665, "learning_rate": 5.016523072943321e-06, "loss": 0.8359, "step": 1512 }, { "epoch": 0.5144508670520231, "grad_norm": 1.4739366045699038, "learning_rate": 5.011015393100529e-06, "loss": 0.7725, "step": 1513 }, { "epoch": 0.5147908874532472, "grad_norm": 1.9809330712521047, "learning_rate": 5.00550769989176e-06, "loss": 0.8364, "step": 1514 }, { "epoch": 0.5151309078544712, "grad_norm": 1.7781499734908552, "learning_rate": 5e-06, "loss": 0.7835, "step": 1515 }, { "epoch": 0.5154709282556953, "grad_norm": 1.8799570009883524, "learning_rate": 4.994492300108241e-06, "loss": 0.7751, "step": 1516 }, { "epoch": 0.5158109486569195, "grad_norm": 3.073785329201592, "learning_rate": 4.988984606899473e-06, "loss": 0.7751, "step": 1517 }, { "epoch": 0.5161509690581435, "grad_norm": 1.776054822698832, "learning_rate": 4.9834769270566805e-06, "loss": 0.853, "step": 1518 }, { "epoch": 0.5164909894593676, "grad_norm": 1.7634367926127321, "learning_rate": 4.977969267262829e-06, "loss": 0.8076, "step": 1519 }, { "epoch": 0.5168310098605916, "grad_norm": 4.086423843722984, "learning_rate": 4.972461634200866e-06, "loss": 0.9011, "step": 1520 }, { "epoch": 0.5171710302618157, "grad_norm": 1.6421236623641557, "learning_rate": 4.966954034553699e-06, "loss": 0.8642, "step": 1521 }, { "epoch": 0.5175110506630398, "grad_norm": 1.8584454764781853, "learning_rate": 4.961446475004199e-06, "loss": 0.7653, "step": 1522 }, { "epoch": 0.5178510710642639, "grad_norm": 1.4656599468459788, "learning_rate": 4.955938962235186e-06, "loss": 0.8445, "step": 1523 }, { "epoch": 0.5181910914654879, "grad_norm": 1.8519620362169094, "learning_rate": 4.950431502929428e-06, "loss": 0.7378, "step": 1524 }, { "epoch": 0.518531111866712, "grad_norm": 2.3725424976917413, "learning_rate": 4.944924103769623e-06, "loss": 0.7779, "step": 1525 }, { "epoch": 0.5188711322679361, "grad_norm": 1.7226943911959502, "learning_rate": 4.939416771438397e-06, "loss": 0.7654, "step": 1526 }, { "epoch": 0.5192111526691602, "grad_norm": 1.8557184031747187, "learning_rate": 4.933909512618298e-06, "loss": 0.863, "step": 1527 }, { "epoch": 0.5195511730703842, "grad_norm": 2.115844790979398, "learning_rate": 4.928402333991777e-06, "loss": 0.7592, "step": 1528 }, { "epoch": 0.5198911934716083, "grad_norm": 1.6348805097468087, "learning_rate": 4.922895242241202e-06, "loss": 0.8855, "step": 1529 }, { "epoch": 0.5202312138728323, "grad_norm": 2.3404047261939858, "learning_rate": 4.91738824404882e-06, "loss": 0.8327, "step": 1530 }, { "epoch": 0.5205712342740565, "grad_norm": 1.8623557309255268, "learning_rate": 4.9118813460967754e-06, "loss": 0.8303, "step": 1531 }, { "epoch": 0.5209112546752805, "grad_norm": 2.2490241499645847, "learning_rate": 4.906374555067085e-06, "loss": 0.8482, "step": 1532 }, { "epoch": 0.5212512750765046, "grad_norm": 2.331643434530923, "learning_rate": 4.900867877641636e-06, "loss": 0.839, "step": 1533 }, { "epoch": 0.5215912954777286, "grad_norm": 2.095247817090777, "learning_rate": 4.895361320502185e-06, "loss": 0.7988, "step": 1534 }, { "epoch": 0.5219313158789527, "grad_norm": 1.723635906341325, "learning_rate": 4.88985489033033e-06, "loss": 0.7934, "step": 1535 }, { "epoch": 0.5222713362801769, "grad_norm": 2.046788329469224, "learning_rate": 4.8843485938075286e-06, "loss": 0.817, "step": 1536 }, { "epoch": 0.5226113566814009, "grad_norm": 2.0488449285989003, "learning_rate": 4.878842437615065e-06, "loss": 0.7112, "step": 1537 }, { "epoch": 0.522951377082625, "grad_norm": 1.9845826095458905, "learning_rate": 4.873336428434062e-06, "loss": 0.759, "step": 1538 }, { "epoch": 0.523291397483849, "grad_norm": 4.138763157740066, "learning_rate": 4.8678305729454545e-06, "loss": 0.8152, "step": 1539 }, { "epoch": 0.5236314178850731, "grad_norm": 3.647861718118265, "learning_rate": 4.862324877830003e-06, "loss": 0.8438, "step": 1540 }, { "epoch": 0.5239714382862972, "grad_norm": 2.2618049383095196, "learning_rate": 4.856819349768262e-06, "loss": 0.7159, "step": 1541 }, { "epoch": 0.5243114586875213, "grad_norm": 1.5426259829995164, "learning_rate": 4.851313995440589e-06, "loss": 0.7474, "step": 1542 }, { "epoch": 0.5246514790887453, "grad_norm": 1.9326266116251898, "learning_rate": 4.845808821527131e-06, "loss": 0.7739, "step": 1543 }, { "epoch": 0.5249914994899694, "grad_norm": 1.838847556325622, "learning_rate": 4.840303834707811e-06, "loss": 0.7753, "step": 1544 }, { "epoch": 0.5253315198911934, "grad_norm": 1.7052452591835734, "learning_rate": 4.834799041662333e-06, "loss": 0.6825, "step": 1545 }, { "epoch": 0.5256715402924176, "grad_norm": 2.2889041372273056, "learning_rate": 4.829294449070161e-06, "loss": 0.8191, "step": 1546 }, { "epoch": 0.5260115606936416, "grad_norm": 3.1528847052040416, "learning_rate": 4.8237900636105154e-06, "loss": 0.8092, "step": 1547 }, { "epoch": 0.5263515810948657, "grad_norm": 4.090914456990795, "learning_rate": 4.818285891962367e-06, "loss": 0.8098, "step": 1548 }, { "epoch": 0.5266916014960897, "grad_norm": 1.659291969185622, "learning_rate": 4.812781940804424e-06, "loss": 0.8033, "step": 1549 }, { "epoch": 0.5270316218973138, "grad_norm": 2.134430251801482, "learning_rate": 4.807278216815132e-06, "loss": 0.8078, "step": 1550 }, { "epoch": 0.527371642298538, "grad_norm": 3.1931811524180778, "learning_rate": 4.801774726672658e-06, "loss": 0.9237, "step": 1551 }, { "epoch": 0.527711662699762, "grad_norm": 2.616206525270748, "learning_rate": 4.796271477054887e-06, "loss": 0.7764, "step": 1552 }, { "epoch": 0.528051683100986, "grad_norm": 2.221145968614602, "learning_rate": 4.790768474639407e-06, "loss": 0.8206, "step": 1553 }, { "epoch": 0.5283917035022101, "grad_norm": 2.892055480983333, "learning_rate": 4.785265726103514e-06, "loss": 0.7451, "step": 1554 }, { "epoch": 0.5287317239034343, "grad_norm": 1.6659559597323008, "learning_rate": 4.77976323812419e-06, "loss": 0.8594, "step": 1555 }, { "epoch": 0.5290717443046583, "grad_norm": 1.9584708522977028, "learning_rate": 4.7742610173781025e-06, "loss": 0.7449, "step": 1556 }, { "epoch": 0.5294117647058824, "grad_norm": 3.613058182805375, "learning_rate": 4.768759070541596e-06, "loss": 0.8322, "step": 1557 }, { "epoch": 0.5297517851071064, "grad_norm": 1.9177605042321149, "learning_rate": 4.76325740429068e-06, "loss": 0.8372, "step": 1558 }, { "epoch": 0.5300918055083305, "grad_norm": 1.9899246030541402, "learning_rate": 4.7577560253010275e-06, "loss": 0.7641, "step": 1559 }, { "epoch": 0.5304318259095546, "grad_norm": 2.3476977109929975, "learning_rate": 4.752254940247956e-06, "loss": 0.8484, "step": 1560 }, { "epoch": 0.5307718463107787, "grad_norm": 1.8469551861867957, "learning_rate": 4.746754155806437e-06, "loss": 0.8196, "step": 1561 }, { "epoch": 0.5311118667120027, "grad_norm": 2.1425767308127495, "learning_rate": 4.741253678651067e-06, "loss": 0.86, "step": 1562 }, { "epoch": 0.5314518871132268, "grad_norm": 2.4488270324011054, "learning_rate": 4.735753515456076e-06, "loss": 0.801, "step": 1563 }, { "epoch": 0.5317919075144508, "grad_norm": 2.5625039316596947, "learning_rate": 4.7302536728953095e-06, "loss": 0.7215, "step": 1564 }, { "epoch": 0.532131927915675, "grad_norm": 2.330648518728265, "learning_rate": 4.724754157642223e-06, "loss": 0.8298, "step": 1565 }, { "epoch": 0.532471948316899, "grad_norm": 1.6442430241473305, "learning_rate": 4.719254976369882e-06, "loss": 0.8346, "step": 1566 }, { "epoch": 0.5328119687181231, "grad_norm": 2.06375287355313, "learning_rate": 4.713756135750939e-06, "loss": 0.8094, "step": 1567 }, { "epoch": 0.5331519891193471, "grad_norm": 1.8032858583233626, "learning_rate": 4.708257642457637e-06, "loss": 0.7847, "step": 1568 }, { "epoch": 0.5334920095205712, "grad_norm": 2.9072319400722106, "learning_rate": 4.702759503161794e-06, "loss": 0.7787, "step": 1569 }, { "epoch": 0.5338320299217953, "grad_norm": 2.737668124200652, "learning_rate": 4.697261724534805e-06, "loss": 0.9145, "step": 1570 }, { "epoch": 0.5341720503230194, "grad_norm": 1.5899160859318942, "learning_rate": 4.691764313247621e-06, "loss": 0.806, "step": 1571 }, { "epoch": 0.5345120707242434, "grad_norm": 2.128696822075121, "learning_rate": 4.686267275970751e-06, "loss": 0.8027, "step": 1572 }, { "epoch": 0.5348520911254675, "grad_norm": 6.327392776694456, "learning_rate": 4.680770619374248e-06, "loss": 0.9375, "step": 1573 }, { "epoch": 0.5351921115266915, "grad_norm": 1.8507733306360983, "learning_rate": 4.675274350127702e-06, "loss": 0.6373, "step": 1574 }, { "epoch": 0.5355321319279157, "grad_norm": 3.2014249362155764, "learning_rate": 4.669778474900241e-06, "loss": 0.7396, "step": 1575 }, { "epoch": 0.5358721523291398, "grad_norm": 1.8220955607716283, "learning_rate": 4.664283000360501e-06, "loss": 0.7536, "step": 1576 }, { "epoch": 0.5362121727303638, "grad_norm": 2.0103763713686202, "learning_rate": 4.6587879331766465e-06, "loss": 0.766, "step": 1577 }, { "epoch": 0.5365521931315879, "grad_norm": 1.9882582274236145, "learning_rate": 4.653293280016335e-06, "loss": 0.7164, "step": 1578 }, { "epoch": 0.536892213532812, "grad_norm": 2.332642295450942, "learning_rate": 4.647799047546733e-06, "loss": 0.804, "step": 1579 }, { "epoch": 0.5372322339340361, "grad_norm": 2.1481887049056367, "learning_rate": 4.642305242434488e-06, "loss": 0.8621, "step": 1580 }, { "epoch": 0.5375722543352601, "grad_norm": 2.233927473620768, "learning_rate": 4.63681187134573e-06, "loss": 0.8682, "step": 1581 }, { "epoch": 0.5379122747364842, "grad_norm": 1.8595284687765963, "learning_rate": 4.6313189409460694e-06, "loss": 0.8078, "step": 1582 }, { "epoch": 0.5382522951377082, "grad_norm": 1.6189064806741513, "learning_rate": 4.625826457900573e-06, "loss": 0.7825, "step": 1583 }, { "epoch": 0.5385923155389324, "grad_norm": 1.7109496499151693, "learning_rate": 4.62033442887377e-06, "loss": 0.8204, "step": 1584 }, { "epoch": 0.5389323359401564, "grad_norm": 1.9628012655876577, "learning_rate": 4.614842860529636e-06, "loss": 0.7718, "step": 1585 }, { "epoch": 0.5392723563413805, "grad_norm": 2.215853939912509, "learning_rate": 4.6093517595315906e-06, "loss": 0.8478, "step": 1586 }, { "epoch": 0.5396123767426045, "grad_norm": 1.7285326667444412, "learning_rate": 4.603861132542484e-06, "loss": 0.7447, "step": 1587 }, { "epoch": 0.5399523971438286, "grad_norm": 1.8991399112924394, "learning_rate": 4.598370986224594e-06, "loss": 0.804, "step": 1588 }, { "epoch": 0.5402924175450527, "grad_norm": 2.1876690348212477, "learning_rate": 4.59288132723961e-06, "loss": 0.827, "step": 1589 }, { "epoch": 0.5406324379462768, "grad_norm": 3.388267144912403, "learning_rate": 4.587392162248631e-06, "loss": 0.9509, "step": 1590 }, { "epoch": 0.5409724583475009, "grad_norm": 2.75901823504875, "learning_rate": 4.581903497912164e-06, "loss": 0.8255, "step": 1591 }, { "epoch": 0.5413124787487249, "grad_norm": 3.0797491504488193, "learning_rate": 4.576415340890101e-06, "loss": 0.9066, "step": 1592 }, { "epoch": 0.541652499149949, "grad_norm": 1.7281255669918878, "learning_rate": 4.570927697841722e-06, "loss": 0.8885, "step": 1593 }, { "epoch": 0.5419925195511731, "grad_norm": 1.780260195431119, "learning_rate": 4.565440575425678e-06, "loss": 0.8186, "step": 1594 }, { "epoch": 0.5423325399523972, "grad_norm": 1.698715980373987, "learning_rate": 4.559953980299998e-06, "loss": 0.7423, "step": 1595 }, { "epoch": 0.5426725603536212, "grad_norm": 1.8233055697908436, "learning_rate": 4.554467919122061e-06, "loss": 0.7461, "step": 1596 }, { "epoch": 0.5430125807548453, "grad_norm": 2.0732693883965747, "learning_rate": 4.548982398548601e-06, "loss": 0.8519, "step": 1597 }, { "epoch": 0.5433526011560693, "grad_norm": 4.076606566555027, "learning_rate": 4.543497425235705e-06, "loss": 0.8375, "step": 1598 }, { "epoch": 0.5436926215572935, "grad_norm": 3.726800785573923, "learning_rate": 4.538013005838781e-06, "loss": 0.8457, "step": 1599 }, { "epoch": 0.5440326419585175, "grad_norm": 1.640942051570328, "learning_rate": 4.532529147012578e-06, "loss": 0.7555, "step": 1600 }, { "epoch": 0.5443726623597416, "grad_norm": 2.1236371326115004, "learning_rate": 4.527045855411153e-06, "loss": 0.7701, "step": 1601 }, { "epoch": 0.5447126827609656, "grad_norm": 1.7181777314185185, "learning_rate": 4.521563137687889e-06, "loss": 0.8164, "step": 1602 }, { "epoch": 0.5450527031621897, "grad_norm": 2.448664581131007, "learning_rate": 4.516081000495458e-06, "loss": 0.8668, "step": 1603 }, { "epoch": 0.5453927235634138, "grad_norm": 2.494208677148641, "learning_rate": 4.510599450485838e-06, "loss": 0.8405, "step": 1604 }, { "epoch": 0.5457327439646379, "grad_norm": 1.9946946250715516, "learning_rate": 4.505118494310289e-06, "loss": 0.8654, "step": 1605 }, { "epoch": 0.5460727643658619, "grad_norm": 2.016493132082253, "learning_rate": 4.499638138619351e-06, "loss": 0.7986, "step": 1606 }, { "epoch": 0.546412784767086, "grad_norm": 5.5575317498533705, "learning_rate": 4.49415839006284e-06, "loss": 0.8583, "step": 1607 }, { "epoch": 0.5467528051683102, "grad_norm": 1.5958424468455283, "learning_rate": 4.488679255289829e-06, "loss": 0.7993, "step": 1608 }, { "epoch": 0.5470928255695342, "grad_norm": 3.075588169517648, "learning_rate": 4.483200740948652e-06, "loss": 0.6526, "step": 1609 }, { "epoch": 0.5474328459707583, "grad_norm": 2.27438391686465, "learning_rate": 4.477722853686883e-06, "loss": 0.7749, "step": 1610 }, { "epoch": 0.5477728663719823, "grad_norm": 1.899436303639678, "learning_rate": 4.472245600151344e-06, "loss": 0.7449, "step": 1611 }, { "epoch": 0.5481128867732064, "grad_norm": 1.8159424708385177, "learning_rate": 4.466768986988082e-06, "loss": 0.7725, "step": 1612 }, { "epoch": 0.5484529071744305, "grad_norm": 2.3206076275584118, "learning_rate": 4.461293020842366e-06, "loss": 0.8011, "step": 1613 }, { "epoch": 0.5487929275756546, "grad_norm": 2.5334818132018735, "learning_rate": 4.4558177083586855e-06, "loss": 0.8291, "step": 1614 }, { "epoch": 0.5491329479768786, "grad_norm": 1.824370986639498, "learning_rate": 4.450343056180731e-06, "loss": 0.8763, "step": 1615 }, { "epoch": 0.5494729683781027, "grad_norm": 2.265085787284215, "learning_rate": 4.444869070951398e-06, "loss": 0.7383, "step": 1616 }, { "epoch": 0.5498129887793267, "grad_norm": 1.5706339248830496, "learning_rate": 4.439395759312765e-06, "loss": 0.7321, "step": 1617 }, { "epoch": 0.5501530091805509, "grad_norm": 1.8450960380829842, "learning_rate": 4.433923127906101e-06, "loss": 0.8253, "step": 1618 }, { "epoch": 0.5504930295817749, "grad_norm": 1.8895954088436864, "learning_rate": 4.428451183371844e-06, "loss": 0.7584, "step": 1619 }, { "epoch": 0.550833049982999, "grad_norm": 1.7647869933641376, "learning_rate": 4.422979932349601e-06, "loss": 0.8461, "step": 1620 }, { "epoch": 0.551173070384223, "grad_norm": 1.726859257880695, "learning_rate": 4.417509381478139e-06, "loss": 0.9478, "step": 1621 }, { "epoch": 0.5515130907854471, "grad_norm": 1.7830292603282358, "learning_rate": 4.412039537395369e-06, "loss": 0.8192, "step": 1622 }, { "epoch": 0.5518531111866712, "grad_norm": 2.135528104500953, "learning_rate": 4.4065704067383526e-06, "loss": 0.789, "step": 1623 }, { "epoch": 0.5521931315878953, "grad_norm": 2.2835744809721947, "learning_rate": 4.401101996143281e-06, "loss": 0.7897, "step": 1624 }, { "epoch": 0.5525331519891193, "grad_norm": 1.9968779612406193, "learning_rate": 4.395634312245473e-06, "loss": 0.8017, "step": 1625 }, { "epoch": 0.5528731723903434, "grad_norm": 1.7386489203169355, "learning_rate": 4.390167361679363e-06, "loss": 0.8258, "step": 1626 }, { "epoch": 0.5532131927915674, "grad_norm": 2.3880896647187813, "learning_rate": 4.384701151078502e-06, "loss": 0.6548, "step": 1627 }, { "epoch": 0.5535532131927916, "grad_norm": 2.3052787463523248, "learning_rate": 4.379235687075538e-06, "loss": 0.8939, "step": 1628 }, { "epoch": 0.5538932335940157, "grad_norm": 1.9456406943708848, "learning_rate": 4.373770976302212e-06, "loss": 0.7207, "step": 1629 }, { "epoch": 0.5542332539952397, "grad_norm": 2.233470695480692, "learning_rate": 4.368307025389355e-06, "loss": 0.9426, "step": 1630 }, { "epoch": 0.5545732743964638, "grad_norm": 1.9001780195038485, "learning_rate": 4.362843840966872e-06, "loss": 0.7396, "step": 1631 }, { "epoch": 0.5549132947976878, "grad_norm": 1.6170663156516558, "learning_rate": 4.357381429663744e-06, "loss": 0.7398, "step": 1632 }, { "epoch": 0.555253315198912, "grad_norm": 1.7903568747147653, "learning_rate": 4.351919798108006e-06, "loss": 0.7973, "step": 1633 }, { "epoch": 0.555593335600136, "grad_norm": 1.8519949395264552, "learning_rate": 4.346458952926754e-06, "loss": 0.7845, "step": 1634 }, { "epoch": 0.5559333560013601, "grad_norm": 2.312852525979355, "learning_rate": 4.340998900746123e-06, "loss": 0.7661, "step": 1635 }, { "epoch": 0.5562733764025841, "grad_norm": 1.9538845003355985, "learning_rate": 4.335539648191295e-06, "loss": 0.8089, "step": 1636 }, { "epoch": 0.5566133968038083, "grad_norm": 2.0572388515661495, "learning_rate": 4.330081201886473e-06, "loss": 0.8594, "step": 1637 }, { "epoch": 0.5569534172050323, "grad_norm": 1.7570956508285698, "learning_rate": 4.324623568454881e-06, "loss": 0.7019, "step": 1638 }, { "epoch": 0.5572934376062564, "grad_norm": 2.6666409487156297, "learning_rate": 4.319166754518768e-06, "loss": 0.8802, "step": 1639 }, { "epoch": 0.5576334580074804, "grad_norm": 8.161817414104679, "learning_rate": 4.313710766699377e-06, "loss": 0.8173, "step": 1640 }, { "epoch": 0.5579734784087045, "grad_norm": 1.5517265216589782, "learning_rate": 4.308255611616954e-06, "loss": 0.7627, "step": 1641 }, { "epoch": 0.5583134988099286, "grad_norm": 2.276892346390744, "learning_rate": 4.302801295890731e-06, "loss": 0.8266, "step": 1642 }, { "epoch": 0.5586535192111527, "grad_norm": 1.5667650901676113, "learning_rate": 4.297347826138929e-06, "loss": 0.7707, "step": 1643 }, { "epoch": 0.5589935396123767, "grad_norm": 1.6949899190570468, "learning_rate": 4.291895208978734e-06, "loss": 0.7413, "step": 1644 }, { "epoch": 0.5593335600136008, "grad_norm": 2.006877875279477, "learning_rate": 4.2864434510263e-06, "loss": 0.7829, "step": 1645 }, { "epoch": 0.5596735804148248, "grad_norm": 3.095263458414816, "learning_rate": 4.280992558896742e-06, "loss": 0.7722, "step": 1646 }, { "epoch": 0.560013600816049, "grad_norm": 2.240127359014377, "learning_rate": 4.275542539204118e-06, "loss": 0.7562, "step": 1647 }, { "epoch": 0.5603536212172731, "grad_norm": 2.1541576931196107, "learning_rate": 4.270093398561437e-06, "loss": 0.7223, "step": 1648 }, { "epoch": 0.5606936416184971, "grad_norm": 1.890091367677857, "learning_rate": 4.26464514358063e-06, "loss": 0.7961, "step": 1649 }, { "epoch": 0.5610336620197212, "grad_norm": 2.1168877878836514, "learning_rate": 4.259197780872562e-06, "loss": 0.8332, "step": 1650 }, { "epoch": 0.5613736824209452, "grad_norm": 1.7140754905216478, "learning_rate": 4.2537513170470105e-06, "loss": 0.8327, "step": 1651 }, { "epoch": 0.5617137028221694, "grad_norm": 2.441707848688923, "learning_rate": 4.248305758712666e-06, "loss": 0.7136, "step": 1652 }, { "epoch": 0.5620537232233934, "grad_norm": 2.2663018210778483, "learning_rate": 4.2428611124771184e-06, "loss": 0.7338, "step": 1653 }, { "epoch": 0.5623937436246175, "grad_norm": 1.7042736119872506, "learning_rate": 4.237417384946846e-06, "loss": 0.8221, "step": 1654 }, { "epoch": 0.5627337640258415, "grad_norm": 1.7857448847407418, "learning_rate": 4.231974582727223e-06, "loss": 0.8938, "step": 1655 }, { "epoch": 0.5630737844270656, "grad_norm": 8.534990207003116, "learning_rate": 4.226532712422492e-06, "loss": 0.8593, "step": 1656 }, { "epoch": 0.5634138048282897, "grad_norm": 2.0702826945140456, "learning_rate": 4.221091780635768e-06, "loss": 0.8043, "step": 1657 }, { "epoch": 0.5637538252295138, "grad_norm": 1.880149571201394, "learning_rate": 4.215651793969026e-06, "loss": 0.7408, "step": 1658 }, { "epoch": 0.5640938456307378, "grad_norm": 1.9094946983165033, "learning_rate": 4.210212759023099e-06, "loss": 0.85, "step": 1659 }, { "epoch": 0.5644338660319619, "grad_norm": 2.0826267533303144, "learning_rate": 4.204774682397658e-06, "loss": 0.7968, "step": 1660 }, { "epoch": 0.564773886433186, "grad_norm": 1.9576152950783854, "learning_rate": 4.199337570691214e-06, "loss": 0.7934, "step": 1661 }, { "epoch": 0.5651139068344101, "grad_norm": 1.8605000019574227, "learning_rate": 4.1939014305011116e-06, "loss": 0.7489, "step": 1662 }, { "epoch": 0.5654539272356341, "grad_norm": 1.7452937853609298, "learning_rate": 4.188466268423507e-06, "loss": 0.798, "step": 1663 }, { "epoch": 0.5657939476368582, "grad_norm": 2.2351881221107233, "learning_rate": 4.183032091053381e-06, "loss": 0.7977, "step": 1664 }, { "epoch": 0.5661339680380822, "grad_norm": 1.949488664977891, "learning_rate": 4.1775989049845105e-06, "loss": 0.7882, "step": 1665 }, { "epoch": 0.5664739884393064, "grad_norm": 1.9412918778769286, "learning_rate": 4.172166716809475e-06, "loss": 0.8033, "step": 1666 }, { "epoch": 0.5668140088405305, "grad_norm": 1.8838839464107233, "learning_rate": 4.166735533119638e-06, "loss": 0.7347, "step": 1667 }, { "epoch": 0.5671540292417545, "grad_norm": 2.4094539519885823, "learning_rate": 4.16130536050515e-06, "loss": 0.8985, "step": 1668 }, { "epoch": 0.5674940496429786, "grad_norm": 1.832393433721923, "learning_rate": 4.155876205554931e-06, "loss": 0.7948, "step": 1669 }, { "epoch": 0.5678340700442026, "grad_norm": 1.7842845158543639, "learning_rate": 4.150448074856667e-06, "loss": 0.856, "step": 1670 }, { "epoch": 0.5681740904454268, "grad_norm": 5.093298965869656, "learning_rate": 4.145020974996802e-06, "loss": 0.8544, "step": 1671 }, { "epoch": 0.5685141108466508, "grad_norm": 1.8288966692154494, "learning_rate": 4.139594912560526e-06, "loss": 0.7695, "step": 1672 }, { "epoch": 0.5688541312478749, "grad_norm": 2.5087803047490196, "learning_rate": 4.134169894131776e-06, "loss": 0.8, "step": 1673 }, { "epoch": 0.5691941516490989, "grad_norm": 3.9759397871617006, "learning_rate": 4.1287459262932164e-06, "loss": 0.8681, "step": 1674 }, { "epoch": 0.569534172050323, "grad_norm": 1.7556306149575895, "learning_rate": 4.123323015626241e-06, "loss": 0.9425, "step": 1675 }, { "epoch": 0.5698741924515471, "grad_norm": 1.822886620788618, "learning_rate": 4.11790116871096e-06, "loss": 0.8339, "step": 1676 }, { "epoch": 0.5702142128527712, "grad_norm": 2.32577771957017, "learning_rate": 4.112480392126187e-06, "loss": 0.7799, "step": 1677 }, { "epoch": 0.5705542332539952, "grad_norm": 2.3654748461284267, "learning_rate": 4.107060692449447e-06, "loss": 0.7794, "step": 1678 }, { "epoch": 0.5708942536552193, "grad_norm": 2.1326059990159, "learning_rate": 4.1016420762569496e-06, "loss": 0.6922, "step": 1679 }, { "epoch": 0.5712342740564433, "grad_norm": 1.6500811511500117, "learning_rate": 4.096224550123597e-06, "loss": 0.9321, "step": 1680 }, { "epoch": 0.5715742944576675, "grad_norm": 2.288324199496334, "learning_rate": 4.090808120622961e-06, "loss": 0.8088, "step": 1681 }, { "epoch": 0.5719143148588915, "grad_norm": 3.335932302163143, "learning_rate": 4.08539279432729e-06, "loss": 0.7918, "step": 1682 }, { "epoch": 0.5722543352601156, "grad_norm": 2.018206897778278, "learning_rate": 4.079978577807487e-06, "loss": 0.8091, "step": 1683 }, { "epoch": 0.5725943556613396, "grad_norm": 1.8917366828566053, "learning_rate": 4.074565477633117e-06, "loss": 0.8174, "step": 1684 }, { "epoch": 0.5729343760625637, "grad_norm": 1.6840308709452019, "learning_rate": 4.069153500372382e-06, "loss": 0.794, "step": 1685 }, { "epoch": 0.5732743964637879, "grad_norm": 2.0560413539850972, "learning_rate": 4.063742652592125e-06, "loss": 0.8338, "step": 1686 }, { "epoch": 0.5736144168650119, "grad_norm": 2.715704335230041, "learning_rate": 4.0583329408578185e-06, "loss": 0.8608, "step": 1687 }, { "epoch": 0.573954437266236, "grad_norm": 1.8308222638540965, "learning_rate": 4.052924371733555e-06, "loss": 0.7391, "step": 1688 }, { "epoch": 0.57429445766746, "grad_norm": 3.93217327404691, "learning_rate": 4.047516951782046e-06, "loss": 0.8336, "step": 1689 }, { "epoch": 0.5746344780686842, "grad_norm": 1.7155299358553424, "learning_rate": 4.0421106875646e-06, "loss": 0.7387, "step": 1690 }, { "epoch": 0.5749744984699082, "grad_norm": 2.924796486558408, "learning_rate": 4.036705585641131e-06, "loss": 0.8656, "step": 1691 }, { "epoch": 0.5753145188711323, "grad_norm": 2.1154010899917015, "learning_rate": 4.031301652570139e-06, "loss": 0.8103, "step": 1692 }, { "epoch": 0.5756545392723563, "grad_norm": 1.7593975839358962, "learning_rate": 4.0258988949087015e-06, "loss": 0.7343, "step": 1693 }, { "epoch": 0.5759945596735804, "grad_norm": 7.277324615448209, "learning_rate": 4.020497319212482e-06, "loss": 0.9342, "step": 1694 }, { "epoch": 0.5763345800748045, "grad_norm": 1.8762066741865282, "learning_rate": 4.015096932035695e-06, "loss": 0.8569, "step": 1695 }, { "epoch": 0.5766746004760286, "grad_norm": 2.5094661743116227, "learning_rate": 4.009697739931125e-06, "loss": 0.7803, "step": 1696 }, { "epoch": 0.5770146208772526, "grad_norm": 2.368718744294282, "learning_rate": 4.004299749450099e-06, "loss": 0.7593, "step": 1697 }, { "epoch": 0.5773546412784767, "grad_norm": 1.7379622792872598, "learning_rate": 3.99890296714249e-06, "loss": 0.8102, "step": 1698 }, { "epoch": 0.5776946616797007, "grad_norm": 1.7950661253826894, "learning_rate": 3.993507399556699e-06, "loss": 0.8261, "step": 1699 }, { "epoch": 0.5780346820809249, "grad_norm": 1.7895033857380052, "learning_rate": 3.988113053239664e-06, "loss": 0.7831, "step": 1700 }, { "epoch": 0.578374702482149, "grad_norm": 2.513724608663744, "learning_rate": 3.982719934736832e-06, "loss": 0.7863, "step": 1701 }, { "epoch": 0.578714722883373, "grad_norm": 1.7341110425502526, "learning_rate": 3.977328050592161e-06, "loss": 0.9247, "step": 1702 }, { "epoch": 0.579054743284597, "grad_norm": 1.8845071679357839, "learning_rate": 3.971937407348115e-06, "loss": 0.8488, "step": 1703 }, { "epoch": 0.5793947636858211, "grad_norm": 1.8072089808348282, "learning_rate": 3.966548011545648e-06, "loss": 0.7179, "step": 1704 }, { "epoch": 0.5797347840870453, "grad_norm": 3.0702338796288307, "learning_rate": 3.961159869724207e-06, "loss": 0.7202, "step": 1705 }, { "epoch": 0.5800748044882693, "grad_norm": 1.759787279998497, "learning_rate": 3.955772988421709e-06, "loss": 0.768, "step": 1706 }, { "epoch": 0.5804148248894934, "grad_norm": 2.3433623345553034, "learning_rate": 3.950387374174548e-06, "loss": 0.6933, "step": 1707 }, { "epoch": 0.5807548452907174, "grad_norm": 1.7657990260540302, "learning_rate": 3.945003033517578e-06, "loss": 0.7882, "step": 1708 }, { "epoch": 0.5810948656919415, "grad_norm": 1.6247662467902115, "learning_rate": 3.9396199729841044e-06, "loss": 0.7497, "step": 1709 }, { "epoch": 0.5814348860931656, "grad_norm": 4.31861063498843, "learning_rate": 3.934238199105887e-06, "loss": 0.6626, "step": 1710 }, { "epoch": 0.5817749064943897, "grad_norm": 1.9596435537578243, "learning_rate": 3.928857718413119e-06, "loss": 0.6802, "step": 1711 }, { "epoch": 0.5821149268956137, "grad_norm": 2.133099365831387, "learning_rate": 3.9234785374344264e-06, "loss": 0.8929, "step": 1712 }, { "epoch": 0.5824549472968378, "grad_norm": 1.6595599877288754, "learning_rate": 3.918100662696853e-06, "loss": 0.9019, "step": 1713 }, { "epoch": 0.5827949676980619, "grad_norm": 4.129797231985507, "learning_rate": 3.9127241007258695e-06, "loss": 0.704, "step": 1714 }, { "epoch": 0.583134988099286, "grad_norm": 2.2096678154176854, "learning_rate": 3.907348858045338e-06, "loss": 0.767, "step": 1715 }, { "epoch": 0.58347500850051, "grad_norm": 2.050103065477324, "learning_rate": 3.9019749411775336e-06, "loss": 0.7629, "step": 1716 }, { "epoch": 0.5838150289017341, "grad_norm": 2.224362598207352, "learning_rate": 3.8966023566431154e-06, "loss": 0.7301, "step": 1717 }, { "epoch": 0.5841550493029581, "grad_norm": 3.471505729243675, "learning_rate": 3.891231110961126e-06, "loss": 0.8771, "step": 1718 }, { "epoch": 0.5844950697041823, "grad_norm": 2.2233454405320017, "learning_rate": 3.885861210648987e-06, "loss": 0.8412, "step": 1719 }, { "epoch": 0.5848350901054064, "grad_norm": 1.8614586671959485, "learning_rate": 3.880492662222483e-06, "loss": 0.7183, "step": 1720 }, { "epoch": 0.5851751105066304, "grad_norm": 3.3042359139392645, "learning_rate": 3.875125472195764e-06, "loss": 0.7574, "step": 1721 }, { "epoch": 0.5855151309078545, "grad_norm": 1.6934396731563182, "learning_rate": 3.869759647081326e-06, "loss": 0.7454, "step": 1722 }, { "epoch": 0.5858551513090785, "grad_norm": 1.9354501363208503, "learning_rate": 3.8643951933900125e-06, "loss": 0.8003, "step": 1723 }, { "epoch": 0.5861951717103027, "grad_norm": 2.033785372249832, "learning_rate": 3.859032117631002e-06, "loss": 0.9099, "step": 1724 }, { "epoch": 0.5865351921115267, "grad_norm": 1.6684392122823892, "learning_rate": 3.853670426311797e-06, "loss": 0.7391, "step": 1725 }, { "epoch": 0.5868752125127508, "grad_norm": 2.023118010181095, "learning_rate": 3.848310125938229e-06, "loss": 0.8358, "step": 1726 }, { "epoch": 0.5872152329139748, "grad_norm": 2.3269706588518604, "learning_rate": 3.842951223014433e-06, "loss": 0.8102, "step": 1727 }, { "epoch": 0.5875552533151989, "grad_norm": 1.9400944184932607, "learning_rate": 3.837593724042854e-06, "loss": 0.7688, "step": 1728 }, { "epoch": 0.587895273716423, "grad_norm": 1.6651797388262217, "learning_rate": 3.832237635524229e-06, "loss": 0.7588, "step": 1729 }, { "epoch": 0.5882352941176471, "grad_norm": 1.5455784587197665, "learning_rate": 3.826882963957589e-06, "loss": 0.7464, "step": 1730 }, { "epoch": 0.5885753145188711, "grad_norm": 1.85747121655971, "learning_rate": 3.821529715840241e-06, "loss": 0.7595, "step": 1731 }, { "epoch": 0.5889153349200952, "grad_norm": 2.6341826537925495, "learning_rate": 3.816177897667767e-06, "loss": 0.7596, "step": 1732 }, { "epoch": 0.5892553553213192, "grad_norm": 2.3495118501585717, "learning_rate": 3.810827515934013e-06, "loss": 0.7428, "step": 1733 }, { "epoch": 0.5895953757225434, "grad_norm": 4.349211175598281, "learning_rate": 3.8054785771310817e-06, "loss": 0.7613, "step": 1734 }, { "epoch": 0.5899353961237674, "grad_norm": 2.1363024851103876, "learning_rate": 3.8001310877493265e-06, "loss": 0.844, "step": 1735 }, { "epoch": 0.5902754165249915, "grad_norm": 4.0769838432188505, "learning_rate": 3.7947850542773396e-06, "loss": 0.7463, "step": 1736 }, { "epoch": 0.5906154369262155, "grad_norm": 1.6554083285238113, "learning_rate": 3.7894404832019514e-06, "loss": 0.8421, "step": 1737 }, { "epoch": 0.5909554573274396, "grad_norm": 1.503047933162913, "learning_rate": 3.784097381008212e-06, "loss": 0.7792, "step": 1738 }, { "epoch": 0.5912954777286638, "grad_norm": 5.221808674383001, "learning_rate": 3.778755754179394e-06, "loss": 0.9246, "step": 1739 }, { "epoch": 0.5916354981298878, "grad_norm": 1.679707592266952, "learning_rate": 3.7734156091969766e-06, "loss": 0.8391, "step": 1740 }, { "epoch": 0.5919755185311119, "grad_norm": 2.1866548043016145, "learning_rate": 3.7680769525406398e-06, "loss": 0.8404, "step": 1741 }, { "epoch": 0.5923155389323359, "grad_norm": 1.9227767898016401, "learning_rate": 3.762739790688264e-06, "loss": 0.6675, "step": 1742 }, { "epoch": 0.5926555593335601, "grad_norm": 2.2649866796850584, "learning_rate": 3.757404130115909e-06, "loss": 0.7401, "step": 1743 }, { "epoch": 0.5929955797347841, "grad_norm": 1.972603607998956, "learning_rate": 3.752069977297817e-06, "loss": 0.7905, "step": 1744 }, { "epoch": 0.5933356001360082, "grad_norm": 2.2111615655437995, "learning_rate": 3.7467373387063973e-06, "loss": 0.7023, "step": 1745 }, { "epoch": 0.5936756205372322, "grad_norm": 1.9245235611037521, "learning_rate": 3.741406220812227e-06, "loss": 0.9047, "step": 1746 }, { "epoch": 0.5940156409384563, "grad_norm": 1.733840029867424, "learning_rate": 3.7360766300840323e-06, "loss": 0.7679, "step": 1747 }, { "epoch": 0.5943556613396804, "grad_norm": 1.679265103876112, "learning_rate": 3.7307485729886917e-06, "loss": 0.835, "step": 1748 }, { "epoch": 0.5946956817409045, "grad_norm": 1.751220728165291, "learning_rate": 3.725422055991218e-06, "loss": 0.7547, "step": 1749 }, { "epoch": 0.5950357021421285, "grad_norm": 1.8592254513227962, "learning_rate": 3.720097085554756e-06, "loss": 0.789, "step": 1750 }, { "epoch": 0.5953757225433526, "grad_norm": 1.5578672257415482, "learning_rate": 3.7147736681405784e-06, "loss": 0.7403, "step": 1751 }, { "epoch": 0.5957157429445766, "grad_norm": 1.7162665067510214, "learning_rate": 3.709451810208068e-06, "loss": 0.7607, "step": 1752 }, { "epoch": 0.5960557633458008, "grad_norm": 1.8084933447047609, "learning_rate": 3.7041315182147203e-06, "loss": 0.7423, "step": 1753 }, { "epoch": 0.5963957837470248, "grad_norm": 1.6360987508622518, "learning_rate": 3.6988127986161247e-06, "loss": 0.7364, "step": 1754 }, { "epoch": 0.5967358041482489, "grad_norm": 1.620889477822385, "learning_rate": 3.6934956578659697e-06, "loss": 0.7419, "step": 1755 }, { "epoch": 0.5970758245494729, "grad_norm": 2.005053928359656, "learning_rate": 3.688180102416022e-06, "loss": 0.8189, "step": 1756 }, { "epoch": 0.597415844950697, "grad_norm": 2.49591536599233, "learning_rate": 3.682866138716126e-06, "loss": 0.7555, "step": 1757 }, { "epoch": 0.5977558653519212, "grad_norm": 1.9099333716331244, "learning_rate": 3.6775537732141986e-06, "loss": 0.6334, "step": 1758 }, { "epoch": 0.5980958857531452, "grad_norm": 1.6443659221282751, "learning_rate": 3.6722430123562124e-06, "loss": 0.8068, "step": 1759 }, { "epoch": 0.5984359061543693, "grad_norm": 2.623550934402015, "learning_rate": 3.6669338625861983e-06, "loss": 0.8823, "step": 1760 }, { "epoch": 0.5987759265555933, "grad_norm": 2.7847663723361222, "learning_rate": 3.661626330346224e-06, "loss": 0.7644, "step": 1761 }, { "epoch": 0.5991159469568174, "grad_norm": 1.978249284886559, "learning_rate": 3.656320422076406e-06, "loss": 0.6911, "step": 1762 }, { "epoch": 0.5994559673580415, "grad_norm": 1.6370605423957132, "learning_rate": 3.6510161442148783e-06, "loss": 0.7779, "step": 1763 }, { "epoch": 0.5997959877592656, "grad_norm": 1.9618067800993437, "learning_rate": 3.6457135031978077e-06, "loss": 0.8111, "step": 1764 }, { "epoch": 0.6001360081604896, "grad_norm": 2.0871896457836785, "learning_rate": 3.6404125054593653e-06, "loss": 0.6968, "step": 1765 }, { "epoch": 0.6004760285617137, "grad_norm": 3.6449233310433375, "learning_rate": 3.635113157431732e-06, "loss": 0.9092, "step": 1766 }, { "epoch": 0.6008160489629377, "grad_norm": 2.43520006058048, "learning_rate": 3.629815465545091e-06, "loss": 0.8429, "step": 1767 }, { "epoch": 0.6011560693641619, "grad_norm": 2.1109713075027723, "learning_rate": 3.62451943622761e-06, "loss": 0.6774, "step": 1768 }, { "epoch": 0.6014960897653859, "grad_norm": 1.7035409015911598, "learning_rate": 3.6192250759054427e-06, "loss": 0.8139, "step": 1769 }, { "epoch": 0.60183611016661, "grad_norm": 2.7589386792158628, "learning_rate": 3.6139323910027136e-06, "loss": 0.7972, "step": 1770 }, { "epoch": 0.602176130567834, "grad_norm": 2.02117501459737, "learning_rate": 3.608641387941523e-06, "loss": 0.8181, "step": 1771 }, { "epoch": 0.6025161509690582, "grad_norm": 1.9303717752979608, "learning_rate": 3.6033520731419214e-06, "loss": 0.8203, "step": 1772 }, { "epoch": 0.6028561713702822, "grad_norm": 1.884365094703858, "learning_rate": 3.598064453021911e-06, "loss": 0.7987, "step": 1773 }, { "epoch": 0.6031961917715063, "grad_norm": 2.7068608465393287, "learning_rate": 3.592778533997446e-06, "loss": 0.7508, "step": 1774 }, { "epoch": 0.6035362121727303, "grad_norm": 1.987215906609273, "learning_rate": 3.5874943224824097e-06, "loss": 0.6987, "step": 1775 }, { "epoch": 0.6038762325739544, "grad_norm": 2.2665245253039443, "learning_rate": 3.582211824888615e-06, "loss": 0.7874, "step": 1776 }, { "epoch": 0.6042162529751786, "grad_norm": 2.079144255350973, "learning_rate": 3.5769310476257935e-06, "loss": 0.8801, "step": 1777 }, { "epoch": 0.6045562733764026, "grad_norm": 1.684740476192313, "learning_rate": 3.5716519971015947e-06, "loss": 0.8109, "step": 1778 }, { "epoch": 0.6048962937776267, "grad_norm": 2.356591268153879, "learning_rate": 3.5663746797215658e-06, "loss": 0.7333, "step": 1779 }, { "epoch": 0.6052363141788507, "grad_norm": 1.9527315149213702, "learning_rate": 3.561099101889158e-06, "loss": 0.8158, "step": 1780 }, { "epoch": 0.6055763345800748, "grad_norm": 2.0298727319213175, "learning_rate": 3.555825270005707e-06, "loss": 0.8055, "step": 1781 }, { "epoch": 0.6059163549812989, "grad_norm": 2.1789885450127557, "learning_rate": 3.5505531904704287e-06, "loss": 0.8846, "step": 1782 }, { "epoch": 0.606256375382523, "grad_norm": 2.190682206775641, "learning_rate": 3.5452828696804196e-06, "loss": 0.8113, "step": 1783 }, { "epoch": 0.606596395783747, "grad_norm": 1.7727471010909939, "learning_rate": 3.5400143140306355e-06, "loss": 0.8189, "step": 1784 }, { "epoch": 0.6069364161849711, "grad_norm": 1.9034533372715055, "learning_rate": 3.5347475299138932e-06, "loss": 0.8361, "step": 1785 }, { "epoch": 0.6072764365861951, "grad_norm": 1.7827761115461676, "learning_rate": 3.5294825237208573e-06, "loss": 0.7705, "step": 1786 }, { "epoch": 0.6076164569874193, "grad_norm": 2.130570518035593, "learning_rate": 3.524219301840043e-06, "loss": 0.8345, "step": 1787 }, { "epoch": 0.6079564773886433, "grad_norm": 2.377972509331428, "learning_rate": 3.5189578706577896e-06, "loss": 0.8651, "step": 1788 }, { "epoch": 0.6082964977898674, "grad_norm": 2.0170332375599647, "learning_rate": 3.5136982365582704e-06, "loss": 0.7541, "step": 1789 }, { "epoch": 0.6086365181910914, "grad_norm": 2.0487060631425535, "learning_rate": 3.5084404059234773e-06, "loss": 0.747, "step": 1790 }, { "epoch": 0.6089765385923155, "grad_norm": 1.5497246080487372, "learning_rate": 3.5031843851332105e-06, "loss": 0.7551, "step": 1791 }, { "epoch": 0.6093165589935396, "grad_norm": 1.8661412507465704, "learning_rate": 3.4979301805650805e-06, "loss": 0.7471, "step": 1792 }, { "epoch": 0.6096565793947637, "grad_norm": 1.9081392430588895, "learning_rate": 3.492677798594486e-06, "loss": 0.6867, "step": 1793 }, { "epoch": 0.6099965997959877, "grad_norm": 3.953731191700172, "learning_rate": 3.4874272455946217e-06, "loss": 0.863, "step": 1794 }, { "epoch": 0.6103366201972118, "grad_norm": 1.7838153201393292, "learning_rate": 3.4821785279364585e-06, "loss": 0.9178, "step": 1795 }, { "epoch": 0.610676640598436, "grad_norm": 1.9609099043585159, "learning_rate": 3.476931651988742e-06, "loss": 0.7292, "step": 1796 }, { "epoch": 0.61101666099966, "grad_norm": 1.7849795981254395, "learning_rate": 3.471686624117982e-06, "loss": 0.8199, "step": 1797 }, { "epoch": 0.6113566814008841, "grad_norm": 2.1270736856013337, "learning_rate": 3.466443450688445e-06, "loss": 0.875, "step": 1798 }, { "epoch": 0.6116967018021081, "grad_norm": 2.2091957079882945, "learning_rate": 3.461202138062153e-06, "loss": 0.6927, "step": 1799 }, { "epoch": 0.6120367222033322, "grad_norm": 1.5834963127345454, "learning_rate": 3.4559626925988623e-06, "loss": 0.7318, "step": 1800 }, { "epoch": 0.6123767426045563, "grad_norm": 2.0496819681593506, "learning_rate": 3.450725120656069e-06, "loss": 0.7506, "step": 1801 }, { "epoch": 0.6127167630057804, "grad_norm": 2.2013789066829, "learning_rate": 3.4454894285889916e-06, "loss": 0.8603, "step": 1802 }, { "epoch": 0.6130567834070044, "grad_norm": 2.09843751252011, "learning_rate": 3.4402556227505746e-06, "loss": 0.7768, "step": 1803 }, { "epoch": 0.6133968038082285, "grad_norm": 2.3451379947690607, "learning_rate": 3.435023709491467e-06, "loss": 0.8272, "step": 1804 }, { "epoch": 0.6137368242094525, "grad_norm": 3.5327166657343456, "learning_rate": 3.4297936951600217e-06, "loss": 0.7466, "step": 1805 }, { "epoch": 0.6140768446106767, "grad_norm": 1.905762345831794, "learning_rate": 3.424565586102293e-06, "loss": 0.8158, "step": 1806 }, { "epoch": 0.6144168650119007, "grad_norm": 2.3123581203476804, "learning_rate": 3.4193393886620153e-06, "loss": 0.8462, "step": 1807 }, { "epoch": 0.6147568854131248, "grad_norm": 1.946148455776188, "learning_rate": 3.4141151091806134e-06, "loss": 0.704, "step": 1808 }, { "epoch": 0.6150969058143488, "grad_norm": 1.3443343547386855, "learning_rate": 3.408892753997175e-06, "loss": 0.693, "step": 1809 }, { "epoch": 0.6154369262155729, "grad_norm": 2.704265394408822, "learning_rate": 3.40367232944846e-06, "loss": 0.7594, "step": 1810 }, { "epoch": 0.615776946616797, "grad_norm": 1.8211554347276468, "learning_rate": 3.3984538418688795e-06, "loss": 0.669, "step": 1811 }, { "epoch": 0.6161169670180211, "grad_norm": 2.0413514509876602, "learning_rate": 3.3932372975905027e-06, "loss": 0.868, "step": 1812 }, { "epoch": 0.6164569874192452, "grad_norm": 2.121098812311723, "learning_rate": 3.3880227029430335e-06, "loss": 0.7582, "step": 1813 }, { "epoch": 0.6167970078204692, "grad_norm": 1.6909069939174246, "learning_rate": 3.3828100642538097e-06, "loss": 0.7327, "step": 1814 }, { "epoch": 0.6171370282216933, "grad_norm": 1.6334734116518446, "learning_rate": 3.377599387847803e-06, "loss": 0.8684, "step": 1815 }, { "epoch": 0.6174770486229174, "grad_norm": 2.1432970505365043, "learning_rate": 3.372390680047597e-06, "loss": 0.8207, "step": 1816 }, { "epoch": 0.6178170690241415, "grad_norm": 1.8165425685515983, "learning_rate": 3.3671839471733906e-06, "loss": 0.797, "step": 1817 }, { "epoch": 0.6181570894253655, "grad_norm": 2.1615351800791354, "learning_rate": 3.3619791955429826e-06, "loss": 0.773, "step": 1818 }, { "epoch": 0.6184971098265896, "grad_norm": 2.6904615455880614, "learning_rate": 3.3567764314717744e-06, "loss": 0.7907, "step": 1819 }, { "epoch": 0.6188371302278136, "grad_norm": 2.834095748254144, "learning_rate": 3.351575661272749e-06, "loss": 0.8717, "step": 1820 }, { "epoch": 0.6191771506290378, "grad_norm": 2.145877874306492, "learning_rate": 3.346376891256471e-06, "loss": 0.8086, "step": 1821 }, { "epoch": 0.6195171710302618, "grad_norm": 2.023497317198109, "learning_rate": 3.341180127731083e-06, "loss": 0.8679, "step": 1822 }, { "epoch": 0.6198571914314859, "grad_norm": 2.1023255103173564, "learning_rate": 3.335985377002285e-06, "loss": 0.8146, "step": 1823 }, { "epoch": 0.6201972118327099, "grad_norm": 1.731210236073491, "learning_rate": 3.330792645373344e-06, "loss": 0.7683, "step": 1824 }, { "epoch": 0.6205372322339341, "grad_norm": 1.9233595089290563, "learning_rate": 3.3256019391450696e-06, "loss": 0.7869, "step": 1825 }, { "epoch": 0.6208772526351581, "grad_norm": 2.208075651046932, "learning_rate": 3.320413264615817e-06, "loss": 0.6999, "step": 1826 }, { "epoch": 0.6212172730363822, "grad_norm": 1.8428253882643195, "learning_rate": 3.315226628081475e-06, "loss": 0.724, "step": 1827 }, { "epoch": 0.6215572934376062, "grad_norm": 2.5874398711101665, "learning_rate": 3.3100420358354614e-06, "loss": 0.8652, "step": 1828 }, { "epoch": 0.6218973138388303, "grad_norm": 1.673959608619458, "learning_rate": 3.3048594941687117e-06, "loss": 0.8139, "step": 1829 }, { "epoch": 0.6222373342400545, "grad_norm": 1.8058623104229596, "learning_rate": 3.299679009369672e-06, "loss": 0.7287, "step": 1830 }, { "epoch": 0.6225773546412785, "grad_norm": 1.7517624272915204, "learning_rate": 3.2945005877242975e-06, "loss": 0.767, "step": 1831 }, { "epoch": 0.6229173750425026, "grad_norm": 1.9637001633251892, "learning_rate": 3.2893242355160327e-06, "loss": 0.7267, "step": 1832 }, { "epoch": 0.6232573954437266, "grad_norm": 2.0442734243489378, "learning_rate": 3.28414995902582e-06, "loss": 0.7311, "step": 1833 }, { "epoch": 0.6235974158449507, "grad_norm": 4.43268536813694, "learning_rate": 3.2789777645320736e-06, "loss": 0.6776, "step": 1834 }, { "epoch": 0.6239374362461748, "grad_norm": 2.120174804949898, "learning_rate": 3.2738076583106903e-06, "loss": 0.8519, "step": 1835 }, { "epoch": 0.6242774566473989, "grad_norm": 3.2551292864590056, "learning_rate": 3.268639646635027e-06, "loss": 0.9032, "step": 1836 }, { "epoch": 0.6246174770486229, "grad_norm": 1.601086184729794, "learning_rate": 3.2634737357758994e-06, "loss": 0.8248, "step": 1837 }, { "epoch": 0.624957497449847, "grad_norm": 2.240230510804796, "learning_rate": 3.2583099320015787e-06, "loss": 0.7506, "step": 1838 }, { "epoch": 0.625297517851071, "grad_norm": 1.9608041776165144, "learning_rate": 3.253148241577773e-06, "loss": 0.7333, "step": 1839 }, { "epoch": 0.6256375382522952, "grad_norm": 1.9849085294573612, "learning_rate": 3.2479886707676323e-06, "loss": 0.7508, "step": 1840 }, { "epoch": 0.6259775586535192, "grad_norm": 1.8228816637234933, "learning_rate": 3.2428312258317306e-06, "loss": 0.7946, "step": 1841 }, { "epoch": 0.6263175790547433, "grad_norm": 1.7804154421261742, "learning_rate": 3.2376759130280644e-06, "loss": 0.7698, "step": 1842 }, { "epoch": 0.6266575994559673, "grad_norm": 1.8927973749913074, "learning_rate": 3.23252273861204e-06, "loss": 0.8284, "step": 1843 }, { "epoch": 0.6269976198571914, "grad_norm": 1.8550081010363115, "learning_rate": 3.2273717088364743e-06, "loss": 0.7924, "step": 1844 }, { "epoch": 0.6273376402584155, "grad_norm": 2.1922949801484966, "learning_rate": 3.222222829951578e-06, "loss": 0.8388, "step": 1845 }, { "epoch": 0.6276776606596396, "grad_norm": 2.1626087410744264, "learning_rate": 3.2170761082049504e-06, "loss": 0.7447, "step": 1846 }, { "epoch": 0.6280176810608636, "grad_norm": 1.956904781654036, "learning_rate": 3.21193154984158e-06, "loss": 0.6497, "step": 1847 }, { "epoch": 0.6283577014620877, "grad_norm": 5.531479827665114, "learning_rate": 3.2067891611038203e-06, "loss": 0.8239, "step": 1848 }, { "epoch": 0.6286977218633119, "grad_norm": 2.0361393715810956, "learning_rate": 3.201648948231404e-06, "loss": 0.805, "step": 1849 }, { "epoch": 0.6290377422645359, "grad_norm": 2.4082749710623523, "learning_rate": 3.196510917461414e-06, "loss": 0.7744, "step": 1850 }, { "epoch": 0.62937776266576, "grad_norm": 1.8187471571807, "learning_rate": 3.191375075028291e-06, "loss": 0.7981, "step": 1851 }, { "epoch": 0.629717783066984, "grad_norm": 1.8526733968838733, "learning_rate": 3.1862414271638163e-06, "loss": 0.7936, "step": 1852 }, { "epoch": 0.630057803468208, "grad_norm": 2.1219438112421853, "learning_rate": 3.181109980097111e-06, "loss": 0.8523, "step": 1853 }, { "epoch": 0.6303978238694322, "grad_norm": 2.3455237931427178, "learning_rate": 3.1759807400546266e-06, "loss": 0.7498, "step": 1854 }, { "epoch": 0.6307378442706563, "grad_norm": 2.126668312041449, "learning_rate": 3.1708537132601324e-06, "loss": 0.8679, "step": 1855 }, { "epoch": 0.6310778646718803, "grad_norm": 2.298564439126398, "learning_rate": 3.1657289059347184e-06, "loss": 0.7885, "step": 1856 }, { "epoch": 0.6314178850731044, "grad_norm": 1.7168007294839274, "learning_rate": 3.1606063242967753e-06, "loss": 0.866, "step": 1857 }, { "epoch": 0.6317579054743284, "grad_norm": 1.8392023959375048, "learning_rate": 3.1554859745619986e-06, "loss": 0.7636, "step": 1858 }, { "epoch": 0.6320979258755526, "grad_norm": 2.2699088048706235, "learning_rate": 3.15036786294337e-06, "loss": 0.837, "step": 1859 }, { "epoch": 0.6324379462767766, "grad_norm": 2.9330773051419263, "learning_rate": 3.145251995651162e-06, "loss": 0.8315, "step": 1860 }, { "epoch": 0.6327779666780007, "grad_norm": 2.1837196368742133, "learning_rate": 3.1401383788929175e-06, "loss": 0.7574, "step": 1861 }, { "epoch": 0.6331179870792247, "grad_norm": 1.9146680400998761, "learning_rate": 3.1350270188734523e-06, "loss": 0.7177, "step": 1862 }, { "epoch": 0.6334580074804488, "grad_norm": 1.3903306685323171, "learning_rate": 3.129917921794844e-06, "loss": 0.693, "step": 1863 }, { "epoch": 0.6337980278816729, "grad_norm": 2.1306437683234325, "learning_rate": 3.1248110938564202e-06, "loss": 0.7523, "step": 1864 }, { "epoch": 0.634138048282897, "grad_norm": 1.9181652277534635, "learning_rate": 3.1197065412547632e-06, "loss": 0.8886, "step": 1865 }, { "epoch": 0.634478068684121, "grad_norm": 1.7562861259048224, "learning_rate": 3.1146042701836865e-06, "loss": 0.7553, "step": 1866 }, { "epoch": 0.6348180890853451, "grad_norm": 1.7325396538375666, "learning_rate": 3.10950428683424e-06, "loss": 0.8295, "step": 1867 }, { "epoch": 0.6351581094865691, "grad_norm": 1.8009663516914018, "learning_rate": 3.1044065973946945e-06, "loss": 0.7395, "step": 1868 }, { "epoch": 0.6354981298877933, "grad_norm": 1.6392428927823481, "learning_rate": 3.0993112080505383e-06, "loss": 0.872, "step": 1869 }, { "epoch": 0.6358381502890174, "grad_norm": 1.5989553503629714, "learning_rate": 3.0942181249844726e-06, "loss": 0.6544, "step": 1870 }, { "epoch": 0.6361781706902414, "grad_norm": 2.6768868064774374, "learning_rate": 3.089127354376393e-06, "loss": 0.7539, "step": 1871 }, { "epoch": 0.6365181910914655, "grad_norm": 4.5238927522554535, "learning_rate": 3.084038902403398e-06, "loss": 0.7274, "step": 1872 }, { "epoch": 0.6368582114926895, "grad_norm": 1.6009647281884545, "learning_rate": 3.0789527752397624e-06, "loss": 0.7217, "step": 1873 }, { "epoch": 0.6371982318939137, "grad_norm": 2.7730101673025573, "learning_rate": 3.07386897905695e-06, "loss": 0.8029, "step": 1874 }, { "epoch": 0.6375382522951377, "grad_norm": 2.6935224464307757, "learning_rate": 3.068787520023587e-06, "loss": 0.8331, "step": 1875 }, { "epoch": 0.6378782726963618, "grad_norm": 2.128963112443544, "learning_rate": 3.0637084043054744e-06, "loss": 0.8188, "step": 1876 }, { "epoch": 0.6382182930975858, "grad_norm": 2.21461662243339, "learning_rate": 3.058631638065561e-06, "loss": 0.8867, "step": 1877 }, { "epoch": 0.63855831349881, "grad_norm": 2.0004837364007018, "learning_rate": 3.0535572274639456e-06, "loss": 0.7891, "step": 1878 }, { "epoch": 0.638898333900034, "grad_norm": 2.645633375201975, "learning_rate": 3.048485178657875e-06, "loss": 0.7086, "step": 1879 }, { "epoch": 0.6392383543012581, "grad_norm": 1.9850643249437119, "learning_rate": 3.0434154978017215e-06, "loss": 0.797, "step": 1880 }, { "epoch": 0.6395783747024821, "grad_norm": 1.4653698856711659, "learning_rate": 3.0383481910469936e-06, "loss": 0.793, "step": 1881 }, { "epoch": 0.6399183951037062, "grad_norm": 2.4247940024767525, "learning_rate": 3.03328326454231e-06, "loss": 0.7956, "step": 1882 }, { "epoch": 0.6402584155049303, "grad_norm": 1.7278252319233067, "learning_rate": 3.0282207244334084e-06, "loss": 0.7589, "step": 1883 }, { "epoch": 0.6405984359061544, "grad_norm": 1.7440612715525123, "learning_rate": 3.0231605768631256e-06, "loss": 0.8077, "step": 1884 }, { "epoch": 0.6409384563073784, "grad_norm": 1.7115699043404717, "learning_rate": 3.018102827971397e-06, "loss": 0.7597, "step": 1885 }, { "epoch": 0.6412784767086025, "grad_norm": 1.5585975933489125, "learning_rate": 3.0130474838952518e-06, "loss": 0.77, "step": 1886 }, { "epoch": 0.6416184971098265, "grad_norm": 1.6776789416972602, "learning_rate": 3.007994550768793e-06, "loss": 0.8699, "step": 1887 }, { "epoch": 0.6419585175110507, "grad_norm": 2.0656741398519305, "learning_rate": 3.0029440347232064e-06, "loss": 0.7675, "step": 1888 }, { "epoch": 0.6422985379122748, "grad_norm": 2.423344521347543, "learning_rate": 2.997895941886737e-06, "loss": 0.7608, "step": 1889 }, { "epoch": 0.6426385583134988, "grad_norm": 3.3517642400306653, "learning_rate": 2.9928502783846987e-06, "loss": 0.7183, "step": 1890 }, { "epoch": 0.6429785787147229, "grad_norm": 4.199360112371653, "learning_rate": 2.9878070503394484e-06, "loss": 0.8042, "step": 1891 }, { "epoch": 0.6433185991159469, "grad_norm": 2.348727029739639, "learning_rate": 2.982766263870395e-06, "loss": 0.8536, "step": 1892 }, { "epoch": 0.6436586195171711, "grad_norm": 2.3407286077425815, "learning_rate": 2.977727925093981e-06, "loss": 0.7965, "step": 1893 }, { "epoch": 0.6439986399183951, "grad_norm": 2.2628555536887243, "learning_rate": 2.972692040123678e-06, "loss": 0.8354, "step": 1894 }, { "epoch": 0.6443386603196192, "grad_norm": 1.9098209839122338, "learning_rate": 2.9676586150699843e-06, "loss": 0.7583, "step": 1895 }, { "epoch": 0.6446786807208432, "grad_norm": 2.030613992719097, "learning_rate": 2.962627656040408e-06, "loss": 0.6792, "step": 1896 }, { "epoch": 0.6450187011220673, "grad_norm": 2.206570139271304, "learning_rate": 2.957599169139472e-06, "loss": 0.7251, "step": 1897 }, { "epoch": 0.6453587215232914, "grad_norm": 2.2690884747822615, "learning_rate": 2.9525731604686925e-06, "loss": 0.7452, "step": 1898 }, { "epoch": 0.6456987419245155, "grad_norm": 1.6036554282102649, "learning_rate": 2.9475496361265834e-06, "loss": 0.8009, "step": 1899 }, { "epoch": 0.6460387623257395, "grad_norm": 1.5691756233346612, "learning_rate": 2.942528602208642e-06, "loss": 0.7995, "step": 1900 }, { "epoch": 0.6463787827269636, "grad_norm": 2.5262095313197785, "learning_rate": 2.9375100648073413e-06, "loss": 0.7977, "step": 1901 }, { "epoch": 0.6467188031281876, "grad_norm": 2.329442430159881, "learning_rate": 2.9324940300121325e-06, "loss": 0.7225, "step": 1902 }, { "epoch": 0.6470588235294118, "grad_norm": 2.2795978824555316, "learning_rate": 2.9274805039094225e-06, "loss": 0.7248, "step": 1903 }, { "epoch": 0.6473988439306358, "grad_norm": 1.8928729552824115, "learning_rate": 2.922469492582578e-06, "loss": 0.7438, "step": 1904 }, { "epoch": 0.6477388643318599, "grad_norm": 1.645826518622666, "learning_rate": 2.9174610021119136e-06, "loss": 0.7018, "step": 1905 }, { "epoch": 0.648078884733084, "grad_norm": 1.887335955741179, "learning_rate": 2.912455038574686e-06, "loss": 0.7666, "step": 1906 }, { "epoch": 0.6484189051343081, "grad_norm": 2.6577609754031966, "learning_rate": 2.907451608045081e-06, "loss": 0.7754, "step": 1907 }, { "epoch": 0.6487589255355322, "grad_norm": 1.424048835180028, "learning_rate": 2.9024507165942196e-06, "loss": 0.8108, "step": 1908 }, { "epoch": 0.6490989459367562, "grad_norm": 2.3645854173941014, "learning_rate": 2.8974523702901346e-06, "loss": 0.9007, "step": 1909 }, { "epoch": 0.6494389663379803, "grad_norm": 1.9228751348732287, "learning_rate": 2.892456575197771e-06, "loss": 0.8843, "step": 1910 }, { "epoch": 0.6497789867392043, "grad_norm": 1.7424474860546162, "learning_rate": 2.8874633373789848e-06, "loss": 0.812, "step": 1911 }, { "epoch": 0.6501190071404285, "grad_norm": 2.183670810574612, "learning_rate": 2.8824726628925204e-06, "loss": 0.844, "step": 1912 }, { "epoch": 0.6504590275416525, "grad_norm": 3.2905500209946386, "learning_rate": 2.877484557794017e-06, "loss": 0.7829, "step": 1913 }, { "epoch": 0.6507990479428766, "grad_norm": 1.8504014727826574, "learning_rate": 2.872499028135993e-06, "loss": 0.8476, "step": 1914 }, { "epoch": 0.6511390683441006, "grad_norm": 1.786637343753024, "learning_rate": 2.8675160799678483e-06, "loss": 0.7481, "step": 1915 }, { "epoch": 0.6514790887453247, "grad_norm": 1.7737603638634836, "learning_rate": 2.8625357193358416e-06, "loss": 0.6805, "step": 1916 }, { "epoch": 0.6518191091465488, "grad_norm": 2.3513830613944897, "learning_rate": 2.8575579522830965e-06, "loss": 0.8911, "step": 1917 }, { "epoch": 0.6521591295477729, "grad_norm": 2.308686347074699, "learning_rate": 2.8525827848495912e-06, "loss": 0.7006, "step": 1918 }, { "epoch": 0.6524991499489969, "grad_norm": 3.0666789675701036, "learning_rate": 2.847610223072145e-06, "loss": 0.7716, "step": 1919 }, { "epoch": 0.652839170350221, "grad_norm": 1.6176969102311178, "learning_rate": 2.842640272984422e-06, "loss": 0.7157, "step": 1920 }, { "epoch": 0.653179190751445, "grad_norm": 1.9114007467129297, "learning_rate": 2.837672940616911e-06, "loss": 0.8591, "step": 1921 }, { "epoch": 0.6535192111526692, "grad_norm": 2.673597039865379, "learning_rate": 2.8327082319969268e-06, "loss": 0.7577, "step": 1922 }, { "epoch": 0.6538592315538932, "grad_norm": 1.6371578518735068, "learning_rate": 2.8277461531485985e-06, "loss": 0.7447, "step": 1923 }, { "epoch": 0.6541992519551173, "grad_norm": 1.6541414287928828, "learning_rate": 2.8227867100928706e-06, "loss": 0.7508, "step": 1924 }, { "epoch": 0.6545392723563414, "grad_norm": 2.6084708691300764, "learning_rate": 2.8178299088474836e-06, "loss": 0.7826, "step": 1925 }, { "epoch": 0.6548792927575654, "grad_norm": 2.8225766202041194, "learning_rate": 2.8128757554269716e-06, "loss": 0.9343, "step": 1926 }, { "epoch": 0.6552193131587896, "grad_norm": 1.8679165197646086, "learning_rate": 2.8079242558426612e-06, "loss": 0.7858, "step": 1927 }, { "epoch": 0.6555593335600136, "grad_norm": 1.6626905717574614, "learning_rate": 2.8029754161026535e-06, "loss": 0.8268, "step": 1928 }, { "epoch": 0.6558993539612377, "grad_norm": 1.7902883623187187, "learning_rate": 2.7980292422118282e-06, "loss": 0.7367, "step": 1929 }, { "epoch": 0.6562393743624617, "grad_norm": 2.28189192357985, "learning_rate": 2.7930857401718244e-06, "loss": 0.8161, "step": 1930 }, { "epoch": 0.6565793947636859, "grad_norm": 1.776198319138567, "learning_rate": 2.7881449159810416e-06, "loss": 0.6802, "step": 1931 }, { "epoch": 0.6569194151649099, "grad_norm": 2.2447524877488156, "learning_rate": 2.7832067756346293e-06, "loss": 0.765, "step": 1932 }, { "epoch": 0.657259435566134, "grad_norm": 1.626806606726708, "learning_rate": 2.7782713251244797e-06, "loss": 0.786, "step": 1933 }, { "epoch": 0.657599455967358, "grad_norm": 2.2354711587791236, "learning_rate": 2.7733385704392257e-06, "loss": 0.832, "step": 1934 }, { "epoch": 0.6579394763685821, "grad_norm": 2.0096403282944912, "learning_rate": 2.768408517564224e-06, "loss": 0.7716, "step": 1935 }, { "epoch": 0.6582794967698062, "grad_norm": 1.9832226393988026, "learning_rate": 2.763481172481556e-06, "loss": 0.7496, "step": 1936 }, { "epoch": 0.6586195171710303, "grad_norm": 1.9113293186754516, "learning_rate": 2.7585565411700164e-06, "loss": 0.7712, "step": 1937 }, { "epoch": 0.6589595375722543, "grad_norm": 1.800190920309849, "learning_rate": 2.7536346296051063e-06, "loss": 0.775, "step": 1938 }, { "epoch": 0.6592995579734784, "grad_norm": 2.2869209794289835, "learning_rate": 2.7487154437590252e-06, "loss": 0.7114, "step": 1939 }, { "epoch": 0.6596395783747024, "grad_norm": 1.752548967169787, "learning_rate": 2.743798989600672e-06, "loss": 0.8391, "step": 1940 }, { "epoch": 0.6599795987759266, "grad_norm": 1.9696733758583753, "learning_rate": 2.738885273095624e-06, "loss": 0.6906, "step": 1941 }, { "epoch": 0.6603196191771507, "grad_norm": 1.756956191108405, "learning_rate": 2.733974300206137e-06, "loss": 0.8137, "step": 1942 }, { "epoch": 0.6606596395783747, "grad_norm": 2.5904794588208433, "learning_rate": 2.7290660768911435e-06, "loss": 0.8262, "step": 1943 }, { "epoch": 0.6609996599795988, "grad_norm": 2.718207473803795, "learning_rate": 2.7241606091062334e-06, "loss": 0.7992, "step": 1944 }, { "epoch": 0.6613396803808228, "grad_norm": 1.9225267669432748, "learning_rate": 2.719257902803658e-06, "loss": 0.8342, "step": 1945 }, { "epoch": 0.661679700782047, "grad_norm": 1.8175234136298053, "learning_rate": 2.7143579639323146e-06, "loss": 0.7721, "step": 1946 }, { "epoch": 0.662019721183271, "grad_norm": 2.2267444521145365, "learning_rate": 2.7094607984377423e-06, "loss": 0.7256, "step": 1947 }, { "epoch": 0.6623597415844951, "grad_norm": 1.6676057305902945, "learning_rate": 2.7045664122621173e-06, "loss": 0.7588, "step": 1948 }, { "epoch": 0.6626997619857191, "grad_norm": 1.9495109160790398, "learning_rate": 2.6996748113442397e-06, "loss": 0.7012, "step": 1949 }, { "epoch": 0.6630397823869432, "grad_norm": 2.275323709986825, "learning_rate": 2.6947860016195372e-06, "loss": 0.809, "step": 1950 }, { "epoch": 0.6633798027881673, "grad_norm": 1.7705415744996646, "learning_rate": 2.6898999890200405e-06, "loss": 0.7813, "step": 1951 }, { "epoch": 0.6637198231893914, "grad_norm": 1.8201284706907614, "learning_rate": 2.6850167794743966e-06, "loss": 0.7378, "step": 1952 }, { "epoch": 0.6640598435906154, "grad_norm": 1.7763246698039323, "learning_rate": 2.680136378907845e-06, "loss": 0.8054, "step": 1953 }, { "epoch": 0.6643998639918395, "grad_norm": 2.1067341590101787, "learning_rate": 2.6752587932422175e-06, "loss": 0.8473, "step": 1954 }, { "epoch": 0.6647398843930635, "grad_norm": 2.1826612345959284, "learning_rate": 2.67038402839593e-06, "loss": 0.8311, "step": 1955 }, { "epoch": 0.6650799047942877, "grad_norm": 2.151119086744043, "learning_rate": 2.6655120902839802e-06, "loss": 0.7625, "step": 1956 }, { "epoch": 0.6654199251955117, "grad_norm": 2.5514025172235963, "learning_rate": 2.6606429848179306e-06, "loss": 0.7488, "step": 1957 }, { "epoch": 0.6657599455967358, "grad_norm": 2.4317656380313477, "learning_rate": 2.655776717905906e-06, "loss": 0.7954, "step": 1958 }, { "epoch": 0.6660999659979598, "grad_norm": 2.3399416449946364, "learning_rate": 2.6509132954525946e-06, "loss": 0.7008, "step": 1959 }, { "epoch": 0.666439986399184, "grad_norm": 2.7631747409481386, "learning_rate": 2.6460527233592225e-06, "loss": 0.7061, "step": 1960 }, { "epoch": 0.666780006800408, "grad_norm": 1.9223571690932921, "learning_rate": 2.641195007523568e-06, "loss": 0.8037, "step": 1961 }, { "epoch": 0.6671200272016321, "grad_norm": 2.0570084331599454, "learning_rate": 2.636340153839935e-06, "loss": 0.7771, "step": 1962 }, { "epoch": 0.6674600476028562, "grad_norm": 1.73207328780135, "learning_rate": 2.631488168199159e-06, "loss": 0.8048, "step": 1963 }, { "epoch": 0.6678000680040802, "grad_norm": 1.7955944649193327, "learning_rate": 2.626639056488593e-06, "loss": 0.7144, "step": 1964 }, { "epoch": 0.6681400884053044, "grad_norm": 1.899052873587027, "learning_rate": 2.621792824592103e-06, "loss": 0.8188, "step": 1965 }, { "epoch": 0.6684801088065284, "grad_norm": 1.669623057694321, "learning_rate": 2.616949478390065e-06, "loss": 0.7515, "step": 1966 }, { "epoch": 0.6688201292077525, "grad_norm": 2.6012423751773817, "learning_rate": 2.612109023759346e-06, "loss": 0.724, "step": 1967 }, { "epoch": 0.6691601496089765, "grad_norm": 1.7503290521049435, "learning_rate": 2.6072714665733135e-06, "loss": 0.7963, "step": 1968 }, { "epoch": 0.6695001700102006, "grad_norm": 1.7646392017997337, "learning_rate": 2.60243681270181e-06, "loss": 0.7704, "step": 1969 }, { "epoch": 0.6698401904114247, "grad_norm": 2.23434001820653, "learning_rate": 2.597605068011163e-06, "loss": 0.7679, "step": 1970 }, { "epoch": 0.6701802108126488, "grad_norm": 2.2030201912789242, "learning_rate": 2.5927762383641657e-06, "loss": 0.8307, "step": 1971 }, { "epoch": 0.6705202312138728, "grad_norm": 1.4053187069136137, "learning_rate": 2.5879503296200736e-06, "loss": 0.6733, "step": 1972 }, { "epoch": 0.6708602516150969, "grad_norm": 2.0200673415876076, "learning_rate": 2.583127347634601e-06, "loss": 0.7548, "step": 1973 }, { "epoch": 0.6712002720163209, "grad_norm": 7.747179478216865, "learning_rate": 2.5783072982599057e-06, "loss": 0.8986, "step": 1974 }, { "epoch": 0.6715402924175451, "grad_norm": 4.347151217698582, "learning_rate": 2.573490187344596e-06, "loss": 0.8158, "step": 1975 }, { "epoch": 0.6718803128187691, "grad_norm": 1.5960191169683489, "learning_rate": 2.5686760207337045e-06, "loss": 0.6319, "step": 1976 }, { "epoch": 0.6722203332199932, "grad_norm": 2.0210012377694633, "learning_rate": 2.563864804268701e-06, "loss": 0.8267, "step": 1977 }, { "epoch": 0.6725603536212172, "grad_norm": 2.121999429293373, "learning_rate": 2.559056543787468e-06, "loss": 0.7567, "step": 1978 }, { "epoch": 0.6729003740224413, "grad_norm": 2.5273540123412044, "learning_rate": 2.554251245124305e-06, "loss": 0.6441, "step": 1979 }, { "epoch": 0.6732403944236655, "grad_norm": 2.0741394840030853, "learning_rate": 2.5494489141099155e-06, "loss": 0.8274, "step": 1980 }, { "epoch": 0.6735804148248895, "grad_norm": 1.67166625283495, "learning_rate": 2.5446495565714024e-06, "loss": 0.7647, "step": 1981 }, { "epoch": 0.6739204352261136, "grad_norm": 1.915541986064764, "learning_rate": 2.539853178332265e-06, "loss": 0.8623, "step": 1982 }, { "epoch": 0.6742604556273376, "grad_norm": 2.0838754278048075, "learning_rate": 2.5350597852123798e-06, "loss": 0.9025, "step": 1983 }, { "epoch": 0.6746004760285618, "grad_norm": 1.8777603204555824, "learning_rate": 2.530269383028009e-06, "loss": 0.805, "step": 1984 }, { "epoch": 0.6749404964297858, "grad_norm": 1.6370793848887677, "learning_rate": 2.5254819775917795e-06, "loss": 0.7331, "step": 1985 }, { "epoch": 0.6752805168310099, "grad_norm": 1.9510489857076145, "learning_rate": 2.5206975747126873e-06, "loss": 0.6924, "step": 1986 }, { "epoch": 0.6756205372322339, "grad_norm": 1.8840509441418891, "learning_rate": 2.51591618019608e-06, "loss": 0.7736, "step": 1987 }, { "epoch": 0.675960557633458, "grad_norm": 2.1944785850385866, "learning_rate": 2.511137799843658e-06, "loss": 0.7507, "step": 1988 }, { "epoch": 0.6763005780346821, "grad_norm": 2.563444560543499, "learning_rate": 2.506362439453463e-06, "loss": 0.8487, "step": 1989 }, { "epoch": 0.6766405984359062, "grad_norm": 1.8896121725043236, "learning_rate": 2.5015901048198716e-06, "loss": 0.7212, "step": 1990 }, { "epoch": 0.6769806188371302, "grad_norm": 1.6981617236104471, "learning_rate": 2.4968208017335936e-06, "loss": 0.8074, "step": 1991 }, { "epoch": 0.6773206392383543, "grad_norm": 2.1743389190119093, "learning_rate": 2.4920545359816533e-06, "loss": 0.8572, "step": 1992 }, { "epoch": 0.6776606596395783, "grad_norm": 1.7914808391383161, "learning_rate": 2.487291313347397e-06, "loss": 0.8223, "step": 1993 }, { "epoch": 0.6780006800408025, "grad_norm": 1.715883330092319, "learning_rate": 2.4825311396104727e-06, "loss": 0.8229, "step": 1994 }, { "epoch": 0.6783407004420265, "grad_norm": 1.9340151796766085, "learning_rate": 2.477774020546831e-06, "loss": 0.7705, "step": 1995 }, { "epoch": 0.6786807208432506, "grad_norm": 2.1553476660518376, "learning_rate": 2.473019961928716e-06, "loss": 0.8944, "step": 1996 }, { "epoch": 0.6790207412444746, "grad_norm": 2.3845638574577306, "learning_rate": 2.4682689695246557e-06, "loss": 0.7879, "step": 1997 }, { "epoch": 0.6793607616456987, "grad_norm": 2.8669737688434775, "learning_rate": 2.4635210490994648e-06, "loss": 0.7056, "step": 1998 }, { "epoch": 0.6797007820469229, "grad_norm": 3.049192996998203, "learning_rate": 2.458776206414221e-06, "loss": 0.8073, "step": 1999 }, { "epoch": 0.6800408024481469, "grad_norm": 1.8798273806791783, "learning_rate": 2.4540344472262766e-06, "loss": 0.7979, "step": 2000 }, { "epoch": 0.680380822849371, "grad_norm": 2.8826970776464167, "learning_rate": 2.4492957772892345e-06, "loss": 0.7671, "step": 2001 }, { "epoch": 0.680720843250595, "grad_norm": 1.7248005566280082, "learning_rate": 2.4445602023529558e-06, "loss": 0.7626, "step": 2002 }, { "epoch": 0.6810608636518191, "grad_norm": 1.6157884725720708, "learning_rate": 2.439827728163542e-06, "loss": 0.6729, "step": 2003 }, { "epoch": 0.6814008840530432, "grad_norm": 2.015980236780887, "learning_rate": 2.4350983604633323e-06, "loss": 0.7427, "step": 2004 }, { "epoch": 0.6817409044542673, "grad_norm": 2.213874167082533, "learning_rate": 2.4303721049908973e-06, "loss": 0.8243, "step": 2005 }, { "epoch": 0.6820809248554913, "grad_norm": 1.8548248745686642, "learning_rate": 2.425648967481031e-06, "loss": 0.7634, "step": 2006 }, { "epoch": 0.6824209452567154, "grad_norm": 1.652684543766916, "learning_rate": 2.4209289536647467e-06, "loss": 0.8613, "step": 2007 }, { "epoch": 0.6827609656579394, "grad_norm": 1.9971828674188044, "learning_rate": 2.4162120692692623e-06, "loss": 0.7493, "step": 2008 }, { "epoch": 0.6831009860591636, "grad_norm": 1.599253674855469, "learning_rate": 2.4114983200180053e-06, "loss": 0.7948, "step": 2009 }, { "epoch": 0.6834410064603876, "grad_norm": 2.2583508234748653, "learning_rate": 2.406787711630591e-06, "loss": 0.7357, "step": 2010 }, { "epoch": 0.6837810268616117, "grad_norm": 2.0791026003084396, "learning_rate": 2.4020802498228333e-06, "loss": 0.8317, "step": 2011 }, { "epoch": 0.6841210472628357, "grad_norm": 1.9114254449996253, "learning_rate": 2.3973759403067175e-06, "loss": 0.8558, "step": 2012 }, { "epoch": 0.6844610676640599, "grad_norm": 1.6266872694061658, "learning_rate": 2.3926747887904084e-06, "loss": 0.8107, "step": 2013 }, { "epoch": 0.684801088065284, "grad_norm": 2.2202773218114706, "learning_rate": 2.3879768009782434e-06, "loss": 0.8187, "step": 2014 }, { "epoch": 0.685141108466508, "grad_norm": 1.8724141832862042, "learning_rate": 2.3832819825707136e-06, "loss": 0.7582, "step": 2015 }, { "epoch": 0.685481128867732, "grad_norm": 2.6048840426082043, "learning_rate": 2.3785903392644714e-06, "loss": 0.7355, "step": 2016 }, { "epoch": 0.6858211492689561, "grad_norm": 2.4069817743883695, "learning_rate": 2.37390187675231e-06, "loss": 0.8101, "step": 2017 }, { "epoch": 0.6861611696701803, "grad_norm": 2.3122636622751997, "learning_rate": 2.3692166007231686e-06, "loss": 0.796, "step": 2018 }, { "epoch": 0.6865011900714043, "grad_norm": 2.7148809354845103, "learning_rate": 2.364534516862117e-06, "loss": 0.7821, "step": 2019 }, { "epoch": 0.6868412104726284, "grad_norm": 1.7589729603840554, "learning_rate": 2.359855630850352e-06, "loss": 0.805, "step": 2020 }, { "epoch": 0.6871812308738524, "grad_norm": 2.188045522331186, "learning_rate": 2.3551799483651894e-06, "loss": 0.7042, "step": 2021 }, { "epoch": 0.6875212512750765, "grad_norm": 1.6884288952943125, "learning_rate": 2.3505074750800585e-06, "loss": 0.7188, "step": 2022 }, { "epoch": 0.6878612716763006, "grad_norm": 1.6892834429534136, "learning_rate": 2.3458382166644967e-06, "loss": 0.6986, "step": 2023 }, { "epoch": 0.6882012920775247, "grad_norm": 2.0889129257298324, "learning_rate": 2.3411721787841363e-06, "loss": 0.671, "step": 2024 }, { "epoch": 0.6885413124787487, "grad_norm": 1.9926930177927726, "learning_rate": 2.3365093671007078e-06, "loss": 0.7946, "step": 2025 }, { "epoch": 0.6888813328799728, "grad_norm": 2.050530946497787, "learning_rate": 2.3318497872720193e-06, "loss": 0.7665, "step": 2026 }, { "epoch": 0.6892213532811968, "grad_norm": 3.331008521125105, "learning_rate": 2.327193444951966e-06, "loss": 0.7251, "step": 2027 }, { "epoch": 0.689561373682421, "grad_norm": 1.558219400732492, "learning_rate": 2.322540345790508e-06, "loss": 0.8328, "step": 2028 }, { "epoch": 0.689901394083645, "grad_norm": 1.6259596450082816, "learning_rate": 2.3178904954336718e-06, "loss": 0.7147, "step": 2029 }, { "epoch": 0.6902414144848691, "grad_norm": 1.8488785938546937, "learning_rate": 2.313243899523544e-06, "loss": 0.8313, "step": 2030 }, { "epoch": 0.6905814348860931, "grad_norm": 1.8394321602635277, "learning_rate": 2.3086005636982582e-06, "loss": 0.8232, "step": 2031 }, { "epoch": 0.6909214552873172, "grad_norm": 1.9494231310016805, "learning_rate": 2.303960493591999e-06, "loss": 0.6783, "step": 2032 }, { "epoch": 0.6912614756885413, "grad_norm": 2.0407917593034863, "learning_rate": 2.29932369483498e-06, "loss": 0.8164, "step": 2033 }, { "epoch": 0.6916014960897654, "grad_norm": 1.5657183822956182, "learning_rate": 2.2946901730534533e-06, "loss": 0.8238, "step": 2034 }, { "epoch": 0.6919415164909895, "grad_norm": 2.1179360693491556, "learning_rate": 2.29005993386969e-06, "loss": 0.6922, "step": 2035 }, { "epoch": 0.6922815368922135, "grad_norm": 2.838283362200117, "learning_rate": 2.285432982901979e-06, "loss": 0.7736, "step": 2036 }, { "epoch": 0.6926215572934376, "grad_norm": 1.9673971851860206, "learning_rate": 2.2808093257646184e-06, "loss": 0.8444, "step": 2037 }, { "epoch": 0.6929615776946617, "grad_norm": 2.112143965612654, "learning_rate": 2.2761889680679106e-06, "loss": 0.7465, "step": 2038 }, { "epoch": 0.6933015980958858, "grad_norm": 3.5658767108987313, "learning_rate": 2.271571915418157e-06, "loss": 0.7382, "step": 2039 }, { "epoch": 0.6936416184971098, "grad_norm": 1.7890203918330567, "learning_rate": 2.266958173417644e-06, "loss": 0.7754, "step": 2040 }, { "epoch": 0.6939816388983339, "grad_norm": 1.879135172415941, "learning_rate": 2.2623477476646447e-06, "loss": 0.9036, "step": 2041 }, { "epoch": 0.694321659299558, "grad_norm": 2.469200512596644, "learning_rate": 2.2577406437534055e-06, "loss": 0.7346, "step": 2042 }, { "epoch": 0.6946616797007821, "grad_norm": 1.7547922743933462, "learning_rate": 2.253136867274146e-06, "loss": 0.837, "step": 2043 }, { "epoch": 0.6950017001020061, "grad_norm": 2.0761435680953837, "learning_rate": 2.2485364238130435e-06, "loss": 0.7821, "step": 2044 }, { "epoch": 0.6953417205032302, "grad_norm": 1.868569645752557, "learning_rate": 2.243939318952234e-06, "loss": 0.8159, "step": 2045 }, { "epoch": 0.6956817409044542, "grad_norm": 2.2407320960892743, "learning_rate": 2.239345558269801e-06, "loss": 0.8396, "step": 2046 }, { "epoch": 0.6960217613056784, "grad_norm": 1.7022760601328137, "learning_rate": 2.23475514733977e-06, "loss": 0.817, "step": 2047 }, { "epoch": 0.6963617817069024, "grad_norm": 1.6301569903177133, "learning_rate": 2.230168091732106e-06, "loss": 0.615, "step": 2048 }, { "epoch": 0.6967018021081265, "grad_norm": 1.9046707956588385, "learning_rate": 2.2255843970126957e-06, "loss": 0.7858, "step": 2049 }, { "epoch": 0.6970418225093505, "grad_norm": 1.9673929880121703, "learning_rate": 2.221004068743356e-06, "loss": 0.845, "step": 2050 }, { "epoch": 0.6973818429105746, "grad_norm": 1.782595694166562, "learning_rate": 2.2164271124818103e-06, "loss": 0.7656, "step": 2051 }, { "epoch": 0.6977218633117988, "grad_norm": 1.6386330711294346, "learning_rate": 2.2118535337817003e-06, "loss": 0.7537, "step": 2052 }, { "epoch": 0.6980618837130228, "grad_norm": 1.4316705818253959, "learning_rate": 2.207283338192559e-06, "loss": 0.6975, "step": 2053 }, { "epoch": 0.6984019041142469, "grad_norm": 1.5611707150455605, "learning_rate": 2.2027165312598185e-06, "loss": 0.6761, "step": 2054 }, { "epoch": 0.6987419245154709, "grad_norm": 1.5355495259096092, "learning_rate": 2.1981531185248034e-06, "loss": 0.6972, "step": 2055 }, { "epoch": 0.699081944916695, "grad_norm": 1.9515735201013045, "learning_rate": 2.1935931055247127e-06, "loss": 0.7739, "step": 2056 }, { "epoch": 0.6994219653179191, "grad_norm": 1.707681607294952, "learning_rate": 2.1890364977926283e-06, "loss": 0.8014, "step": 2057 }, { "epoch": 0.6997619857191432, "grad_norm": 1.9416088276270818, "learning_rate": 2.18448330085749e-06, "loss": 0.7066, "step": 2058 }, { "epoch": 0.7001020061203672, "grad_norm": 1.816549981430375, "learning_rate": 2.1799335202441104e-06, "loss": 0.8464, "step": 2059 }, { "epoch": 0.7004420265215913, "grad_norm": 1.9376066291343164, "learning_rate": 2.1753871614731474e-06, "loss": 0.7222, "step": 2060 }, { "epoch": 0.7007820469228153, "grad_norm": 2.118492372862069, "learning_rate": 2.1708442300611115e-06, "loss": 0.7918, "step": 2061 }, { "epoch": 0.7011220673240395, "grad_norm": 2.0839760418604376, "learning_rate": 2.1663047315203533e-06, "loss": 0.8174, "step": 2062 }, { "epoch": 0.7014620877252635, "grad_norm": 2.3407523269538313, "learning_rate": 2.1617686713590557e-06, "loss": 0.7331, "step": 2063 }, { "epoch": 0.7018021081264876, "grad_norm": 2.3029622189762757, "learning_rate": 2.1572360550812354e-06, "loss": 0.8031, "step": 2064 }, { "epoch": 0.7021421285277116, "grad_norm": 2.1267697856713657, "learning_rate": 2.1527068881867243e-06, "loss": 0.7973, "step": 2065 }, { "epoch": 0.7024821489289358, "grad_norm": 1.8215858343618574, "learning_rate": 2.148181176171174e-06, "loss": 0.8117, "step": 2066 }, { "epoch": 0.7028221693301598, "grad_norm": 1.4157717285401026, "learning_rate": 2.1436589245260375e-06, "loss": 0.9047, "step": 2067 }, { "epoch": 0.7031621897313839, "grad_norm": 1.7979159155184197, "learning_rate": 2.1391401387385773e-06, "loss": 0.8326, "step": 2068 }, { "epoch": 0.7035022101326079, "grad_norm": 2.0914051675300835, "learning_rate": 2.134624824291846e-06, "loss": 0.8622, "step": 2069 }, { "epoch": 0.703842230533832, "grad_norm": 2.1070541206963056, "learning_rate": 2.1301129866646774e-06, "loss": 0.8943, "step": 2070 }, { "epoch": 0.7041822509350562, "grad_norm": 2.13463784164494, "learning_rate": 2.1256046313317002e-06, "loss": 0.8321, "step": 2071 }, { "epoch": 0.7045222713362802, "grad_norm": 2.5136301018463754, "learning_rate": 2.1210997637633067e-06, "loss": 0.7691, "step": 2072 }, { "epoch": 0.7048622917375043, "grad_norm": 2.2541601501295783, "learning_rate": 2.1165983894256647e-06, "loss": 0.7222, "step": 2073 }, { "epoch": 0.7052023121387283, "grad_norm": 2.98901817786412, "learning_rate": 2.1121005137806964e-06, "loss": 0.7528, "step": 2074 }, { "epoch": 0.7055423325399524, "grad_norm": 1.8580295018583073, "learning_rate": 2.1076061422860862e-06, "loss": 0.7779, "step": 2075 }, { "epoch": 0.7058823529411765, "grad_norm": 2.2207128533601983, "learning_rate": 2.1031152803952605e-06, "loss": 0.7673, "step": 2076 }, { "epoch": 0.7062223733424006, "grad_norm": 1.6735932994012201, "learning_rate": 2.098627933557389e-06, "loss": 0.8363, "step": 2077 }, { "epoch": 0.7065623937436246, "grad_norm": 1.5680321057110584, "learning_rate": 2.0941441072173766e-06, "loss": 0.7077, "step": 2078 }, { "epoch": 0.7069024141448487, "grad_norm": 1.8545485537631567, "learning_rate": 2.089663806815856e-06, "loss": 0.7437, "step": 2079 }, { "epoch": 0.7072424345460727, "grad_norm": 1.700070379165538, "learning_rate": 2.085187037789184e-06, "loss": 0.7529, "step": 2080 }, { "epoch": 0.7075824549472969, "grad_norm": 2.092387228234501, "learning_rate": 2.080713805569427e-06, "loss": 0.8072, "step": 2081 }, { "epoch": 0.7079224753485209, "grad_norm": 1.6576023640763133, "learning_rate": 2.0762441155843678e-06, "loss": 0.8356, "step": 2082 }, { "epoch": 0.708262495749745, "grad_norm": 2.096722406636028, "learning_rate": 2.071777973257482e-06, "loss": 0.6847, "step": 2083 }, { "epoch": 0.708602516150969, "grad_norm": 2.647186031515588, "learning_rate": 2.0673153840079502e-06, "loss": 0.8209, "step": 2084 }, { "epoch": 0.7089425365521931, "grad_norm": 1.772423654028428, "learning_rate": 2.0628563532506334e-06, "loss": 0.7487, "step": 2085 }, { "epoch": 0.7092825569534172, "grad_norm": 2.006215820360962, "learning_rate": 2.058400886396079e-06, "loss": 0.7881, "step": 2086 }, { "epoch": 0.7096225773546413, "grad_norm": 1.5471023818172427, "learning_rate": 2.053948988850508e-06, "loss": 0.8533, "step": 2087 }, { "epoch": 0.7099625977558653, "grad_norm": 1.765248777545704, "learning_rate": 2.0495006660158113e-06, "loss": 0.6848, "step": 2088 }, { "epoch": 0.7103026181570894, "grad_norm": 1.6747092561465118, "learning_rate": 2.045055923289544e-06, "loss": 0.7101, "step": 2089 }, { "epoch": 0.7106426385583134, "grad_norm": 1.9042522058118778, "learning_rate": 2.040614766064913e-06, "loss": 0.7763, "step": 2090 }, { "epoch": 0.7109826589595376, "grad_norm": 6.255027159940595, "learning_rate": 2.036177199730781e-06, "loss": 0.85, "step": 2091 }, { "epoch": 0.7113226793607617, "grad_norm": 1.622002495881479, "learning_rate": 2.0317432296716475e-06, "loss": 0.727, "step": 2092 }, { "epoch": 0.7116626997619857, "grad_norm": 1.691104238150303, "learning_rate": 2.0273128612676506e-06, "loss": 0.7537, "step": 2093 }, { "epoch": 0.7120027201632098, "grad_norm": 1.7231893078223932, "learning_rate": 2.0228860998945577e-06, "loss": 0.7642, "step": 2094 }, { "epoch": 0.7123427405644339, "grad_norm": 2.1757614848576345, "learning_rate": 2.0184629509237583e-06, "loss": 0.8258, "step": 2095 }, { "epoch": 0.712682760965658, "grad_norm": 1.7874598523749379, "learning_rate": 2.0140434197222647e-06, "loss": 0.8447, "step": 2096 }, { "epoch": 0.713022781366882, "grad_norm": 1.4437605429731617, "learning_rate": 2.00962751165269e-06, "loss": 0.8244, "step": 2097 }, { "epoch": 0.7133628017681061, "grad_norm": 2.0064545785073773, "learning_rate": 2.00521523207326e-06, "loss": 0.7231, "step": 2098 }, { "epoch": 0.7137028221693301, "grad_norm": 1.8662037760559647, "learning_rate": 2.0008065863377903e-06, "loss": 0.7298, "step": 2099 }, { "epoch": 0.7140428425705543, "grad_norm": 1.864146642080098, "learning_rate": 1.996401579795693e-06, "loss": 0.8252, "step": 2100 }, { "epoch": 0.7143828629717783, "grad_norm": 2.131314441947248, "learning_rate": 1.9920002177919622e-06, "loss": 0.8641, "step": 2101 }, { "epoch": 0.7147228833730024, "grad_norm": 1.6478848825280967, "learning_rate": 1.987602505667169e-06, "loss": 0.8314, "step": 2102 }, { "epoch": 0.7150629037742264, "grad_norm": 1.6965083554911622, "learning_rate": 1.983208448757455e-06, "loss": 0.7798, "step": 2103 }, { "epoch": 0.7154029241754505, "grad_norm": 2.2399140677663514, "learning_rate": 1.978818052394528e-06, "loss": 0.7183, "step": 2104 }, { "epoch": 0.7157429445766746, "grad_norm": 2.052486306229749, "learning_rate": 1.974431321905656e-06, "loss": 0.6849, "step": 2105 }, { "epoch": 0.7160829649778987, "grad_norm": 2.3591721996625266, "learning_rate": 1.9700482626136548e-06, "loss": 0.8216, "step": 2106 }, { "epoch": 0.7164229853791227, "grad_norm": 1.7604095436525182, "learning_rate": 1.9656688798368905e-06, "loss": 0.6536, "step": 2107 }, { "epoch": 0.7167630057803468, "grad_norm": 1.9615351287468117, "learning_rate": 1.9612931788892637e-06, "loss": 0.7843, "step": 2108 }, { "epoch": 0.7171030261815708, "grad_norm": 2.0592818898356997, "learning_rate": 1.956921165080208e-06, "loss": 0.7863, "step": 2109 }, { "epoch": 0.717443046582795, "grad_norm": 2.8058446206181253, "learning_rate": 1.9525528437146886e-06, "loss": 0.7241, "step": 2110 }, { "epoch": 0.7177830669840191, "grad_norm": 1.9945581002621289, "learning_rate": 1.9481882200931794e-06, "loss": 0.903, "step": 2111 }, { "epoch": 0.7181230873852431, "grad_norm": 2.180468609375338, "learning_rate": 1.94382729951168e-06, "loss": 0.8265, "step": 2112 }, { "epoch": 0.7184631077864672, "grad_norm": 2.623414185405911, "learning_rate": 1.9394700872616856e-06, "loss": 0.7801, "step": 2113 }, { "epoch": 0.7188031281876912, "grad_norm": 1.7582604702565359, "learning_rate": 1.9351165886302026e-06, "loss": 0.802, "step": 2114 }, { "epoch": 0.7191431485889154, "grad_norm": 2.5152283185802533, "learning_rate": 1.9307668088997206e-06, "loss": 0.8063, "step": 2115 }, { "epoch": 0.7194831689901394, "grad_norm": 2.7666858070153437, "learning_rate": 1.9264207533482264e-06, "loss": 0.7285, "step": 2116 }, { "epoch": 0.7198231893913635, "grad_norm": 2.069299953932944, "learning_rate": 1.922078427249181e-06, "loss": 0.6232, "step": 2117 }, { "epoch": 0.7201632097925875, "grad_norm": 1.9669276644036997, "learning_rate": 1.917739835871523e-06, "loss": 0.8435, "step": 2118 }, { "epoch": 0.7205032301938117, "grad_norm": 2.352513634957127, "learning_rate": 1.9134049844796583e-06, "loss": 0.7437, "step": 2119 }, { "epoch": 0.7208432505950357, "grad_norm": 1.7010870421085014, "learning_rate": 1.9090738783334535e-06, "loss": 0.8334, "step": 2120 }, { "epoch": 0.7211832709962598, "grad_norm": 1.8286429902827022, "learning_rate": 1.904746522688236e-06, "loss": 0.8278, "step": 2121 }, { "epoch": 0.7215232913974838, "grad_norm": 2.204105230258611, "learning_rate": 1.9004229227947752e-06, "loss": 0.7401, "step": 2122 }, { "epoch": 0.7218633117987079, "grad_norm": 2.1051613276517003, "learning_rate": 1.896103083899291e-06, "loss": 0.7424, "step": 2123 }, { "epoch": 0.722203332199932, "grad_norm": 1.913948718818119, "learning_rate": 1.891787011243434e-06, "loss": 0.9045, "step": 2124 }, { "epoch": 0.7225433526011561, "grad_norm": 1.5026786148699889, "learning_rate": 1.8874747100642844e-06, "loss": 0.7094, "step": 2125 }, { "epoch": 0.7228833730023801, "grad_norm": 2.46070363197272, "learning_rate": 1.8831661855943517e-06, "loss": 0.7417, "step": 2126 }, { "epoch": 0.7232233934036042, "grad_norm": 1.7241794429729518, "learning_rate": 1.8788614430615582e-06, "loss": 0.7184, "step": 2127 }, { "epoch": 0.7235634138048282, "grad_norm": 1.7709850180647637, "learning_rate": 1.8745604876892376e-06, "loss": 0.8362, "step": 2128 }, { "epoch": 0.7239034342060524, "grad_norm": 2.062995631686198, "learning_rate": 1.8702633246961282e-06, "loss": 0.7107, "step": 2129 }, { "epoch": 0.7242434546072765, "grad_norm": 1.7639145613774367, "learning_rate": 1.8659699592963705e-06, "loss": 0.7337, "step": 2130 }, { "epoch": 0.7245834750085005, "grad_norm": 1.6485206528522929, "learning_rate": 1.8616803966994912e-06, "loss": 0.7445, "step": 2131 }, { "epoch": 0.7249234954097246, "grad_norm": 2.2432367551430485, "learning_rate": 1.8573946421104082e-06, "loss": 0.7921, "step": 2132 }, { "epoch": 0.7252635158109486, "grad_norm": 2.4352161818113176, "learning_rate": 1.8531127007294159e-06, "loss": 0.8438, "step": 2133 }, { "epoch": 0.7256035362121728, "grad_norm": 1.7595604425268574, "learning_rate": 1.8488345777521804e-06, "loss": 0.7116, "step": 2134 }, { "epoch": 0.7259435566133968, "grad_norm": 1.9847525687521061, "learning_rate": 1.8445602783697375e-06, "loss": 0.7425, "step": 2135 }, { "epoch": 0.7262835770146209, "grad_norm": 2.6735934681474363, "learning_rate": 1.8402898077684806e-06, "loss": 0.7244, "step": 2136 }, { "epoch": 0.7266235974158449, "grad_norm": 1.9110660509672215, "learning_rate": 1.8360231711301618e-06, "loss": 0.8195, "step": 2137 }, { "epoch": 0.726963617817069, "grad_norm": 2.047331681525153, "learning_rate": 1.8317603736318746e-06, "loss": 0.8365, "step": 2138 }, { "epoch": 0.7273036382182931, "grad_norm": 1.6690303524368486, "learning_rate": 1.8275014204460623e-06, "loss": 0.7402, "step": 2139 }, { "epoch": 0.7276436586195172, "grad_norm": 3.9059200473743965, "learning_rate": 1.8232463167404968e-06, "loss": 0.7426, "step": 2140 }, { "epoch": 0.7279836790207412, "grad_norm": 1.932200172688164, "learning_rate": 1.818995067678279e-06, "loss": 0.7984, "step": 2141 }, { "epoch": 0.7283236994219653, "grad_norm": 1.666195338393341, "learning_rate": 1.8147476784178398e-06, "loss": 0.8126, "step": 2142 }, { "epoch": 0.7286637198231893, "grad_norm": 1.3626844223285068, "learning_rate": 1.8105041541129187e-06, "loss": 0.7733, "step": 2143 }, { "epoch": 0.7290037402244135, "grad_norm": 2.5378369721423577, "learning_rate": 1.8062644999125694e-06, "loss": 0.7947, "step": 2144 }, { "epoch": 0.7293437606256375, "grad_norm": 2.071953765717228, "learning_rate": 1.8020287209611464e-06, "loss": 0.7677, "step": 2145 }, { "epoch": 0.7296837810268616, "grad_norm": 2.161502417367569, "learning_rate": 1.7977968223983089e-06, "loss": 0.8209, "step": 2146 }, { "epoch": 0.7300238014280857, "grad_norm": 1.736684016690494, "learning_rate": 1.7935688093589987e-06, "loss": 0.7406, "step": 2147 }, { "epoch": 0.7303638218293098, "grad_norm": 1.6090760426244202, "learning_rate": 1.789344686973452e-06, "loss": 0.7293, "step": 2148 }, { "epoch": 0.7307038422305339, "grad_norm": 1.5720198527469798, "learning_rate": 1.785124460367177e-06, "loss": 0.7335, "step": 2149 }, { "epoch": 0.7310438626317579, "grad_norm": 2.8678996816857842, "learning_rate": 1.7809081346609574e-06, "loss": 0.7563, "step": 2150 }, { "epoch": 0.731383883032982, "grad_norm": 1.7011052505024924, "learning_rate": 1.7766957149708442e-06, "loss": 0.805, "step": 2151 }, { "epoch": 0.731723903434206, "grad_norm": 2.179333898822292, "learning_rate": 1.7724872064081461e-06, "loss": 0.7768, "step": 2152 }, { "epoch": 0.7320639238354302, "grad_norm": 1.9214766322552161, "learning_rate": 1.768282614079432e-06, "loss": 0.8926, "step": 2153 }, { "epoch": 0.7324039442366542, "grad_norm": 1.6016132177586981, "learning_rate": 1.7640819430865113e-06, "loss": 0.7477, "step": 2154 }, { "epoch": 0.7327439646378783, "grad_norm": 1.7429702206342037, "learning_rate": 1.7598851985264426e-06, "loss": 0.7465, "step": 2155 }, { "epoch": 0.7330839850391023, "grad_norm": 3.780369551041426, "learning_rate": 1.7556923854915148e-06, "loss": 0.8328, "step": 2156 }, { "epoch": 0.7334240054403264, "grad_norm": 5.582776086223021, "learning_rate": 1.7515035090692466e-06, "loss": 0.7591, "step": 2157 }, { "epoch": 0.7337640258415505, "grad_norm": 1.75407838644776, "learning_rate": 1.7473185743423853e-06, "loss": 0.8702, "step": 2158 }, { "epoch": 0.7341040462427746, "grad_norm": 3.88944154897513, "learning_rate": 1.74313758638889e-06, "loss": 0.7773, "step": 2159 }, { "epoch": 0.7344440666439986, "grad_norm": 1.7976911792803083, "learning_rate": 1.7389605502819324e-06, "loss": 0.7521, "step": 2160 }, { "epoch": 0.7347840870452227, "grad_norm": 2.645410770587803, "learning_rate": 1.734787471089887e-06, "loss": 0.7529, "step": 2161 }, { "epoch": 0.7351241074464467, "grad_norm": 2.2156116956693133, "learning_rate": 1.730618353876334e-06, "loss": 0.7916, "step": 2162 }, { "epoch": 0.7354641278476709, "grad_norm": 2.6170696483726616, "learning_rate": 1.726453203700037e-06, "loss": 0.8291, "step": 2163 }, { "epoch": 0.735804148248895, "grad_norm": 1.7461843428392605, "learning_rate": 1.7222920256149544e-06, "loss": 0.8014, "step": 2164 }, { "epoch": 0.736144168650119, "grad_norm": 2.1471971587915606, "learning_rate": 1.7181348246702184e-06, "loss": 0.908, "step": 2165 }, { "epoch": 0.736484189051343, "grad_norm": 1.6110057508169202, "learning_rate": 1.7139816059101372e-06, "loss": 0.8735, "step": 2166 }, { "epoch": 0.7368242094525671, "grad_norm": 1.7252936307832583, "learning_rate": 1.7098323743741906e-06, "loss": 0.8038, "step": 2167 }, { "epoch": 0.7371642298537913, "grad_norm": 1.4751364286474202, "learning_rate": 1.705687135097016e-06, "loss": 0.7546, "step": 2168 }, { "epoch": 0.7375042502550153, "grad_norm": 1.8792991681150337, "learning_rate": 1.7015458931084084e-06, "loss": 0.7799, "step": 2169 }, { "epoch": 0.7378442706562394, "grad_norm": 1.9139596611161407, "learning_rate": 1.69740865343331e-06, "loss": 0.7647, "step": 2170 }, { "epoch": 0.7381842910574634, "grad_norm": 2.90776955170561, "learning_rate": 1.6932754210918133e-06, "loss": 0.8511, "step": 2171 }, { "epoch": 0.7385243114586875, "grad_norm": 1.593958716915834, "learning_rate": 1.689146201099141e-06, "loss": 0.7145, "step": 2172 }, { "epoch": 0.7388643318599116, "grad_norm": 2.13248225387608, "learning_rate": 1.6850209984656497e-06, "loss": 0.8358, "step": 2173 }, { "epoch": 0.7392043522611357, "grad_norm": 1.5208790512501984, "learning_rate": 1.6808998181968238e-06, "loss": 0.8051, "step": 2174 }, { "epoch": 0.7395443726623597, "grad_norm": 1.6847664492358727, "learning_rate": 1.6767826652932651e-06, "loss": 0.6941, "step": 2175 }, { "epoch": 0.7398843930635838, "grad_norm": 2.169697482882676, "learning_rate": 1.6726695447506873e-06, "loss": 0.6935, "step": 2176 }, { "epoch": 0.7402244134648079, "grad_norm": 2.4175688054476634, "learning_rate": 1.6685604615599117e-06, "loss": 0.7608, "step": 2177 }, { "epoch": 0.740564433866032, "grad_norm": 1.9499646893695743, "learning_rate": 1.6644554207068642e-06, "loss": 0.6843, "step": 2178 }, { "epoch": 0.740904454267256, "grad_norm": 3.3649722434795937, "learning_rate": 1.6603544271725607e-06, "loss": 0.7127, "step": 2179 }, { "epoch": 0.7412444746684801, "grad_norm": 1.9340583992381386, "learning_rate": 1.656257485933111e-06, "loss": 0.6918, "step": 2180 }, { "epoch": 0.7415844950697041, "grad_norm": 2.1990875141504214, "learning_rate": 1.652164601959705e-06, "loss": 0.7244, "step": 2181 }, { "epoch": 0.7419245154709283, "grad_norm": 1.6090430714552022, "learning_rate": 1.648075780218607e-06, "loss": 0.7215, "step": 2182 }, { "epoch": 0.7422645358721524, "grad_norm": 2.1536726028566986, "learning_rate": 1.6439910256711595e-06, "loss": 0.7827, "step": 2183 }, { "epoch": 0.7426045562733764, "grad_norm": 2.073708232272958, "learning_rate": 1.6399103432737635e-06, "loss": 0.7036, "step": 2184 }, { "epoch": 0.7429445766746005, "grad_norm": 2.593918036362238, "learning_rate": 1.635833737977881e-06, "loss": 0.8041, "step": 2185 }, { "epoch": 0.7432845970758245, "grad_norm": 2.958691737480463, "learning_rate": 1.631761214730026e-06, "loss": 0.7823, "step": 2186 }, { "epoch": 0.7436246174770487, "grad_norm": 2.511544350856816, "learning_rate": 1.6276927784717628e-06, "loss": 0.8576, "step": 2187 }, { "epoch": 0.7439646378782727, "grad_norm": 2.9344433102702556, "learning_rate": 1.623628434139693e-06, "loss": 0.8594, "step": 2188 }, { "epoch": 0.7443046582794968, "grad_norm": 1.8620840535756518, "learning_rate": 1.6195681866654517e-06, "loss": 0.7979, "step": 2189 }, { "epoch": 0.7446446786807208, "grad_norm": 2.3243589724742857, "learning_rate": 1.6155120409757096e-06, "loss": 0.8446, "step": 2190 }, { "epoch": 0.7449846990819449, "grad_norm": 3.875803179972586, "learning_rate": 1.6114600019921538e-06, "loss": 0.8572, "step": 2191 }, { "epoch": 0.745324719483169, "grad_norm": 1.7545550229086833, "learning_rate": 1.6074120746314915e-06, "loss": 0.8357, "step": 2192 }, { "epoch": 0.7456647398843931, "grad_norm": 2.7660506150712343, "learning_rate": 1.6033682638054376e-06, "loss": 0.7566, "step": 2193 }, { "epoch": 0.7460047602856171, "grad_norm": 2.208599221707356, "learning_rate": 1.5993285744207183e-06, "loss": 0.8451, "step": 2194 }, { "epoch": 0.7463447806868412, "grad_norm": 1.6557633542709016, "learning_rate": 1.5952930113790516e-06, "loss": 0.7773, "step": 2195 }, { "epoch": 0.7466848010880652, "grad_norm": 1.992062183353188, "learning_rate": 1.5912615795771557e-06, "loss": 0.8025, "step": 2196 }, { "epoch": 0.7470248214892894, "grad_norm": 2.6192247388362597, "learning_rate": 1.5872342839067305e-06, "loss": 0.7836, "step": 2197 }, { "epoch": 0.7473648418905134, "grad_norm": 1.6643325962419526, "learning_rate": 1.5832111292544571e-06, "loss": 0.6351, "step": 2198 }, { "epoch": 0.7477048622917375, "grad_norm": 1.7740267079319336, "learning_rate": 1.5791921205019984e-06, "loss": 0.7505, "step": 2199 }, { "epoch": 0.7480448826929615, "grad_norm": 3.3050676756035293, "learning_rate": 1.5751772625259787e-06, "loss": 0.871, "step": 2200 }, { "epoch": 0.7483849030941857, "grad_norm": 2.1038805512690875, "learning_rate": 1.571166560197991e-06, "loss": 0.7385, "step": 2201 }, { "epoch": 0.7487249234954098, "grad_norm": 2.7781533096347144, "learning_rate": 1.567160018384582e-06, "loss": 0.6283, "step": 2202 }, { "epoch": 0.7490649438966338, "grad_norm": 1.8302184287905139, "learning_rate": 1.563157641947255e-06, "loss": 0.7385, "step": 2203 }, { "epoch": 0.7494049642978579, "grad_norm": 2.5596522162134363, "learning_rate": 1.5591594357424555e-06, "loss": 0.8839, "step": 2204 }, { "epoch": 0.7497449846990819, "grad_norm": 2.120816300981687, "learning_rate": 1.555165404621567e-06, "loss": 0.7999, "step": 2205 }, { "epoch": 0.7500850051003061, "grad_norm": 2.3376616281310407, "learning_rate": 1.5511755534309143e-06, "loss": 0.8791, "step": 2206 }, { "epoch": 0.7504250255015301, "grad_norm": 1.9094661848594028, "learning_rate": 1.5471898870117414e-06, "loss": 0.8485, "step": 2207 }, { "epoch": 0.7507650459027542, "grad_norm": 1.7917052477602944, "learning_rate": 1.5432084102002243e-06, "loss": 0.7979, "step": 2208 }, { "epoch": 0.7511050663039782, "grad_norm": 2.0422812529030425, "learning_rate": 1.539231127827443e-06, "loss": 0.789, "step": 2209 }, { "epoch": 0.7514450867052023, "grad_norm": 2.671426125801568, "learning_rate": 1.5352580447194e-06, "loss": 0.7645, "step": 2210 }, { "epoch": 0.7517851071064264, "grad_norm": 2.107626624470792, "learning_rate": 1.5312891656969936e-06, "loss": 0.7658, "step": 2211 }, { "epoch": 0.7521251275076505, "grad_norm": 1.8158680120697543, "learning_rate": 1.5273244955760286e-06, "loss": 0.7953, "step": 2212 }, { "epoch": 0.7524651479088745, "grad_norm": 1.978438021845548, "learning_rate": 1.5233640391671973e-06, "loss": 0.8217, "step": 2213 }, { "epoch": 0.7528051683100986, "grad_norm": 2.2654036793743955, "learning_rate": 1.5194078012760781e-06, "loss": 0.7727, "step": 2214 }, { "epoch": 0.7531451887113226, "grad_norm": 1.654479252128216, "learning_rate": 1.5154557867031378e-06, "loss": 0.7005, "step": 2215 }, { "epoch": 0.7534852091125468, "grad_norm": 1.9670315140819592, "learning_rate": 1.511508000243711e-06, "loss": 0.8233, "step": 2216 }, { "epoch": 0.7538252295137708, "grad_norm": 1.6308197393473027, "learning_rate": 1.5075644466880063e-06, "loss": 0.7718, "step": 2217 }, { "epoch": 0.7541652499149949, "grad_norm": 1.9162779769547262, "learning_rate": 1.5036251308210926e-06, "loss": 0.7515, "step": 2218 }, { "epoch": 0.754505270316219, "grad_norm": 2.9000813161046404, "learning_rate": 1.4996900574229022e-06, "loss": 0.8187, "step": 2219 }, { "epoch": 0.754845290717443, "grad_norm": 2.216886994384, "learning_rate": 1.4957592312682157e-06, "loss": 0.7672, "step": 2220 }, { "epoch": 0.7551853111186672, "grad_norm": 1.586989789116563, "learning_rate": 1.4918326571266584e-06, "loss": 0.7531, "step": 2221 }, { "epoch": 0.7555253315198912, "grad_norm": 1.8489815625024457, "learning_rate": 1.4879103397627027e-06, "loss": 0.7646, "step": 2222 }, { "epoch": 0.7558653519211153, "grad_norm": 1.8544337375478892, "learning_rate": 1.4839922839356484e-06, "loss": 0.7514, "step": 2223 }, { "epoch": 0.7562053723223393, "grad_norm": 2.344521934054793, "learning_rate": 1.4800784943996316e-06, "loss": 0.7807, "step": 2224 }, { "epoch": 0.7565453927235634, "grad_norm": 2.653629676503456, "learning_rate": 1.4761689759036058e-06, "loss": 0.8042, "step": 2225 }, { "epoch": 0.7568854131247875, "grad_norm": 1.6279456912028547, "learning_rate": 1.4722637331913447e-06, "loss": 0.6855, "step": 2226 }, { "epoch": 0.7572254335260116, "grad_norm": 2.235296588127983, "learning_rate": 1.4683627710014325e-06, "loss": 0.7996, "step": 2227 }, { "epoch": 0.7575654539272356, "grad_norm": 1.9873774972236244, "learning_rate": 1.4644660940672628e-06, "loss": 0.7425, "step": 2228 }, { "epoch": 0.7579054743284597, "grad_norm": 1.8195331106101174, "learning_rate": 1.4605737071170257e-06, "loss": 0.7902, "step": 2229 }, { "epoch": 0.7582454947296838, "grad_norm": 2.0299601573336705, "learning_rate": 1.4566856148737057e-06, "loss": 0.6815, "step": 2230 }, { "epoch": 0.7585855151309079, "grad_norm": 1.400935242806698, "learning_rate": 1.452801822055081e-06, "loss": 0.7916, "step": 2231 }, { "epoch": 0.7589255355321319, "grad_norm": 1.7386391395292276, "learning_rate": 1.4489223333737084e-06, "loss": 0.8002, "step": 2232 }, { "epoch": 0.759265555933356, "grad_norm": 1.73667800084222, "learning_rate": 1.4450471535369225e-06, "loss": 0.7085, "step": 2233 }, { "epoch": 0.75960557633458, "grad_norm": 1.739383492547159, "learning_rate": 1.44117628724683e-06, "loss": 0.8655, "step": 2234 }, { "epoch": 0.7599455967358042, "grad_norm": 1.8392317788494839, "learning_rate": 1.437309739200306e-06, "loss": 0.7253, "step": 2235 }, { "epoch": 0.7602856171370282, "grad_norm": 1.546424794533531, "learning_rate": 1.4334475140889813e-06, "loss": 0.7947, "step": 2236 }, { "epoch": 0.7606256375382523, "grad_norm": 2.339707672874242, "learning_rate": 1.4295896165992473e-06, "loss": 0.8063, "step": 2237 }, { "epoch": 0.7609656579394763, "grad_norm": 1.6335877702583506, "learning_rate": 1.4257360514122393e-06, "loss": 0.7126, "step": 2238 }, { "epoch": 0.7613056783407004, "grad_norm": 1.9927357472606342, "learning_rate": 1.4218868232038351e-06, "loss": 0.8169, "step": 2239 }, { "epoch": 0.7616456987419246, "grad_norm": 1.9336783631243173, "learning_rate": 1.4180419366446568e-06, "loss": 0.8019, "step": 2240 }, { "epoch": 0.7619857191431486, "grad_norm": 1.8763655884472532, "learning_rate": 1.4142013964000513e-06, "loss": 0.8054, "step": 2241 }, { "epoch": 0.7623257395443727, "grad_norm": 2.025901859532698, "learning_rate": 1.4103652071300945e-06, "loss": 0.8657, "step": 2242 }, { "epoch": 0.7626657599455967, "grad_norm": 1.9142987689118731, "learning_rate": 1.4065333734895815e-06, "loss": 0.8067, "step": 2243 }, { "epoch": 0.7630057803468208, "grad_norm": 1.8673978585090811, "learning_rate": 1.4027059001280269e-06, "loss": 0.7602, "step": 2244 }, { "epoch": 0.7633458007480449, "grad_norm": 1.5244016754272622, "learning_rate": 1.3988827916896491e-06, "loss": 0.761, "step": 2245 }, { "epoch": 0.763685821149269, "grad_norm": 1.6762856131051267, "learning_rate": 1.3950640528133713e-06, "loss": 0.8457, "step": 2246 }, { "epoch": 0.764025841550493, "grad_norm": 1.696468220368342, "learning_rate": 1.3912496881328185e-06, "loss": 0.6888, "step": 2247 }, { "epoch": 0.7643658619517171, "grad_norm": 1.807989274352168, "learning_rate": 1.3874397022763024e-06, "loss": 0.7174, "step": 2248 }, { "epoch": 0.7647058823529411, "grad_norm": 2.1049766319752674, "learning_rate": 1.3836340998668284e-06, "loss": 0.8443, "step": 2249 }, { "epoch": 0.7650459027541653, "grad_norm": 4.147430162429283, "learning_rate": 1.379832885522074e-06, "loss": 0.8293, "step": 2250 }, { "epoch": 0.7653859231553893, "grad_norm": 2.203536974887446, "learning_rate": 1.3760360638544012e-06, "loss": 0.766, "step": 2251 }, { "epoch": 0.7657259435566134, "grad_norm": 2.84334808411539, "learning_rate": 1.3722436394708349e-06, "loss": 0.7397, "step": 2252 }, { "epoch": 0.7660659639578374, "grad_norm": 2.4606094795939875, "learning_rate": 1.3684556169730706e-06, "loss": 0.8327, "step": 2253 }, { "epoch": 0.7664059843590616, "grad_norm": 3.801030903497411, "learning_rate": 1.3646720009574582e-06, "loss": 0.7486, "step": 2254 }, { "epoch": 0.7667460047602856, "grad_norm": 2.2370175205395, "learning_rate": 1.3608927960150008e-06, "loss": 0.8567, "step": 2255 }, { "epoch": 0.7670860251615097, "grad_norm": 1.8816253225209865, "learning_rate": 1.3571180067313539e-06, "loss": 0.8999, "step": 2256 }, { "epoch": 0.7674260455627337, "grad_norm": 2.383535780477955, "learning_rate": 1.3533476376868088e-06, "loss": 0.7714, "step": 2257 }, { "epoch": 0.7677660659639578, "grad_norm": 1.672812334875347, "learning_rate": 1.3495816934562976e-06, "loss": 0.7594, "step": 2258 }, { "epoch": 0.768106086365182, "grad_norm": 1.771870165986916, "learning_rate": 1.3458201786093795e-06, "loss": 0.8326, "step": 2259 }, { "epoch": 0.768446106766406, "grad_norm": 1.681066649638471, "learning_rate": 1.3420630977102455e-06, "loss": 0.6953, "step": 2260 }, { "epoch": 0.7687861271676301, "grad_norm": 2.0034594737419833, "learning_rate": 1.3383104553177001e-06, "loss": 0.8098, "step": 2261 }, { "epoch": 0.7691261475688541, "grad_norm": 1.9711809712880015, "learning_rate": 1.334562255985164e-06, "loss": 0.6649, "step": 2262 }, { "epoch": 0.7694661679700782, "grad_norm": 1.7828734904005346, "learning_rate": 1.3308185042606698e-06, "loss": 0.8103, "step": 2263 }, { "epoch": 0.7698061883713023, "grad_norm": 1.8609944720601692, "learning_rate": 1.3270792046868486e-06, "loss": 0.8238, "step": 2264 }, { "epoch": 0.7701462087725264, "grad_norm": 1.7934456884377525, "learning_rate": 1.323344361800934e-06, "loss": 0.7776, "step": 2265 }, { "epoch": 0.7704862291737504, "grad_norm": 1.7275631998115004, "learning_rate": 1.3196139801347485e-06, "loss": 0.8547, "step": 2266 }, { "epoch": 0.7708262495749745, "grad_norm": 2.7409306203098707, "learning_rate": 1.3158880642147026e-06, "loss": 0.7145, "step": 2267 }, { "epoch": 0.7711662699761985, "grad_norm": 2.087778768589785, "learning_rate": 1.3121666185617859e-06, "loss": 0.726, "step": 2268 }, { "epoch": 0.7715062903774227, "grad_norm": 1.780483085521547, "learning_rate": 1.3084496476915698e-06, "loss": 0.8217, "step": 2269 }, { "epoch": 0.7718463107786467, "grad_norm": 2.8825219877755415, "learning_rate": 1.3047371561141903e-06, "loss": 0.8109, "step": 2270 }, { "epoch": 0.7721863311798708, "grad_norm": 1.9288735242516986, "learning_rate": 1.3010291483343478e-06, "loss": 0.812, "step": 2271 }, { "epoch": 0.7725263515810948, "grad_norm": 2.431431619077142, "learning_rate": 1.2973256288513086e-06, "loss": 0.8052, "step": 2272 }, { "epoch": 0.7728663719823189, "grad_norm": 1.7830394043812279, "learning_rate": 1.2936266021588872e-06, "loss": 0.6701, "step": 2273 }, { "epoch": 0.773206392383543, "grad_norm": 2.105756285208052, "learning_rate": 1.2899320727454472e-06, "loss": 0.82, "step": 2274 }, { "epoch": 0.7735464127847671, "grad_norm": 5.425208376062677, "learning_rate": 1.2862420450938955e-06, "loss": 0.743, "step": 2275 }, { "epoch": 0.7738864331859912, "grad_norm": 1.827529432109907, "learning_rate": 1.28255652368168e-06, "loss": 0.7528, "step": 2276 }, { "epoch": 0.7742264535872152, "grad_norm": 2.0046945708038524, "learning_rate": 1.2788755129807767e-06, "loss": 0.8446, "step": 2277 }, { "epoch": 0.7745664739884393, "grad_norm": 1.947473671747887, "learning_rate": 1.2751990174576883e-06, "loss": 0.7801, "step": 2278 }, { "epoch": 0.7749064943896634, "grad_norm": 1.730669713034862, "learning_rate": 1.2715270415734425e-06, "loss": 0.7308, "step": 2279 }, { "epoch": 0.7752465147908875, "grad_norm": 2.0448368740503335, "learning_rate": 1.2678595897835788e-06, "loss": 0.7239, "step": 2280 }, { "epoch": 0.7755865351921115, "grad_norm": 1.7996589672894716, "learning_rate": 1.2641966665381517e-06, "loss": 0.8383, "step": 2281 }, { "epoch": 0.7759265555933356, "grad_norm": 1.9501431444735777, "learning_rate": 1.2605382762817164e-06, "loss": 0.7416, "step": 2282 }, { "epoch": 0.7762665759945597, "grad_norm": 2.44528318223147, "learning_rate": 1.2568844234533294e-06, "loss": 0.867, "step": 2283 }, { "epoch": 0.7766065963957838, "grad_norm": 1.5135812296404374, "learning_rate": 1.253235112486541e-06, "loss": 0.7896, "step": 2284 }, { "epoch": 0.7769466167970078, "grad_norm": 1.6910504402544813, "learning_rate": 1.249590347809393e-06, "loss": 0.8092, "step": 2285 }, { "epoch": 0.7772866371982319, "grad_norm": 2.0907001787083623, "learning_rate": 1.2459501338444085e-06, "loss": 0.7999, "step": 2286 }, { "epoch": 0.7776266575994559, "grad_norm": 1.7084265208677907, "learning_rate": 1.2423144750085875e-06, "loss": 0.8109, "step": 2287 }, { "epoch": 0.7779666780006801, "grad_norm": 1.7912086159226683, "learning_rate": 1.2386833757134076e-06, "loss": 0.7468, "step": 2288 }, { "epoch": 0.7783066984019041, "grad_norm": 1.9542261253674484, "learning_rate": 1.2350568403648088e-06, "loss": 0.7268, "step": 2289 }, { "epoch": 0.7786467188031282, "grad_norm": 1.4777106097697732, "learning_rate": 1.2314348733631958e-06, "loss": 0.7642, "step": 2290 }, { "epoch": 0.7789867392043522, "grad_norm": 2.1056584699669814, "learning_rate": 1.2278174791034281e-06, "loss": 0.8599, "step": 2291 }, { "epoch": 0.7793267596055763, "grad_norm": 2.1244138537440156, "learning_rate": 1.224204661974821e-06, "loss": 0.7469, "step": 2292 }, { "epoch": 0.7796667800068005, "grad_norm": 1.842651124665579, "learning_rate": 1.2205964263611325e-06, "loss": 0.7238, "step": 2293 }, { "epoch": 0.7800068004080245, "grad_norm": 9.972026665603295, "learning_rate": 1.2169927766405598e-06, "loss": 0.753, "step": 2294 }, { "epoch": 0.7803468208092486, "grad_norm": 1.8250005499709825, "learning_rate": 1.2133937171857406e-06, "loss": 0.7459, "step": 2295 }, { "epoch": 0.7806868412104726, "grad_norm": 1.4938745812322698, "learning_rate": 1.2097992523637387e-06, "loss": 0.741, "step": 2296 }, { "epoch": 0.7810268616116967, "grad_norm": 1.8847996493887464, "learning_rate": 1.2062093865360458e-06, "loss": 0.8275, "step": 2297 }, { "epoch": 0.7813668820129208, "grad_norm": 1.7228031928755019, "learning_rate": 1.2026241240585702e-06, "loss": 0.75, "step": 2298 }, { "epoch": 0.7817069024141449, "grad_norm": 2.501687535032944, "learning_rate": 1.1990434692816367e-06, "loss": 0.7823, "step": 2299 }, { "epoch": 0.7820469228153689, "grad_norm": 2.7653925468074614, "learning_rate": 1.1954674265499773e-06, "loss": 0.681, "step": 2300 }, { "epoch": 0.782386943216593, "grad_norm": 2.3143819049767864, "learning_rate": 1.1918960002027308e-06, "loss": 0.8237, "step": 2301 }, { "epoch": 0.782726963617817, "grad_norm": 1.5824914893100979, "learning_rate": 1.1883291945734315e-06, "loss": 0.7691, "step": 2302 }, { "epoch": 0.7830669840190412, "grad_norm": 2.2301539841962708, "learning_rate": 1.1847670139900074e-06, "loss": 0.7281, "step": 2303 }, { "epoch": 0.7834070044202652, "grad_norm": 2.4915256532784738, "learning_rate": 1.1812094627747777e-06, "loss": 0.7732, "step": 2304 }, { "epoch": 0.7837470248214893, "grad_norm": 2.2299044959118848, "learning_rate": 1.1776565452444389e-06, "loss": 0.7285, "step": 2305 }, { "epoch": 0.7840870452227133, "grad_norm": 1.8798158077309424, "learning_rate": 1.174108265710071e-06, "loss": 0.8002, "step": 2306 }, { "epoch": 0.7844270656239375, "grad_norm": 2.035534060132518, "learning_rate": 1.1705646284771227e-06, "loss": 0.729, "step": 2307 }, { "epoch": 0.7847670860251615, "grad_norm": 1.7825151695794803, "learning_rate": 1.1670256378454093e-06, "loss": 0.7919, "step": 2308 }, { "epoch": 0.7851071064263856, "grad_norm": 2.0755250446855404, "learning_rate": 1.1634912981091096e-06, "loss": 0.801, "step": 2309 }, { "epoch": 0.7854471268276096, "grad_norm": 1.9145697285689294, "learning_rate": 1.159961613556757e-06, "loss": 0.7888, "step": 2310 }, { "epoch": 0.7857871472288337, "grad_norm": 2.071282232646433, "learning_rate": 1.1564365884712409e-06, "loss": 0.8008, "step": 2311 }, { "epoch": 0.7861271676300579, "grad_norm": 1.8475994726429972, "learning_rate": 1.1529162271297912e-06, "loss": 0.7505, "step": 2312 }, { "epoch": 0.7864671880312819, "grad_norm": 1.8531724346083438, "learning_rate": 1.1494005338039839e-06, "loss": 0.7435, "step": 2313 }, { "epoch": 0.786807208432506, "grad_norm": 2.362438597585831, "learning_rate": 1.1458895127597275e-06, "loss": 0.7681, "step": 2314 }, { "epoch": 0.78714722883373, "grad_norm": 2.2288635557885463, "learning_rate": 1.1423831682572623e-06, "loss": 0.7871, "step": 2315 }, { "epoch": 0.7874872492349541, "grad_norm": 1.5469503925739951, "learning_rate": 1.1388815045511525e-06, "loss": 0.7279, "step": 2316 }, { "epoch": 0.7878272696361782, "grad_norm": 2.0731404202763626, "learning_rate": 1.1353845258902867e-06, "loss": 0.788, "step": 2317 }, { "epoch": 0.7881672900374023, "grad_norm": 2.262319333024703, "learning_rate": 1.131892236517866e-06, "loss": 0.6889, "step": 2318 }, { "epoch": 0.7885073104386263, "grad_norm": 2.157562568854031, "learning_rate": 1.1284046406713994e-06, "loss": 0.6274, "step": 2319 }, { "epoch": 0.7888473308398504, "grad_norm": 1.8322331540862034, "learning_rate": 1.1249217425827063e-06, "loss": 0.7697, "step": 2320 }, { "epoch": 0.7891873512410744, "grad_norm": 1.9705120789647137, "learning_rate": 1.1214435464779006e-06, "loss": 0.8014, "step": 2321 }, { "epoch": 0.7895273716422986, "grad_norm": 1.620994185215544, "learning_rate": 1.117970056577395e-06, "loss": 0.7806, "step": 2322 }, { "epoch": 0.7898673920435226, "grad_norm": 2.1814551902341357, "learning_rate": 1.1145012770958885e-06, "loss": 0.7728, "step": 2323 }, { "epoch": 0.7902074124447467, "grad_norm": 2.2535509074314417, "learning_rate": 1.1110372122423663e-06, "loss": 0.7814, "step": 2324 }, { "epoch": 0.7905474328459707, "grad_norm": 2.814674945307446, "learning_rate": 1.107577866220092e-06, "loss": 0.7463, "step": 2325 }, { "epoch": 0.7908874532471948, "grad_norm": 1.9527766493519834, "learning_rate": 1.104123243226603e-06, "loss": 0.7806, "step": 2326 }, { "epoch": 0.7912274736484189, "grad_norm": 2.6070147128679895, "learning_rate": 1.1006733474537095e-06, "loss": 0.802, "step": 2327 }, { "epoch": 0.791567494049643, "grad_norm": 1.9013207094703433, "learning_rate": 1.0972281830874794e-06, "loss": 0.8648, "step": 2328 }, { "epoch": 0.791907514450867, "grad_norm": 2.0738357697149494, "learning_rate": 1.0937877543082464e-06, "loss": 0.6966, "step": 2329 }, { "epoch": 0.7922475348520911, "grad_norm": 3.3351856836112437, "learning_rate": 1.090352065290593e-06, "loss": 0.7704, "step": 2330 }, { "epoch": 0.7925875552533151, "grad_norm": 2.4139686701986545, "learning_rate": 1.086921120203353e-06, "loss": 0.8781, "step": 2331 }, { "epoch": 0.7929275756545393, "grad_norm": 2.0367586641793785, "learning_rate": 1.0834949232096008e-06, "loss": 0.7859, "step": 2332 }, { "epoch": 0.7932675960557634, "grad_norm": 1.731703445699507, "learning_rate": 1.0800734784666556e-06, "loss": 0.654, "step": 2333 }, { "epoch": 0.7936076164569874, "grad_norm": 1.9761986748758829, "learning_rate": 1.076656790126065e-06, "loss": 0.8221, "step": 2334 }, { "epoch": 0.7939476368582115, "grad_norm": 2.089346557643535, "learning_rate": 1.0732448623336057e-06, "loss": 0.7591, "step": 2335 }, { "epoch": 0.7942876572594356, "grad_norm": 2.0490768682382074, "learning_rate": 1.0698376992292808e-06, "loss": 0.8476, "step": 2336 }, { "epoch": 0.7946276776606597, "grad_norm": 1.7404822303267078, "learning_rate": 1.0664353049473085e-06, "loss": 0.8059, "step": 2337 }, { "epoch": 0.7949676980618837, "grad_norm": 1.9516697066879696, "learning_rate": 1.0630376836161248e-06, "loss": 0.7247, "step": 2338 }, { "epoch": 0.7953077184631078, "grad_norm": 1.7179222033684476, "learning_rate": 1.0596448393583709e-06, "loss": 0.7071, "step": 2339 }, { "epoch": 0.7956477388643318, "grad_norm": 2.089155897647102, "learning_rate": 1.0562567762908915e-06, "loss": 0.7622, "step": 2340 }, { "epoch": 0.795987759265556, "grad_norm": 2.0146843017178906, "learning_rate": 1.052873498524732e-06, "loss": 0.7588, "step": 2341 }, { "epoch": 0.79632777966678, "grad_norm": 1.8914540998106915, "learning_rate": 1.0494950101651274e-06, "loss": 0.7959, "step": 2342 }, { "epoch": 0.7966678000680041, "grad_norm": 2.392413934717188, "learning_rate": 1.046121315311508e-06, "loss": 0.8566, "step": 2343 }, { "epoch": 0.7970078204692281, "grad_norm": 1.763253874255017, "learning_rate": 1.04275241805748e-06, "loss": 0.7474, "step": 2344 }, { "epoch": 0.7973478408704522, "grad_norm": 1.558308712843251, "learning_rate": 1.0393883224908358e-06, "loss": 0.6914, "step": 2345 }, { "epoch": 0.7976878612716763, "grad_norm": 1.7605529498524175, "learning_rate": 1.036029032693534e-06, "loss": 0.837, "step": 2346 }, { "epoch": 0.7980278816729004, "grad_norm": 1.78998260592103, "learning_rate": 1.0326745527417098e-06, "loss": 0.8327, "step": 2347 }, { "epoch": 0.7983679020741244, "grad_norm": 1.7542085033348727, "learning_rate": 1.0293248867056527e-06, "loss": 0.8285, "step": 2348 }, { "epoch": 0.7987079224753485, "grad_norm": 1.9678558489079983, "learning_rate": 1.0259800386498204e-06, "loss": 0.8867, "step": 2349 }, { "epoch": 0.7990479428765725, "grad_norm": 2.214701849578641, "learning_rate": 1.022640012632819e-06, "loss": 0.9296, "step": 2350 }, { "epoch": 0.7993879632777967, "grad_norm": 1.9468124426612539, "learning_rate": 1.0193048127074034e-06, "loss": 0.8632, "step": 2351 }, { "epoch": 0.7997279836790208, "grad_norm": 1.5760592103068731, "learning_rate": 1.0159744429204776e-06, "loss": 0.8049, "step": 2352 }, { "epoch": 0.8000680040802448, "grad_norm": 1.950530829583191, "learning_rate": 1.0126489073130779e-06, "loss": 0.6512, "step": 2353 }, { "epoch": 0.8004080244814689, "grad_norm": 1.7143537928592565, "learning_rate": 1.0093282099203805e-06, "loss": 0.7408, "step": 2354 }, { "epoch": 0.8007480448826929, "grad_norm": 3.702934075061963, "learning_rate": 1.0060123547716888e-06, "loss": 0.7784, "step": 2355 }, { "epoch": 0.8010880652839171, "grad_norm": 1.599124103938154, "learning_rate": 1.0027013458904288e-06, "loss": 0.8521, "step": 2356 }, { "epoch": 0.8014280856851411, "grad_norm": 1.713620640939179, "learning_rate": 9.993951872941493e-07, "loss": 0.8589, "step": 2357 }, { "epoch": 0.8017681060863652, "grad_norm": 1.9033657273086837, "learning_rate": 9.960938829945104e-07, "loss": 0.7361, "step": 2358 }, { "epoch": 0.8021081264875892, "grad_norm": 1.8996990852991276, "learning_rate": 9.927974369972871e-07, "loss": 0.7452, "step": 2359 }, { "epoch": 0.8024481468888133, "grad_norm": 2.0379615163395193, "learning_rate": 9.895058533023532e-07, "loss": 0.5995, "step": 2360 }, { "epoch": 0.8027881672900374, "grad_norm": 1.7890665810043467, "learning_rate": 9.862191359036883e-07, "loss": 0.9003, "step": 2361 }, { "epoch": 0.8031281876912615, "grad_norm": 2.6456228617289748, "learning_rate": 9.829372887893624e-07, "loss": 0.7455, "step": 2362 }, { "epoch": 0.8034682080924855, "grad_norm": 1.6805195436291074, "learning_rate": 9.796603159415407e-07, "loss": 0.7163, "step": 2363 }, { "epoch": 0.8038082284937096, "grad_norm": 2.042414294670765, "learning_rate": 9.763882213364705e-07, "loss": 0.6174, "step": 2364 }, { "epoch": 0.8041482488949337, "grad_norm": 1.8519040933711384, "learning_rate": 9.731210089444803e-07, "loss": 0.7669, "step": 2365 }, { "epoch": 0.8044882692961578, "grad_norm": 2.2531030397503464, "learning_rate": 9.69858682729976e-07, "loss": 0.8395, "step": 2366 }, { "epoch": 0.8048282896973818, "grad_norm": 2.0748617126240516, "learning_rate": 9.66601246651432e-07, "loss": 0.7717, "step": 2367 }, { "epoch": 0.8051683100986059, "grad_norm": 3.841576998570947, "learning_rate": 9.633487046613932e-07, "loss": 0.8345, "step": 2368 }, { "epoch": 0.80550833049983, "grad_norm": 1.7489227453665286, "learning_rate": 9.60101060706462e-07, "loss": 0.8575, "step": 2369 }, { "epoch": 0.8058483509010541, "grad_norm": 1.7114506077871587, "learning_rate": 9.568583187273018e-07, "loss": 0.8861, "step": 2370 }, { "epoch": 0.8061883713022782, "grad_norm": 2.629625241075618, "learning_rate": 9.536204826586243e-07, "loss": 0.707, "step": 2371 }, { "epoch": 0.8065283917035022, "grad_norm": 2.451325517306834, "learning_rate": 9.503875564291886e-07, "loss": 0.7568, "step": 2372 }, { "epoch": 0.8068684121047263, "grad_norm": 1.865454750034287, "learning_rate": 9.471595439617986e-07, "loss": 0.8517, "step": 2373 }, { "epoch": 0.8072084325059503, "grad_norm": 2.0078901844899053, "learning_rate": 9.439364491732927e-07, "loss": 0.7792, "step": 2374 }, { "epoch": 0.8075484529071745, "grad_norm": 2.1884086746809506, "learning_rate": 9.407182759745464e-07, "loss": 0.7711, "step": 2375 }, { "epoch": 0.8078884733083985, "grad_norm": 2.359962377985305, "learning_rate": 9.375050282704596e-07, "loss": 0.7623, "step": 2376 }, { "epoch": 0.8082284937096226, "grad_norm": 1.965811233572265, "learning_rate": 9.342967099599587e-07, "loss": 0.7636, "step": 2377 }, { "epoch": 0.8085685141108466, "grad_norm": 2.1355877396954557, "learning_rate": 9.31093324935985e-07, "loss": 0.8835, "step": 2378 }, { "epoch": 0.8089085345120707, "grad_norm": 1.725291600691821, "learning_rate": 9.278948770854984e-07, "loss": 0.8575, "step": 2379 }, { "epoch": 0.8092485549132948, "grad_norm": 2.1970920436039725, "learning_rate": 9.247013702894653e-07, "loss": 0.7891, "step": 2380 }, { "epoch": 0.8095885753145189, "grad_norm": 2.1911323854498304, "learning_rate": 9.215128084228564e-07, "loss": 0.7819, "step": 2381 }, { "epoch": 0.8099285957157429, "grad_norm": 1.9062475395377723, "learning_rate": 9.183291953546425e-07, "loss": 0.7573, "step": 2382 }, { "epoch": 0.810268616116967, "grad_norm": 1.5938084300323458, "learning_rate": 9.151505349477901e-07, "loss": 0.744, "step": 2383 }, { "epoch": 0.810608636518191, "grad_norm": 2.4169596699907103, "learning_rate": 9.11976831059258e-07, "loss": 0.7026, "step": 2384 }, { "epoch": 0.8109486569194152, "grad_norm": 2.6219709533237903, "learning_rate": 9.088080875399862e-07, "loss": 0.6643, "step": 2385 }, { "epoch": 0.8112886773206393, "grad_norm": 2.01219370432533, "learning_rate": 9.056443082349015e-07, "loss": 0.7425, "step": 2386 }, { "epoch": 0.8116286977218633, "grad_norm": 2.066167061827305, "learning_rate": 9.024854969829016e-07, "loss": 0.6546, "step": 2387 }, { "epoch": 0.8119687181230874, "grad_norm": 1.4300615570476107, "learning_rate": 8.993316576168626e-07, "loss": 0.7899, "step": 2388 }, { "epoch": 0.8123087385243115, "grad_norm": 1.7707829875888732, "learning_rate": 8.961827939636198e-07, "loss": 0.8382, "step": 2389 }, { "epoch": 0.8126487589255356, "grad_norm": 2.1704787690301597, "learning_rate": 8.930389098439751e-07, "loss": 0.7779, "step": 2390 }, { "epoch": 0.8129887793267596, "grad_norm": 1.6521448642583239, "learning_rate": 8.899000090726905e-07, "loss": 0.788, "step": 2391 }, { "epoch": 0.8133287997279837, "grad_norm": 1.9493462394819676, "learning_rate": 8.867660954584773e-07, "loss": 0.8392, "step": 2392 }, { "epoch": 0.8136688201292077, "grad_norm": 1.8331269676018516, "learning_rate": 8.836371728039989e-07, "loss": 0.78, "step": 2393 }, { "epoch": 0.8140088405304319, "grad_norm": 2.026606682272262, "learning_rate": 8.80513244905859e-07, "loss": 0.8935, "step": 2394 }, { "epoch": 0.8143488609316559, "grad_norm": 1.7620149620203838, "learning_rate": 8.773943155546044e-07, "loss": 0.6249, "step": 2395 }, { "epoch": 0.81468888133288, "grad_norm": 2.7206868334859506, "learning_rate": 8.74280388534714e-07, "loss": 0.7804, "step": 2396 }, { "epoch": 0.815028901734104, "grad_norm": 1.7308067133219402, "learning_rate": 8.711714676245975e-07, "loss": 0.7325, "step": 2397 }, { "epoch": 0.8153689221353281, "grad_norm": 2.51943783841568, "learning_rate": 8.680675565965918e-07, "loss": 0.752, "step": 2398 }, { "epoch": 0.8157089425365522, "grad_norm": 1.549980412078022, "learning_rate": 8.64968659216951e-07, "loss": 0.9601, "step": 2399 }, { "epoch": 0.8160489629377763, "grad_norm": 2.0254215121489194, "learning_rate": 8.618747792458515e-07, "loss": 0.8119, "step": 2400 }, { "epoch": 0.8163889833390003, "grad_norm": 1.9202778211704874, "learning_rate": 8.58785920437376e-07, "loss": 0.7911, "step": 2401 }, { "epoch": 0.8167290037402244, "grad_norm": 1.6548864238372174, "learning_rate": 8.557020865395194e-07, "loss": 0.7711, "step": 2402 }, { "epoch": 0.8170690241414484, "grad_norm": 1.5676328354601332, "learning_rate": 8.526232812941748e-07, "loss": 0.6984, "step": 2403 }, { "epoch": 0.8174090445426726, "grad_norm": 1.7193145665504181, "learning_rate": 8.49549508437138e-07, "loss": 0.8111, "step": 2404 }, { "epoch": 0.8177490649438967, "grad_norm": 1.68834796706292, "learning_rate": 8.464807716980961e-07, "loss": 0.7438, "step": 2405 }, { "epoch": 0.8180890853451207, "grad_norm": 2.349332554637134, "learning_rate": 8.434170748006226e-07, "loss": 0.8144, "step": 2406 }, { "epoch": 0.8184291057463448, "grad_norm": 7.841596102340736, "learning_rate": 8.403584214621823e-07, "loss": 0.7929, "step": 2407 }, { "epoch": 0.8187691261475688, "grad_norm": 2.141322231062195, "learning_rate": 8.373048153941144e-07, "loss": 0.8196, "step": 2408 }, { "epoch": 0.819109146548793, "grad_norm": 2.0397457850733884, "learning_rate": 8.34256260301638e-07, "loss": 0.7033, "step": 2409 }, { "epoch": 0.819449166950017, "grad_norm": 2.205136821953891, "learning_rate": 8.312127598838387e-07, "loss": 0.7234, "step": 2410 }, { "epoch": 0.8197891873512411, "grad_norm": 4.337708830724585, "learning_rate": 8.281743178336754e-07, "loss": 0.7171, "step": 2411 }, { "epoch": 0.8201292077524651, "grad_norm": 1.6721477091051125, "learning_rate": 8.251409378379638e-07, "loss": 0.8007, "step": 2412 }, { "epoch": 0.8204692281536892, "grad_norm": 3.8418421223750685, "learning_rate": 8.22112623577378e-07, "loss": 0.7635, "step": 2413 }, { "epoch": 0.8208092485549133, "grad_norm": 1.6182376861006265, "learning_rate": 8.19089378726447e-07, "loss": 0.7876, "step": 2414 }, { "epoch": 0.8211492689561374, "grad_norm": 2.2512602655558376, "learning_rate": 8.160712069535464e-07, "loss": 0.7364, "step": 2415 }, { "epoch": 0.8214892893573614, "grad_norm": 1.5613182342547798, "learning_rate": 8.130581119209008e-07, "loss": 0.7997, "step": 2416 }, { "epoch": 0.8218293097585855, "grad_norm": 2.123606402304467, "learning_rate": 8.100500972845688e-07, "loss": 0.7256, "step": 2417 }, { "epoch": 0.8221693301598096, "grad_norm": 2.5565946650892846, "learning_rate": 8.070471666944496e-07, "loss": 0.7453, "step": 2418 }, { "epoch": 0.8225093505610337, "grad_norm": 2.2217115142287667, "learning_rate": 8.040493237942698e-07, "loss": 0.8128, "step": 2419 }, { "epoch": 0.8228493709622577, "grad_norm": 1.8879938400884209, "learning_rate": 8.010565722215851e-07, "loss": 0.7291, "step": 2420 }, { "epoch": 0.8231893913634818, "grad_norm": 1.8601116173360819, "learning_rate": 7.98068915607772e-07, "loss": 0.801, "step": 2421 }, { "epoch": 0.8235294117647058, "grad_norm": 1.7254034450571911, "learning_rate": 7.950863575780249e-07, "loss": 0.7592, "step": 2422 }, { "epoch": 0.82386943216593, "grad_norm": 2.6122625161288586, "learning_rate": 7.921089017513522e-07, "loss": 0.8019, "step": 2423 }, { "epoch": 0.824209452567154, "grad_norm": 1.6973528915543705, "learning_rate": 7.891365517405702e-07, "loss": 0.8974, "step": 2424 }, { "epoch": 0.8245494729683781, "grad_norm": 1.7759681048749987, "learning_rate": 7.861693111523022e-07, "loss": 0.7917, "step": 2425 }, { "epoch": 0.8248894933696022, "grad_norm": 1.7705878213685702, "learning_rate": 7.832071835869687e-07, "loss": 0.8071, "step": 2426 }, { "epoch": 0.8252295137708262, "grad_norm": 2.156963508346104, "learning_rate": 7.802501726387901e-07, "loss": 0.7664, "step": 2427 }, { "epoch": 0.8255695341720504, "grad_norm": 1.7276432522564236, "learning_rate": 7.772982818957742e-07, "loss": 0.7373, "step": 2428 }, { "epoch": 0.8259095545732744, "grad_norm": 5.112931675706146, "learning_rate": 7.743515149397185e-07, "loss": 0.777, "step": 2429 }, { "epoch": 0.8262495749744985, "grad_norm": 1.859215069146444, "learning_rate": 7.714098753462018e-07, "loss": 0.7991, "step": 2430 }, { "epoch": 0.8265895953757225, "grad_norm": 2.0011677462274737, "learning_rate": 7.684733666845812e-07, "loss": 0.7925, "step": 2431 }, { "epoch": 0.8269296157769466, "grad_norm": 2.7022442512396987, "learning_rate": 7.655419925179919e-07, "loss": 0.6235, "step": 2432 }, { "epoch": 0.8272696361781707, "grad_norm": 2.0241160886895293, "learning_rate": 7.626157564033332e-07, "loss": 0.6865, "step": 2433 }, { "epoch": 0.8276096565793948, "grad_norm": 1.906792937890856, "learning_rate": 7.596946618912754e-07, "loss": 0.8559, "step": 2434 }, { "epoch": 0.8279496769806188, "grad_norm": 1.8797923348430752, "learning_rate": 7.567787125262449e-07, "loss": 0.6898, "step": 2435 }, { "epoch": 0.8282896973818429, "grad_norm": 1.8757719817795944, "learning_rate": 7.538679118464298e-07, "loss": 0.7356, "step": 2436 }, { "epoch": 0.8286297177830669, "grad_norm": 2.699911415427047, "learning_rate": 7.509622633837671e-07, "loss": 0.6198, "step": 2437 }, { "epoch": 0.8289697381842911, "grad_norm": 1.8535353558494398, "learning_rate": 7.480617706639442e-07, "loss": 0.6603, "step": 2438 }, { "epoch": 0.8293097585855151, "grad_norm": 2.0077754858561025, "learning_rate": 7.451664372063916e-07, "loss": 0.8192, "step": 2439 }, { "epoch": 0.8296497789867392, "grad_norm": 2.3842511987435064, "learning_rate": 7.422762665242788e-07, "loss": 0.8319, "step": 2440 }, { "epoch": 0.8299897993879632, "grad_norm": 2.3986998450988835, "learning_rate": 7.393912621245142e-07, "loss": 0.798, "step": 2441 }, { "epoch": 0.8303298197891874, "grad_norm": 1.761347248138979, "learning_rate": 7.365114275077334e-07, "loss": 0.7448, "step": 2442 }, { "epoch": 0.8306698401904115, "grad_norm": 1.9120511252582268, "learning_rate": 7.33636766168303e-07, "loss": 0.8883, "step": 2443 }, { "epoch": 0.8310098605916355, "grad_norm": 1.8267075244116617, "learning_rate": 7.307672815943084e-07, "loss": 0.7732, "step": 2444 }, { "epoch": 0.8313498809928596, "grad_norm": 1.9441639104439716, "learning_rate": 7.279029772675572e-07, "loss": 0.8854, "step": 2445 }, { "epoch": 0.8316899013940836, "grad_norm": 2.1103415209340604, "learning_rate": 7.250438566635692e-07, "loss": 0.8216, "step": 2446 }, { "epoch": 0.8320299217953078, "grad_norm": 1.8163467229280887, "learning_rate": 7.221899232515727e-07, "loss": 0.863, "step": 2447 }, { "epoch": 0.8323699421965318, "grad_norm": 2.1150765935601474, "learning_rate": 7.193411804945061e-07, "loss": 0.6834, "step": 2448 }, { "epoch": 0.8327099625977559, "grad_norm": 1.9172782529773438, "learning_rate": 7.164976318490058e-07, "loss": 0.8915, "step": 2449 }, { "epoch": 0.8330499829989799, "grad_norm": 2.302226063344335, "learning_rate": 7.136592807654085e-07, "loss": 0.7917, "step": 2450 }, { "epoch": 0.833390003400204, "grad_norm": 3.323395060515028, "learning_rate": 7.108261306877423e-07, "loss": 0.7571, "step": 2451 }, { "epoch": 0.8337300238014281, "grad_norm": 2.044271938418409, "learning_rate": 7.079981850537266e-07, "loss": 0.8017, "step": 2452 }, { "epoch": 0.8340700442026522, "grad_norm": 2.1409274203005193, "learning_rate": 7.051754472947625e-07, "loss": 0.7459, "step": 2453 }, { "epoch": 0.8344100646038762, "grad_norm": 2.95578414657139, "learning_rate": 7.023579208359349e-07, "loss": 0.8399, "step": 2454 }, { "epoch": 0.8347500850051003, "grad_norm": 1.7525330525619445, "learning_rate": 6.995456090960034e-07, "loss": 0.8179, "step": 2455 }, { "epoch": 0.8350901054063243, "grad_norm": 3.5586869346230676, "learning_rate": 6.967385154874001e-07, "loss": 0.9779, "step": 2456 }, { "epoch": 0.8354301258075485, "grad_norm": 1.7571946774112055, "learning_rate": 6.939366434162287e-07, "loss": 0.8006, "step": 2457 }, { "epoch": 0.8357701462087725, "grad_norm": 2.4033098083942446, "learning_rate": 6.911399962822518e-07, "loss": 0.7554, "step": 2458 }, { "epoch": 0.8361101666099966, "grad_norm": 1.869358589109052, "learning_rate": 6.883485774788973e-07, "loss": 0.7259, "step": 2459 }, { "epoch": 0.8364501870112206, "grad_norm": 1.91414508963133, "learning_rate": 6.855623903932457e-07, "loss": 0.6757, "step": 2460 }, { "epoch": 0.8367902074124447, "grad_norm": 1.7629021010412422, "learning_rate": 6.82781438406031e-07, "loss": 0.6845, "step": 2461 }, { "epoch": 0.8371302278136689, "grad_norm": 1.733843016954561, "learning_rate": 6.800057248916347e-07, "loss": 0.7731, "step": 2462 }, { "epoch": 0.8374702482148929, "grad_norm": 1.6233499660099797, "learning_rate": 6.772352532180815e-07, "loss": 0.7542, "step": 2463 }, { "epoch": 0.837810268616117, "grad_norm": 1.9854817972998593, "learning_rate": 6.74470026747035e-07, "loss": 0.7532, "step": 2464 }, { "epoch": 0.838150289017341, "grad_norm": 2.0817806024786454, "learning_rate": 6.717100488337952e-07, "loss": 0.7815, "step": 2465 }, { "epoch": 0.8384903094185651, "grad_norm": 2.966599408068768, "learning_rate": 6.689553228272955e-07, "loss": 0.7962, "step": 2466 }, { "epoch": 0.8388303298197892, "grad_norm": 2.0775168349055577, "learning_rate": 6.662058520700926e-07, "loss": 0.7808, "step": 2467 }, { "epoch": 0.8391703502210133, "grad_norm": 2.4039263789618595, "learning_rate": 6.634616398983712e-07, "loss": 0.8221, "step": 2468 }, { "epoch": 0.8395103706222373, "grad_norm": 1.622008521580856, "learning_rate": 6.607226896419305e-07, "loss": 0.7502, "step": 2469 }, { "epoch": 0.8398503910234614, "grad_norm": 2.1962585919549564, "learning_rate": 6.579890046241888e-07, "loss": 0.7449, "step": 2470 }, { "epoch": 0.8401904114246855, "grad_norm": 1.819758576227155, "learning_rate": 6.552605881621732e-07, "loss": 0.7057, "step": 2471 }, { "epoch": 0.8405304318259096, "grad_norm": 1.9641795678417464, "learning_rate": 6.525374435665183e-07, "loss": 0.73, "step": 2472 }, { "epoch": 0.8408704522271336, "grad_norm": 1.6323001552109164, "learning_rate": 6.498195741414637e-07, "loss": 0.7322, "step": 2473 }, { "epoch": 0.8412104726283577, "grad_norm": 1.8961797022845666, "learning_rate": 6.471069831848453e-07, "loss": 0.721, "step": 2474 }, { "epoch": 0.8415504930295817, "grad_norm": 1.811172379243824, "learning_rate": 6.443996739880981e-07, "loss": 0.7265, "step": 2475 }, { "epoch": 0.8418905134308059, "grad_norm": 1.7705443438552309, "learning_rate": 6.416976498362432e-07, "loss": 0.641, "step": 2476 }, { "epoch": 0.84223053383203, "grad_norm": 2.5591611017829186, "learning_rate": 6.39000914007894e-07, "loss": 0.8185, "step": 2477 }, { "epoch": 0.842570554233254, "grad_norm": 1.500821178484566, "learning_rate": 6.363094697752436e-07, "loss": 0.8445, "step": 2478 }, { "epoch": 0.842910574634478, "grad_norm": 1.8045464912453517, "learning_rate": 6.336233204040654e-07, "loss": 0.8186, "step": 2479 }, { "epoch": 0.8432505950357021, "grad_norm": 2.1257740354966024, "learning_rate": 6.309424691537075e-07, "loss": 0.7636, "step": 2480 }, { "epoch": 0.8435906154369263, "grad_norm": 2.1805061395433025, "learning_rate": 6.282669192770896e-07, "loss": 0.7369, "step": 2481 }, { "epoch": 0.8439306358381503, "grad_norm": 2.794846227145433, "learning_rate": 6.255966740207003e-07, "loss": 0.7512, "step": 2482 }, { "epoch": 0.8442706562393744, "grad_norm": 2.139417416344505, "learning_rate": 6.229317366245891e-07, "loss": 0.858, "step": 2483 }, { "epoch": 0.8446106766405984, "grad_norm": 1.6794324817896975, "learning_rate": 6.20272110322368e-07, "loss": 0.7415, "step": 2484 }, { "epoch": 0.8449506970418225, "grad_norm": 2.557876957181143, "learning_rate": 6.176177983412013e-07, "loss": 0.7493, "step": 2485 }, { "epoch": 0.8452907174430466, "grad_norm": 1.6147587726820207, "learning_rate": 6.14968803901807e-07, "loss": 0.7066, "step": 2486 }, { "epoch": 0.8456307378442707, "grad_norm": 6.264968025708578, "learning_rate": 6.123251302184502e-07, "loss": 0.7846, "step": 2487 }, { "epoch": 0.8459707582454947, "grad_norm": 2.0720939669815297, "learning_rate": 6.096867804989387e-07, "loss": 0.8005, "step": 2488 }, { "epoch": 0.8463107786467188, "grad_norm": 1.7818861934834522, "learning_rate": 6.07053757944624e-07, "loss": 0.8083, "step": 2489 }, { "epoch": 0.8466507990479428, "grad_norm": 1.9663397712493058, "learning_rate": 6.044260657503881e-07, "loss": 0.7888, "step": 2490 }, { "epoch": 0.846990819449167, "grad_norm": 1.7007116920435754, "learning_rate": 6.018037071046518e-07, "loss": 0.727, "step": 2491 }, { "epoch": 0.847330839850391, "grad_norm": 2.01433634361555, "learning_rate": 5.991866851893569e-07, "loss": 0.7841, "step": 2492 }, { "epoch": 0.8476708602516151, "grad_norm": 2.0056879931109104, "learning_rate": 5.965750031799772e-07, "loss": 0.7634, "step": 2493 }, { "epoch": 0.8480108806528391, "grad_norm": 1.7817072170081534, "learning_rate": 5.939686642455012e-07, "loss": 0.7755, "step": 2494 }, { "epoch": 0.8483509010540632, "grad_norm": 1.771192620612961, "learning_rate": 5.913676715484363e-07, "loss": 0.8514, "step": 2495 }, { "epoch": 0.8486909214552874, "grad_norm": 1.9377408759282009, "learning_rate": 5.887720282448034e-07, "loss": 0.7875, "step": 2496 }, { "epoch": 0.8490309418565114, "grad_norm": 1.875586498487033, "learning_rate": 5.861817374841311e-07, "loss": 0.7402, "step": 2497 }, { "epoch": 0.8493709622577355, "grad_norm": 1.896813734307177, "learning_rate": 5.835968024094551e-07, "loss": 0.7494, "step": 2498 }, { "epoch": 0.8497109826589595, "grad_norm": 1.746600133481283, "learning_rate": 5.810172261573099e-07, "loss": 0.7486, "step": 2499 }, { "epoch": 0.8500510030601837, "grad_norm": 1.701793210043788, "learning_rate": 5.784430118577322e-07, "loss": 0.7742, "step": 2500 }, { "epoch": 0.8503910234614077, "grad_norm": 1.8039181908755018, "learning_rate": 5.758741626342479e-07, "loss": 0.8416, "step": 2501 }, { "epoch": 0.8507310438626318, "grad_norm": 1.7572654084027113, "learning_rate": 5.733106816038736e-07, "loss": 0.6848, "step": 2502 }, { "epoch": 0.8510710642638558, "grad_norm": 2.018493313007424, "learning_rate": 5.707525718771151e-07, "loss": 0.8917, "step": 2503 }, { "epoch": 0.8514110846650799, "grad_norm": 2.0723674784682644, "learning_rate": 5.681998365579594e-07, "loss": 0.8585, "step": 2504 }, { "epoch": 0.851751105066304, "grad_norm": 1.8580629745251314, "learning_rate": 5.6565247874387e-07, "loss": 0.7809, "step": 2505 }, { "epoch": 0.8520911254675281, "grad_norm": 1.8712383894476246, "learning_rate": 5.631105015257871e-07, "loss": 0.7901, "step": 2506 }, { "epoch": 0.8524311458687521, "grad_norm": 1.574135923996909, "learning_rate": 5.60573907988124e-07, "loss": 0.7791, "step": 2507 }, { "epoch": 0.8527711662699762, "grad_norm": 2.195303216051158, "learning_rate": 5.58042701208758e-07, "loss": 0.6425, "step": 2508 }, { "epoch": 0.8531111866712002, "grad_norm": 2.0222539828068418, "learning_rate": 5.55516884259033e-07, "loss": 0.8305, "step": 2509 }, { "epoch": 0.8534512070724244, "grad_norm": 2.0135511435332427, "learning_rate": 5.529964602037519e-07, "loss": 0.7716, "step": 2510 }, { "epoch": 0.8537912274736484, "grad_norm": 1.7236389648693269, "learning_rate": 5.504814321011732e-07, "loss": 0.6894, "step": 2511 }, { "epoch": 0.8541312478748725, "grad_norm": 1.8873379975752618, "learning_rate": 5.479718030030084e-07, "loss": 0.7636, "step": 2512 }, { "epoch": 0.8544712682760965, "grad_norm": 1.7426057926099683, "learning_rate": 5.454675759544176e-07, "loss": 0.8053, "step": 2513 }, { "epoch": 0.8548112886773206, "grad_norm": 1.8753643537027582, "learning_rate": 5.429687539940076e-07, "loss": 0.723, "step": 2514 }, { "epoch": 0.8551513090785448, "grad_norm": 1.7277156698902645, "learning_rate": 5.404753401538249e-07, "loss": 0.7989, "step": 2515 }, { "epoch": 0.8554913294797688, "grad_norm": 2.2677017882113675, "learning_rate": 5.379873374593563e-07, "loss": 0.7536, "step": 2516 }, { "epoch": 0.8558313498809929, "grad_norm": 1.622977811890628, "learning_rate": 5.355047489295195e-07, "loss": 0.7579, "step": 2517 }, { "epoch": 0.8561713702822169, "grad_norm": 1.8045814672152072, "learning_rate": 5.330275775766642e-07, "loss": 0.7795, "step": 2518 }, { "epoch": 0.856511390683441, "grad_norm": 1.8483802620891205, "learning_rate": 5.30555826406568e-07, "loss": 0.8143, "step": 2519 }, { "epoch": 0.8568514110846651, "grad_norm": 2.0653176883961075, "learning_rate": 5.28089498418431e-07, "loss": 0.8275, "step": 2520 }, { "epoch": 0.8571914314858892, "grad_norm": 2.3816394206381752, "learning_rate": 5.256285966048719e-07, "loss": 0.7278, "step": 2521 }, { "epoch": 0.8575314518871132, "grad_norm": 1.6020707124660172, "learning_rate": 5.23173123951925e-07, "loss": 0.8589, "step": 2522 }, { "epoch": 0.8578714722883373, "grad_norm": 2.2904803070652684, "learning_rate": 5.207230834390403e-07, "loss": 0.7793, "step": 2523 }, { "epoch": 0.8582114926895614, "grad_norm": 2.2504916928798666, "learning_rate": 5.182784780390721e-07, "loss": 0.7643, "step": 2524 }, { "epoch": 0.8585515130907855, "grad_norm": 2.1813636901240074, "learning_rate": 5.158393107182835e-07, "loss": 0.7989, "step": 2525 }, { "epoch": 0.8588915334920095, "grad_norm": 2.17526125079606, "learning_rate": 5.134055844363367e-07, "loss": 0.7287, "step": 2526 }, { "epoch": 0.8592315538932336, "grad_norm": 2.0068173957653364, "learning_rate": 5.109773021462921e-07, "loss": 0.8449, "step": 2527 }, { "epoch": 0.8595715742944576, "grad_norm": 1.9711345745234496, "learning_rate": 5.085544667946057e-07, "loss": 0.8109, "step": 2528 }, { "epoch": 0.8599115946956818, "grad_norm": 2.3317026622050583, "learning_rate": 5.061370813211219e-07, "loss": 0.7172, "step": 2529 }, { "epoch": 0.8602516150969058, "grad_norm": 1.757253994535814, "learning_rate": 5.037251486590755e-07, "loss": 0.7579, "step": 2530 }, { "epoch": 0.8605916354981299, "grad_norm": 2.0285732960091543, "learning_rate": 5.013186717350815e-07, "loss": 0.796, "step": 2531 }, { "epoch": 0.8609316558993539, "grad_norm": 1.659656431274867, "learning_rate": 4.989176534691381e-07, "loss": 0.7392, "step": 2532 }, { "epoch": 0.861271676300578, "grad_norm": 2.0089159264540553, "learning_rate": 4.965220967746181e-07, "loss": 0.7919, "step": 2533 }, { "epoch": 0.8616116967018022, "grad_norm": 2.473425505190949, "learning_rate": 4.94132004558266e-07, "loss": 0.7572, "step": 2534 }, { "epoch": 0.8619517171030262, "grad_norm": 2.151920557303182, "learning_rate": 4.917473797202005e-07, "loss": 0.7254, "step": 2535 }, { "epoch": 0.8622917375042503, "grad_norm": 2.232388197647897, "learning_rate": 4.893682251539012e-07, "loss": 0.6701, "step": 2536 }, { "epoch": 0.8626317579054743, "grad_norm": 1.8999196855734106, "learning_rate": 4.869945437462126e-07, "loss": 0.7422, "step": 2537 }, { "epoch": 0.8629717783066984, "grad_norm": 1.8645892417506236, "learning_rate": 4.846263383773364e-07, "loss": 0.7827, "step": 2538 }, { "epoch": 0.8633117987079225, "grad_norm": 1.9633226448791292, "learning_rate": 4.822636119208335e-07, "loss": 0.8252, "step": 2539 }, { "epoch": 0.8636518191091466, "grad_norm": 2.412498853567893, "learning_rate": 4.799063672436111e-07, "loss": 0.6881, "step": 2540 }, { "epoch": 0.8639918395103706, "grad_norm": 2.0646181111873116, "learning_rate": 4.775546072059311e-07, "loss": 0.7813, "step": 2541 }, { "epoch": 0.8643318599115947, "grad_norm": 1.811749527105386, "learning_rate": 4.752083346613956e-07, "loss": 0.804, "step": 2542 }, { "epoch": 0.8646718803128187, "grad_norm": 2.0205742904901003, "learning_rate": 4.728675524569487e-07, "loss": 0.7478, "step": 2543 }, { "epoch": 0.8650119007140429, "grad_norm": 1.6948710939652791, "learning_rate": 4.7053226343287626e-07, "loss": 0.7354, "step": 2544 }, { "epoch": 0.8653519211152669, "grad_norm": 1.8571787878044181, "learning_rate": 4.68202470422795e-07, "loss": 0.7866, "step": 2545 }, { "epoch": 0.865691941516491, "grad_norm": 4.332069466957595, "learning_rate": 4.6587817625365406e-07, "loss": 0.9335, "step": 2546 }, { "epoch": 0.866031961917715, "grad_norm": 2.1059679315712083, "learning_rate": 4.6355938374572975e-07, "loss": 0.7504, "step": 2547 }, { "epoch": 0.8663719823189391, "grad_norm": 2.411521023379306, "learning_rate": 4.612460957126247e-07, "loss": 0.7945, "step": 2548 }, { "epoch": 0.8667120027201632, "grad_norm": 3.5331071294473904, "learning_rate": 4.589383149612603e-07, "loss": 0.7663, "step": 2549 }, { "epoch": 0.8670520231213873, "grad_norm": 1.8408470267130377, "learning_rate": 4.5663604429187547e-07, "loss": 0.7752, "step": 2550 }, { "epoch": 0.8673920435226113, "grad_norm": 1.8173297059452884, "learning_rate": 4.543392864980256e-07, "loss": 0.734, "step": 2551 }, { "epoch": 0.8677320639238354, "grad_norm": 1.6548861190043966, "learning_rate": 4.5204804436657423e-07, "loss": 0.7518, "step": 2552 }, { "epoch": 0.8680720843250596, "grad_norm": 1.8814419154796098, "learning_rate": 4.4976232067769356e-07, "loss": 0.8335, "step": 2553 }, { "epoch": 0.8684121047262836, "grad_norm": 2.4824574969789017, "learning_rate": 4.474821182048583e-07, "loss": 0.7759, "step": 2554 }, { "epoch": 0.8687521251275077, "grad_norm": 1.929420733997793, "learning_rate": 4.45207439714847e-07, "loss": 0.6974, "step": 2555 }, { "epoch": 0.8690921455287317, "grad_norm": 1.810989892213861, "learning_rate": 4.4293828796773133e-07, "loss": 0.7086, "step": 2556 }, { "epoch": 0.8694321659299558, "grad_norm": 3.143595670737321, "learning_rate": 4.406746657168809e-07, "loss": 0.8016, "step": 2557 }, { "epoch": 0.8697721863311799, "grad_norm": 2.015317063610089, "learning_rate": 4.384165757089526e-07, "loss": 0.7969, "step": 2558 }, { "epoch": 0.870112206732404, "grad_norm": 2.7863828548912277, "learning_rate": 4.361640206838913e-07, "loss": 0.7793, "step": 2559 }, { "epoch": 0.870452227133628, "grad_norm": 1.9725121522194389, "learning_rate": 4.339170033749279e-07, "loss": 0.6607, "step": 2560 }, { "epoch": 0.8707922475348521, "grad_norm": 1.7743293301120258, "learning_rate": 4.316755265085715e-07, "loss": 0.7992, "step": 2561 }, { "epoch": 0.8711322679360761, "grad_norm": 4.284073115166657, "learning_rate": 4.294395928046091e-07, "loss": 0.6972, "step": 2562 }, { "epoch": 0.8714722883373003, "grad_norm": 1.7376821947334347, "learning_rate": 4.272092049761012e-07, "loss": 0.7081, "step": 2563 }, { "epoch": 0.8718123087385243, "grad_norm": 2.4003264376544524, "learning_rate": 4.2498436572938117e-07, "loss": 0.7366, "step": 2564 }, { "epoch": 0.8721523291397484, "grad_norm": 1.7900283874803544, "learning_rate": 4.227650777640474e-07, "loss": 0.7543, "step": 2565 }, { "epoch": 0.8724923495409724, "grad_norm": 2.121565874741983, "learning_rate": 4.2055134377296245e-07, "loss": 0.8625, "step": 2566 }, { "epoch": 0.8728323699421965, "grad_norm": 2.065896891684966, "learning_rate": 4.183431664422527e-07, "loss": 0.8362, "step": 2567 }, { "epoch": 0.8731723903434206, "grad_norm": 3.5516283553936154, "learning_rate": 4.1614054845129814e-07, "loss": 0.7923, "step": 2568 }, { "epoch": 0.8735124107446447, "grad_norm": 1.7586409614005896, "learning_rate": 4.139434924727359e-07, "loss": 0.8087, "step": 2569 }, { "epoch": 0.8738524311458687, "grad_norm": 1.763698578164718, "learning_rate": 4.1175200117245127e-07, "loss": 0.7511, "step": 2570 }, { "epoch": 0.8741924515470928, "grad_norm": 1.9405876321607063, "learning_rate": 4.095660772095822e-07, "loss": 0.7895, "step": 2571 }, { "epoch": 0.8745324719483168, "grad_norm": 2.2201530125382662, "learning_rate": 4.0738572323650636e-07, "loss": 0.7936, "step": 2572 }, { "epoch": 0.874872492349541, "grad_norm": 2.698353558807428, "learning_rate": 4.05210941898847e-07, "loss": 0.7766, "step": 2573 }, { "epoch": 0.8752125127507651, "grad_norm": 1.8695910167521532, "learning_rate": 4.0304173583546214e-07, "loss": 0.7827, "step": 2574 }, { "epoch": 0.8755525331519891, "grad_norm": 1.787702184500189, "learning_rate": 4.008781076784457e-07, "loss": 0.8141, "step": 2575 }, { "epoch": 0.8758925535532132, "grad_norm": 1.9350367343497943, "learning_rate": 3.9872006005312545e-07, "loss": 0.8147, "step": 2576 }, { "epoch": 0.8762325739544373, "grad_norm": 1.9411047058406645, "learning_rate": 3.965675955780551e-07, "loss": 0.8205, "step": 2577 }, { "epoch": 0.8765725943556614, "grad_norm": 2.083484099729529, "learning_rate": 3.9442071686501605e-07, "loss": 0.7374, "step": 2578 }, { "epoch": 0.8769126147568854, "grad_norm": 1.484808045925216, "learning_rate": 3.9227942651900943e-07, "loss": 0.7934, "step": 2579 }, { "epoch": 0.8772526351581095, "grad_norm": 1.6403262654823596, "learning_rate": 3.901437271382591e-07, "loss": 0.75, "step": 2580 }, { "epoch": 0.8775926555593335, "grad_norm": 2.139199109393669, "learning_rate": 3.8801362131420105e-07, "loss": 0.7095, "step": 2581 }, { "epoch": 0.8779326759605577, "grad_norm": 2.283437669529752, "learning_rate": 3.858891116314861e-07, "loss": 0.8167, "step": 2582 }, { "epoch": 0.8782726963617817, "grad_norm": 1.777253272527263, "learning_rate": 3.8377020066797557e-07, "loss": 0.6707, "step": 2583 }, { "epoch": 0.8786127167630058, "grad_norm": 2.032396863440268, "learning_rate": 3.8165689099473436e-07, "loss": 0.7875, "step": 2584 }, { "epoch": 0.8789527371642298, "grad_norm": 2.1496667940512424, "learning_rate": 3.7954918517603636e-07, "loss": 0.7843, "step": 2585 }, { "epoch": 0.8792927575654539, "grad_norm": 2.0953430909454838, "learning_rate": 3.7744708576934795e-07, "loss": 0.7498, "step": 2586 }, { "epoch": 0.879632777966678, "grad_norm": 2.6645355484935, "learning_rate": 3.7535059532533945e-07, "loss": 0.7451, "step": 2587 }, { "epoch": 0.8799727983679021, "grad_norm": 2.7910264296663567, "learning_rate": 3.732597163878715e-07, "loss": 0.761, "step": 2588 }, { "epoch": 0.8803128187691261, "grad_norm": 2.1309852876618893, "learning_rate": 3.711744514939991e-07, "loss": 0.7839, "step": 2589 }, { "epoch": 0.8806528391703502, "grad_norm": 2.0023508759747273, "learning_rate": 3.690948031739622e-07, "loss": 0.7626, "step": 2590 }, { "epoch": 0.8809928595715742, "grad_norm": 2.1149106283441266, "learning_rate": 3.67020773951185e-07, "loss": 0.7549, "step": 2591 }, { "epoch": 0.8813328799727984, "grad_norm": 1.785186211034344, "learning_rate": 3.649523663422783e-07, "loss": 0.7699, "step": 2592 }, { "epoch": 0.8816729003740225, "grad_norm": 2.53236936356743, "learning_rate": 3.6288958285702726e-07, "loss": 0.7464, "step": 2593 }, { "epoch": 0.8820129207752465, "grad_norm": 2.2182667953687516, "learning_rate": 3.6083242599839365e-07, "loss": 0.7926, "step": 2594 }, { "epoch": 0.8823529411764706, "grad_norm": 2.0193533863123316, "learning_rate": 3.587808982625124e-07, "loss": 0.7586, "step": 2595 }, { "epoch": 0.8826929615776946, "grad_norm": 1.8994718280339355, "learning_rate": 3.567350021386895e-07, "loss": 0.7463, "step": 2596 }, { "epoch": 0.8830329819789188, "grad_norm": 1.9243015384876234, "learning_rate": 3.546947401093953e-07, "loss": 0.8557, "step": 2597 }, { "epoch": 0.8833730023801428, "grad_norm": 1.8567845441632016, "learning_rate": 3.5266011465026394e-07, "loss": 0.8092, "step": 2598 }, { "epoch": 0.8837130227813669, "grad_norm": 1.946576177975856, "learning_rate": 3.506311282300934e-07, "loss": 0.7336, "step": 2599 }, { "epoch": 0.8840530431825909, "grad_norm": 1.7654478089333205, "learning_rate": 3.486077833108342e-07, "loss": 0.7989, "step": 2600 }, { "epoch": 0.884393063583815, "grad_norm": 2.3364268494283444, "learning_rate": 3.4659008234759597e-07, "loss": 0.6956, "step": 2601 }, { "epoch": 0.8847330839850391, "grad_norm": 2.1363703287698383, "learning_rate": 3.4457802778863846e-07, "loss": 0.7131, "step": 2602 }, { "epoch": 0.8850731043862632, "grad_norm": 3.273937151667049, "learning_rate": 3.4257162207536887e-07, "loss": 0.821, "step": 2603 }, { "epoch": 0.8854131247874872, "grad_norm": 1.6417747965293206, "learning_rate": 3.405708676423408e-07, "loss": 0.8703, "step": 2604 }, { "epoch": 0.8857531451887113, "grad_norm": 1.7182489892417279, "learning_rate": 3.3857576691725346e-07, "loss": 0.7239, "step": 2605 }, { "epoch": 0.8860931655899354, "grad_norm": 1.7767635345343673, "learning_rate": 3.365863223209409e-07, "loss": 0.7327, "step": 2606 }, { "epoch": 0.8864331859911595, "grad_norm": 1.7793048886202218, "learning_rate": 3.3460253626737774e-07, "loss": 0.7237, "step": 2607 }, { "epoch": 0.8867732063923836, "grad_norm": 2.404768196088338, "learning_rate": 3.3262441116367174e-07, "loss": 0.7197, "step": 2608 }, { "epoch": 0.8871132267936076, "grad_norm": 2.5845357263659863, "learning_rate": 3.306519494100618e-07, "loss": 0.7361, "step": 2609 }, { "epoch": 0.8874532471948317, "grad_norm": 1.7048562949785628, "learning_rate": 3.286851533999136e-07, "loss": 0.8217, "step": 2610 }, { "epoch": 0.8877932675960558, "grad_norm": 1.8598354259713392, "learning_rate": 3.2672402551971903e-07, "loss": 0.7256, "step": 2611 }, { "epoch": 0.8881332879972799, "grad_norm": 2.0484876787203614, "learning_rate": 3.2476856814909364e-07, "loss": 0.7733, "step": 2612 }, { "epoch": 0.8884733083985039, "grad_norm": 4.050508948351152, "learning_rate": 3.2281878366077046e-07, "loss": 0.7087, "step": 2613 }, { "epoch": 0.888813328799728, "grad_norm": 1.7444011978485319, "learning_rate": 3.208746744205998e-07, "loss": 0.8651, "step": 2614 }, { "epoch": 0.889153349200952, "grad_norm": 1.7629638699985104, "learning_rate": 3.1893624278754587e-07, "loss": 0.7781, "step": 2615 }, { "epoch": 0.8894933696021762, "grad_norm": 1.8022588695624528, "learning_rate": 3.170034911136832e-07, "loss": 0.8746, "step": 2616 }, { "epoch": 0.8898333900034002, "grad_norm": 1.8390591568479497, "learning_rate": 3.150764217441954e-07, "loss": 0.6708, "step": 2617 }, { "epoch": 0.8901734104046243, "grad_norm": 1.7855141453060075, "learning_rate": 3.131550370173703e-07, "loss": 0.7825, "step": 2618 }, { "epoch": 0.8905134308058483, "grad_norm": 1.8373816915953056, "learning_rate": 3.112393392645985e-07, "loss": 0.7392, "step": 2619 }, { "epoch": 0.8908534512070724, "grad_norm": 1.999434435687263, "learning_rate": 3.093293308103679e-07, "loss": 0.89, "step": 2620 }, { "epoch": 0.8911934716082965, "grad_norm": 1.5593372875987372, "learning_rate": 3.074250139722679e-07, "loss": 0.7572, "step": 2621 }, { "epoch": 0.8915334920095206, "grad_norm": 2.1392476418230184, "learning_rate": 3.0552639106097684e-07, "loss": 0.7994, "step": 2622 }, { "epoch": 0.8918735124107446, "grad_norm": 2.4268837548387157, "learning_rate": 3.0363346438026633e-07, "loss": 0.8267, "step": 2623 }, { "epoch": 0.8922135328119687, "grad_norm": 2.521048048719284, "learning_rate": 3.0174623622699685e-07, "loss": 0.7818, "step": 2624 }, { "epoch": 0.8925535532131927, "grad_norm": 1.7313419670899555, "learning_rate": 2.998647088911127e-07, "loss": 0.7824, "step": 2625 }, { "epoch": 0.8928935736144169, "grad_norm": 3.2464092926902364, "learning_rate": 2.9798888465564226e-07, "loss": 0.7654, "step": 2626 }, { "epoch": 0.893233594015641, "grad_norm": 1.5731620759656288, "learning_rate": 2.961187657966919e-07, "loss": 0.8325, "step": 2627 }, { "epoch": 0.893573614416865, "grad_norm": 2.6025793303197804, "learning_rate": 2.942543545834475e-07, "loss": 0.7288, "step": 2628 }, { "epoch": 0.893913634818089, "grad_norm": 1.829882281307138, "learning_rate": 2.923956532781691e-07, "loss": 0.7506, "step": 2629 }, { "epoch": 0.8942536552193131, "grad_norm": 2.3041651665798337, "learning_rate": 2.9054266413618525e-07, "loss": 0.7911, "step": 2630 }, { "epoch": 0.8945936756205373, "grad_norm": 1.6349830871902187, "learning_rate": 2.88695389405898e-07, "loss": 0.7504, "step": 2631 }, { "epoch": 0.8949336960217613, "grad_norm": 3.3922771842440587, "learning_rate": 2.8685383132877163e-07, "loss": 0.787, "step": 2632 }, { "epoch": 0.8952737164229854, "grad_norm": 2.0614678915265525, "learning_rate": 2.8501799213933646e-07, "loss": 0.7534, "step": 2633 }, { "epoch": 0.8956137368242094, "grad_norm": 1.5441939097294124, "learning_rate": 2.831878740651833e-07, "loss": 0.8937, "step": 2634 }, { "epoch": 0.8959537572254336, "grad_norm": 2.0399976051493565, "learning_rate": 2.8136347932695926e-07, "loss": 0.6901, "step": 2635 }, { "epoch": 0.8962937776266576, "grad_norm": 1.9641640372424263, "learning_rate": 2.7954481013836744e-07, "loss": 0.8211, "step": 2636 }, { "epoch": 0.8966337980278817, "grad_norm": 2.7068467503326112, "learning_rate": 2.7773186870616585e-07, "loss": 0.8513, "step": 2637 }, { "epoch": 0.8969738184291057, "grad_norm": 1.999992912921896, "learning_rate": 2.759246572301599e-07, "loss": 0.7835, "step": 2638 }, { "epoch": 0.8973138388303298, "grad_norm": 1.4208853502746028, "learning_rate": 2.741231779032022e-07, "loss": 0.7349, "step": 2639 }, { "epoch": 0.8976538592315539, "grad_norm": 1.803477923520495, "learning_rate": 2.72327432911193e-07, "loss": 0.744, "step": 2640 }, { "epoch": 0.897993879632778, "grad_norm": 6.723981213587554, "learning_rate": 2.7053742443307054e-07, "loss": 0.718, "step": 2641 }, { "epoch": 0.898333900034002, "grad_norm": 2.2565609405076814, "learning_rate": 2.6875315464081566e-07, "loss": 0.7945, "step": 2642 }, { "epoch": 0.8986739204352261, "grad_norm": 2.046096256829784, "learning_rate": 2.669746256994449e-07, "loss": 0.73, "step": 2643 }, { "epoch": 0.8990139408364501, "grad_norm": 1.656402743730646, "learning_rate": 2.652018397670081e-07, "loss": 0.7564, "step": 2644 }, { "epoch": 0.8993539612376743, "grad_norm": 2.1387537466555, "learning_rate": 2.6343479899458737e-07, "loss": 0.7734, "step": 2645 }, { "epoch": 0.8996939816388984, "grad_norm": 4.0058075533193325, "learning_rate": 2.616735055262931e-07, "loss": 0.7913, "step": 2646 }, { "epoch": 0.9000340020401224, "grad_norm": 1.8790063917737332, "learning_rate": 2.5991796149926306e-07, "loss": 0.7609, "step": 2647 }, { "epoch": 0.9003740224413465, "grad_norm": 1.7616231603038723, "learning_rate": 2.5816816904365715e-07, "loss": 0.6813, "step": 2648 }, { "epoch": 0.9007140428425705, "grad_norm": 1.6567138767856624, "learning_rate": 2.5642413028265867e-07, "loss": 0.7752, "step": 2649 }, { "epoch": 0.9010540632437947, "grad_norm": 1.5576675294760438, "learning_rate": 2.546858473324676e-07, "loss": 0.6574, "step": 2650 }, { "epoch": 0.9013940836450187, "grad_norm": 1.8413136738169835, "learning_rate": 2.529533223022995e-07, "loss": 0.7272, "step": 2651 }, { "epoch": 0.9017341040462428, "grad_norm": 1.7136660665303882, "learning_rate": 2.5122655729438393e-07, "loss": 0.8226, "step": 2652 }, { "epoch": 0.9020741244474668, "grad_norm": 2.2558521762249355, "learning_rate": 2.495055544039632e-07, "loss": 0.812, "step": 2653 }, { "epoch": 0.9024141448486909, "grad_norm": 1.7798515566519915, "learning_rate": 2.477903157192846e-07, "loss": 0.741, "step": 2654 }, { "epoch": 0.902754165249915, "grad_norm": 2.098481810020344, "learning_rate": 2.4608084332160277e-07, "loss": 0.8253, "step": 2655 }, { "epoch": 0.9030941856511391, "grad_norm": 1.6779439834154712, "learning_rate": 2.443771392851768e-07, "loss": 0.7023, "step": 2656 }, { "epoch": 0.9034342060523631, "grad_norm": 1.9775008264088612, "learning_rate": 2.4267920567726364e-07, "loss": 0.7944, "step": 2657 }, { "epoch": 0.9037742264535872, "grad_norm": 2.5593382037495216, "learning_rate": 2.409870445581225e-07, "loss": 0.7293, "step": 2658 }, { "epoch": 0.9041142468548113, "grad_norm": 2.0166338772224086, "learning_rate": 2.393006579810037e-07, "loss": 0.7021, "step": 2659 }, { "epoch": 0.9044542672560354, "grad_norm": 1.6435413275557214, "learning_rate": 2.3762004799215422e-07, "loss": 0.7309, "step": 2660 }, { "epoch": 0.9047942876572594, "grad_norm": 2.4504668750280008, "learning_rate": 2.3594521663081072e-07, "loss": 0.7637, "step": 2661 }, { "epoch": 0.9051343080584835, "grad_norm": 1.6120218788123009, "learning_rate": 2.3427616592919587e-07, "loss": 0.7751, "step": 2662 }, { "epoch": 0.9054743284597075, "grad_norm": 2.086946712646895, "learning_rate": 2.3261289791252306e-07, "loss": 0.6903, "step": 2663 }, { "epoch": 0.9058143488609317, "grad_norm": 1.7397600903656776, "learning_rate": 2.3095541459898452e-07, "loss": 0.7838, "step": 2664 }, { "epoch": 0.9061543692621558, "grad_norm": 2.2776303771036366, "learning_rate": 2.2930371799975593e-07, "loss": 0.8619, "step": 2665 }, { "epoch": 0.9064943896633798, "grad_norm": 1.6304572281291108, "learning_rate": 2.2765781011899025e-07, "loss": 0.8539, "step": 2666 }, { "epoch": 0.9068344100646039, "grad_norm": 2.8418896975885586, "learning_rate": 2.260176929538166e-07, "loss": 0.9118, "step": 2667 }, { "epoch": 0.9071744304658279, "grad_norm": 1.7963190573302756, "learning_rate": 2.243833684943375e-07, "loss": 0.8397, "step": 2668 }, { "epoch": 0.9075144508670521, "grad_norm": 1.6299943334815419, "learning_rate": 2.2275483872362835e-07, "loss": 0.7385, "step": 2669 }, { "epoch": 0.9078544712682761, "grad_norm": 1.9271317601269167, "learning_rate": 2.2113210561773124e-07, "loss": 0.7455, "step": 2670 }, { "epoch": 0.9081944916695002, "grad_norm": 1.6366781921840725, "learning_rate": 2.1951517114565446e-07, "loss": 0.7428, "step": 2671 }, { "epoch": 0.9085345120707242, "grad_norm": 1.6460432599583494, "learning_rate": 2.179040372693736e-07, "loss": 0.6801, "step": 2672 }, { "epoch": 0.9088745324719483, "grad_norm": 3.0862578766801514, "learning_rate": 2.162987059438204e-07, "loss": 0.7899, "step": 2673 }, { "epoch": 0.9092145528731724, "grad_norm": 2.0209153785335814, "learning_rate": 2.1469917911689232e-07, "loss": 0.8979, "step": 2674 }, { "epoch": 0.9095545732743965, "grad_norm": 2.019796094061022, "learning_rate": 2.1310545872943788e-07, "loss": 0.7872, "step": 2675 }, { "epoch": 0.9098945936756205, "grad_norm": 2.4953968703645515, "learning_rate": 2.115175467152636e-07, "loss": 0.7581, "step": 2676 }, { "epoch": 0.9102346140768446, "grad_norm": 1.9678752909443469, "learning_rate": 2.0993544500112706e-07, "loss": 0.7204, "step": 2677 }, { "epoch": 0.9105746344780686, "grad_norm": 2.4724784821104935, "learning_rate": 2.0835915550673492e-07, "loss": 0.8005, "step": 2678 }, { "epoch": 0.9109146548792928, "grad_norm": 2.3974865270538066, "learning_rate": 2.0678868014474328e-07, "loss": 0.8121, "step": 2679 }, { "epoch": 0.9112546752805168, "grad_norm": 2.626755117379274, "learning_rate": 2.0522402082075121e-07, "loss": 0.615, "step": 2680 }, { "epoch": 0.9115946956817409, "grad_norm": 1.7814776861792805, "learning_rate": 2.0366517943330278e-07, "loss": 0.845, "step": 2681 }, { "epoch": 0.911934716082965, "grad_norm": 1.9241174576916087, "learning_rate": 2.0211215787388105e-07, "loss": 0.8233, "step": 2682 }, { "epoch": 0.912274736484189, "grad_norm": 1.6974694912568216, "learning_rate": 2.0056495802690923e-07, "loss": 0.8282, "step": 2683 }, { "epoch": 0.9126147568854132, "grad_norm": 3.7787758396910336, "learning_rate": 1.9902358176974335e-07, "loss": 0.7483, "step": 2684 }, { "epoch": 0.9129547772866372, "grad_norm": 2.0551170310858593, "learning_rate": 1.974880309726762e-07, "loss": 0.781, "step": 2685 }, { "epoch": 0.9132947976878613, "grad_norm": 1.7924440273829534, "learning_rate": 1.959583074989302e-07, "loss": 0.7122, "step": 2686 }, { "epoch": 0.9136348180890853, "grad_norm": 2.225503399226498, "learning_rate": 1.9443441320465716e-07, "loss": 0.7122, "step": 2687 }, { "epoch": 0.9139748384903095, "grad_norm": 1.8911761329043806, "learning_rate": 1.9291634993893803e-07, "loss": 0.6713, "step": 2688 }, { "epoch": 0.9143148588915335, "grad_norm": 1.8502644569783382, "learning_rate": 1.9140411954377437e-07, "loss": 0.6624, "step": 2689 }, { "epoch": 0.9146548792927576, "grad_norm": 2.653961623513032, "learning_rate": 1.8989772385409445e-07, "loss": 0.8623, "step": 2690 }, { "epoch": 0.9149948996939816, "grad_norm": 2.0652612784577316, "learning_rate": 1.883971646977434e-07, "loss": 0.7011, "step": 2691 }, { "epoch": 0.9153349200952057, "grad_norm": 2.568246822202351, "learning_rate": 1.8690244389548694e-07, "loss": 0.6886, "step": 2692 }, { "epoch": 0.9156749404964298, "grad_norm": 2.1817257448676104, "learning_rate": 1.8541356326100436e-07, "loss": 0.7835, "step": 2693 }, { "epoch": 0.9160149608976539, "grad_norm": 1.5833475097932033, "learning_rate": 1.8393052460088877e-07, "loss": 0.7628, "step": 2694 }, { "epoch": 0.9163549812988779, "grad_norm": 2.0794302266744555, "learning_rate": 1.8245332971464803e-07, "loss": 0.7234, "step": 2695 }, { "epoch": 0.916695001700102, "grad_norm": 1.891383361699711, "learning_rate": 1.8098198039469438e-07, "loss": 0.8679, "step": 2696 }, { "epoch": 0.917035022101326, "grad_norm": 1.847279800591813, "learning_rate": 1.7951647842635035e-07, "loss": 0.7993, "step": 2697 }, { "epoch": 0.9173750425025502, "grad_norm": 1.672424819741688, "learning_rate": 1.780568255878423e-07, "loss": 0.7778, "step": 2698 }, { "epoch": 0.9177150629037742, "grad_norm": 2.6871925509688985, "learning_rate": 1.7660302365029969e-07, "loss": 0.69, "step": 2699 }, { "epoch": 0.9180550833049983, "grad_norm": 1.6768457562216357, "learning_rate": 1.7515507437775193e-07, "loss": 0.7657, "step": 2700 }, { "epoch": 0.9183951037062223, "grad_norm": 1.809600285567411, "learning_rate": 1.7371297952712752e-07, "loss": 0.7147, "step": 2701 }, { "epoch": 0.9187351241074464, "grad_norm": 1.9059976949501458, "learning_rate": 1.722767408482501e-07, "loss": 0.7172, "step": 2702 }, { "epoch": 0.9190751445086706, "grad_norm": 1.9399888604391888, "learning_rate": 1.7084636008383837e-07, "loss": 0.7127, "step": 2703 }, { "epoch": 0.9194151649098946, "grad_norm": 2.6413221231221455, "learning_rate": 1.6942183896950458e-07, "loss": 0.8735, "step": 2704 }, { "epoch": 0.9197551853111187, "grad_norm": 2.052359841123048, "learning_rate": 1.680031792337472e-07, "loss": 0.7513, "step": 2705 }, { "epoch": 0.9200952057123427, "grad_norm": 1.7351319515737746, "learning_rate": 1.6659038259795644e-07, "loss": 0.7591, "step": 2706 }, { "epoch": 0.9204352261135668, "grad_norm": 3.5928987788596505, "learning_rate": 1.6518345077640606e-07, "loss": 0.865, "step": 2707 }, { "epoch": 0.9207752465147909, "grad_norm": 2.475516242774065, "learning_rate": 1.6378238547625436e-07, "loss": 0.7076, "step": 2708 }, { "epoch": 0.921115266916015, "grad_norm": 2.757378010289697, "learning_rate": 1.6238718839753975e-07, "loss": 0.8151, "step": 2709 }, { "epoch": 0.921455287317239, "grad_norm": 2.839365711351715, "learning_rate": 1.609978612331825e-07, "loss": 0.7181, "step": 2710 }, { "epoch": 0.9217953077184631, "grad_norm": 1.8003430537941647, "learning_rate": 1.5961440566897913e-07, "loss": 0.8018, "step": 2711 }, { "epoch": 0.9221353281196872, "grad_norm": 2.3010251129273405, "learning_rate": 1.582368233836007e-07, "loss": 0.7241, "step": 2712 }, { "epoch": 0.9224753485209113, "grad_norm": 1.7723077920834232, "learning_rate": 1.5686511604859456e-07, "loss": 0.8193, "step": 2713 }, { "epoch": 0.9228153689221353, "grad_norm": 2.2601407328290364, "learning_rate": 1.5549928532837544e-07, "loss": 0.855, "step": 2714 }, { "epoch": 0.9231553893233594, "grad_norm": 1.949904865719386, "learning_rate": 1.5413933288023207e-07, "loss": 0.8482, "step": 2715 }, { "epoch": 0.9234954097245834, "grad_norm": 1.3958294128404651, "learning_rate": 1.5278526035431673e-07, "loss": 0.7979, "step": 2716 }, { "epoch": 0.9238354301258076, "grad_norm": 3.1940088213187745, "learning_rate": 1.5143706939364844e-07, "loss": 0.7152, "step": 2717 }, { "epoch": 0.9241754505270316, "grad_norm": 2.3293084372099138, "learning_rate": 1.5009476163410975e-07, "loss": 0.7087, "step": 2718 }, { "epoch": 0.9245154709282557, "grad_norm": 1.724579154051739, "learning_rate": 1.4875833870444334e-07, "loss": 0.8299, "step": 2719 }, { "epoch": 0.9248554913294798, "grad_norm": 1.8586372981224857, "learning_rate": 1.474278022262543e-07, "loss": 0.7033, "step": 2720 }, { "epoch": 0.9251955117307038, "grad_norm": 1.53996008803897, "learning_rate": 1.4610315381400175e-07, "loss": 0.7594, "step": 2721 }, { "epoch": 0.925535532131928, "grad_norm": 2.2916687247168483, "learning_rate": 1.4478439507500218e-07, "loss": 0.7009, "step": 2722 }, { "epoch": 0.925875552533152, "grad_norm": 1.9641042638955648, "learning_rate": 1.4347152760942507e-07, "loss": 0.6479, "step": 2723 }, { "epoch": 0.9262155729343761, "grad_norm": 1.9501129366148464, "learning_rate": 1.4216455301029274e-07, "loss": 0.7925, "step": 2724 }, { "epoch": 0.9265555933356001, "grad_norm": 2.198382597904698, "learning_rate": 1.4086347286347502e-07, "loss": 0.857, "step": 2725 }, { "epoch": 0.9268956137368242, "grad_norm": 2.132363720106857, "learning_rate": 1.3956828874768901e-07, "loss": 0.6655, "step": 2726 }, { "epoch": 0.9272356341380483, "grad_norm": 1.8802823835704232, "learning_rate": 1.3827900223450152e-07, "loss": 0.8498, "step": 2727 }, { "epoch": 0.9275756545392724, "grad_norm": 2.4695615148666104, "learning_rate": 1.3699561488831892e-07, "loss": 0.767, "step": 2728 }, { "epoch": 0.9279156749404964, "grad_norm": 1.547770137207598, "learning_rate": 1.357181282663933e-07, "loss": 0.8417, "step": 2729 }, { "epoch": 0.9282556953417205, "grad_norm": 1.8562537232375893, "learning_rate": 1.3444654391881306e-07, "loss": 0.7578, "step": 2730 }, { "epoch": 0.9285957157429445, "grad_norm": 2.3415279377546647, "learning_rate": 1.3318086338850843e-07, "loss": 0.6844, "step": 2731 }, { "epoch": 0.9289357361441687, "grad_norm": 2.056878813634059, "learning_rate": 1.3192108821124428e-07, "loss": 0.8104, "step": 2732 }, { "epoch": 0.9292757565453927, "grad_norm": 1.800632904887497, "learning_rate": 1.3066721991561891e-07, "loss": 0.7732, "step": 2733 }, { "epoch": 0.9296157769466168, "grad_norm": 1.801402555086326, "learning_rate": 1.2941926002306536e-07, "loss": 0.754, "step": 2734 }, { "epoch": 0.9299557973478408, "grad_norm": 2.1441261210894607, "learning_rate": 1.2817721004784568e-07, "loss": 0.7945, "step": 2735 }, { "epoch": 0.9302958177490649, "grad_norm": 2.5741151332940677, "learning_rate": 1.2694107149705258e-07, "loss": 0.7383, "step": 2736 }, { "epoch": 0.930635838150289, "grad_norm": 1.5286432889504458, "learning_rate": 1.2571084587060466e-07, "loss": 0.6856, "step": 2737 }, { "epoch": 0.9309758585515131, "grad_norm": 11.326187707959681, "learning_rate": 1.2448653466124672e-07, "loss": 0.8106, "step": 2738 }, { "epoch": 0.9313158789527372, "grad_norm": 1.7511130965052701, "learning_rate": 1.2326813935454596e-07, "loss": 0.7444, "step": 2739 }, { "epoch": 0.9316558993539612, "grad_norm": 1.7617651287605105, "learning_rate": 1.2205566142889257e-07, "loss": 0.854, "step": 2740 }, { "epoch": 0.9319959197551854, "grad_norm": 2.145302434001505, "learning_rate": 1.2084910235549586e-07, "loss": 0.8164, "step": 2741 }, { "epoch": 0.9323359401564094, "grad_norm": 1.5133580779256988, "learning_rate": 1.19648463598383e-07, "loss": 0.7909, "step": 2742 }, { "epoch": 0.9326759605576335, "grad_norm": 1.7694718002515495, "learning_rate": 1.1845374661439813e-07, "loss": 0.7474, "step": 2743 }, { "epoch": 0.9330159809588575, "grad_norm": 1.624165239983796, "learning_rate": 1.1726495285319883e-07, "loss": 0.7366, "step": 2744 }, { "epoch": 0.9333560013600816, "grad_norm": 2.2415272140749227, "learning_rate": 1.1608208375725794e-07, "loss": 0.7531, "step": 2745 }, { "epoch": 0.9336960217613057, "grad_norm": 1.806699065485192, "learning_rate": 1.1490514076185621e-07, "loss": 0.7165, "step": 2746 }, { "epoch": 0.9340360421625298, "grad_norm": 2.1552408637827267, "learning_rate": 1.1373412529508687e-07, "loss": 0.7309, "step": 2747 }, { "epoch": 0.9343760625637538, "grad_norm": 1.8671167963972226, "learning_rate": 1.1256903877784886e-07, "loss": 0.7929, "step": 2748 }, { "epoch": 0.9347160829649779, "grad_norm": 2.3625092627025497, "learning_rate": 1.1140988262384633e-07, "loss": 0.7205, "step": 2749 }, { "epoch": 0.9350561033662019, "grad_norm": 1.8238588793473012, "learning_rate": 1.1025665823958975e-07, "loss": 0.78, "step": 2750 }, { "epoch": 0.9353961237674261, "grad_norm": 2.234751620091944, "learning_rate": 1.0910936702438924e-07, "loss": 0.7863, "step": 2751 }, { "epoch": 0.9357361441686501, "grad_norm": 2.5286309526580717, "learning_rate": 1.0796801037035898e-07, "loss": 0.7947, "step": 2752 }, { "epoch": 0.9360761645698742, "grad_norm": 1.4596449824820212, "learning_rate": 1.068325896624095e-07, "loss": 0.7242, "step": 2753 }, { "epoch": 0.9364161849710982, "grad_norm": 1.8586921437706274, "learning_rate": 1.0570310627825042e-07, "loss": 0.7237, "step": 2754 }, { "epoch": 0.9367562053723223, "grad_norm": 2.1973293560111355, "learning_rate": 1.0457956158838545e-07, "loss": 0.7914, "step": 2755 }, { "epoch": 0.9370962257735465, "grad_norm": 1.9486083197205395, "learning_rate": 1.0346195695611461e-07, "loss": 0.6833, "step": 2756 }, { "epoch": 0.9374362461747705, "grad_norm": 2.0127688563988406, "learning_rate": 1.0235029373752758e-07, "loss": 0.8055, "step": 2757 }, { "epoch": 0.9377762665759946, "grad_norm": 1.9434824986065704, "learning_rate": 1.0124457328150705e-07, "loss": 0.7631, "step": 2758 }, { "epoch": 0.9381162869772186, "grad_norm": 1.8861521568684736, "learning_rate": 1.0014479692972368e-07, "loss": 0.8138, "step": 2759 }, { "epoch": 0.9384563073784427, "grad_norm": 2.4202833010762235, "learning_rate": 9.905096601663556e-08, "loss": 0.8974, "step": 2760 }, { "epoch": 0.9387963277796668, "grad_norm": 1.7851590249860223, "learning_rate": 9.796308186948711e-08, "loss": 0.8358, "step": 2761 }, { "epoch": 0.9391363481808909, "grad_norm": 2.4290115845610325, "learning_rate": 9.688114580830688e-08, "loss": 0.7979, "step": 2762 }, { "epoch": 0.9394763685821149, "grad_norm": 1.5838175734575497, "learning_rate": 9.580515914590637e-08, "loss": 0.8471, "step": 2763 }, { "epoch": 0.939816388983339, "grad_norm": 2.027267479543276, "learning_rate": 9.473512318787681e-08, "loss": 0.6754, "step": 2764 }, { "epoch": 0.940156409384563, "grad_norm": 1.809277908733106, "learning_rate": 9.367103923259124e-08, "loss": 0.7902, "step": 2765 }, { "epoch": 0.9404964297857872, "grad_norm": 4.2105282282100225, "learning_rate": 9.261290857119853e-08, "loss": 0.7979, "step": 2766 }, { "epoch": 0.9408364501870112, "grad_norm": 2.1122972531258655, "learning_rate": 9.156073248762387e-08, "loss": 0.7509, "step": 2767 }, { "epoch": 0.9411764705882353, "grad_norm": 1.9840880876989972, "learning_rate": 9.051451225856877e-08, "loss": 0.6946, "step": 2768 }, { "epoch": 0.9415164909894593, "grad_norm": 2.524068083748069, "learning_rate": 8.947424915350723e-08, "loss": 0.7643, "step": 2769 }, { "epoch": 0.9418565113906835, "grad_norm": 2.133668114970484, "learning_rate": 8.843994443468451e-08, "loss": 0.8265, "step": 2770 }, { "epoch": 0.9421965317919075, "grad_norm": 1.4893159029193104, "learning_rate": 8.741159935711563e-08, "loss": 0.8069, "step": 2771 }, { "epoch": 0.9425365521931316, "grad_norm": 2.1763296510811716, "learning_rate": 8.638921516858634e-08, "loss": 0.8413, "step": 2772 }, { "epoch": 0.9428765725943556, "grad_norm": 2.498518002319564, "learning_rate": 8.537279310964763e-08, "loss": 0.8262, "step": 2773 }, { "epoch": 0.9432165929955797, "grad_norm": 8.993724154572014, "learning_rate": 8.436233441361629e-08, "loss": 0.8378, "step": 2774 }, { "epoch": 0.9435566133968039, "grad_norm": 1.576329182227026, "learning_rate": 8.335784030657324e-08, "loss": 0.8379, "step": 2775 }, { "epoch": 0.9438966337980279, "grad_norm": 2.424881688164331, "learning_rate": 8.235931200736235e-08, "loss": 0.7671, "step": 2776 }, { "epoch": 0.944236654199252, "grad_norm": 2.807110671874095, "learning_rate": 8.136675072758948e-08, "loss": 0.7003, "step": 2777 }, { "epoch": 0.944576674600476, "grad_norm": 4.154937564787449, "learning_rate": 8.038015767161789e-08, "loss": 0.8277, "step": 2778 }, { "epoch": 0.9449166950017001, "grad_norm": 1.674791036585208, "learning_rate": 7.939953403657164e-08, "loss": 0.7474, "step": 2779 }, { "epoch": 0.9452567154029242, "grad_norm": 2.0417610679261635, "learning_rate": 7.842488101232893e-08, "loss": 0.7489, "step": 2780 }, { "epoch": 0.9455967358041483, "grad_norm": 2.355200646804559, "learning_rate": 7.745619978152653e-08, "loss": 0.8185, "step": 2781 }, { "epoch": 0.9459367562053723, "grad_norm": 2.1071996152685863, "learning_rate": 7.649349151955199e-08, "loss": 0.7427, "step": 2782 }, { "epoch": 0.9462767766065964, "grad_norm": 2.5068513815147413, "learning_rate": 7.553675739454647e-08, "loss": 0.7599, "step": 2783 }, { "epoch": 0.9466167970078204, "grad_norm": 2.137090570866946, "learning_rate": 7.4585998567403e-08, "loss": 0.7743, "step": 2784 }, { "epoch": 0.9469568174090446, "grad_norm": 1.8432992230207976, "learning_rate": 7.364121619176213e-08, "loss": 0.7191, "step": 2785 }, { "epoch": 0.9472968378102686, "grad_norm": 1.9437651826974969, "learning_rate": 7.270241141401568e-08, "loss": 0.8466, "step": 2786 }, { "epoch": 0.9476368582114927, "grad_norm": 1.7799878186616265, "learning_rate": 7.17695853732997e-08, "loss": 0.77, "step": 2787 }, { "epoch": 0.9479768786127167, "grad_norm": 1.7842807831458771, "learning_rate": 7.084273920149654e-08, "loss": 0.9092, "step": 2788 }, { "epoch": 0.9483168990139408, "grad_norm": 1.7117275262466325, "learning_rate": 6.99218740232338e-08, "loss": 0.8042, "step": 2789 }, { "epoch": 0.948656919415165, "grad_norm": 1.6622479904527474, "learning_rate": 6.900699095587937e-08, "loss": 0.7579, "step": 2790 }, { "epoch": 0.948996939816389, "grad_norm": 1.8529558113196523, "learning_rate": 6.809809110954413e-08, "loss": 0.8636, "step": 2791 }, { "epoch": 0.949336960217613, "grad_norm": 2.163551138095109, "learning_rate": 6.719517558707922e-08, "loss": 0.892, "step": 2792 }, { "epoch": 0.9496769806188371, "grad_norm": 1.8487943962662825, "learning_rate": 6.629824548407381e-08, "loss": 0.7987, "step": 2793 }, { "epoch": 0.9500170010200613, "grad_norm": 2.0154438939507497, "learning_rate": 6.540730188885347e-08, "loss": 0.7981, "step": 2794 }, { "epoch": 0.9503570214212853, "grad_norm": 1.9570984130674787, "learning_rate": 6.452234588248285e-08, "loss": 0.7771, "step": 2795 }, { "epoch": 0.9506970418225094, "grad_norm": 1.6604363995442626, "learning_rate": 6.364337853875745e-08, "loss": 0.654, "step": 2796 }, { "epoch": 0.9510370622237334, "grad_norm": 1.8616496731827172, "learning_rate": 6.277040092420916e-08, "loss": 0.7682, "step": 2797 }, { "epoch": 0.9513770826249575, "grad_norm": 1.4864670570007181, "learning_rate": 6.190341409810063e-08, "loss": 0.7729, "step": 2798 }, { "epoch": 0.9517171030261816, "grad_norm": 2.167348761585454, "learning_rate": 6.104241911242592e-08, "loss": 0.8381, "step": 2799 }, { "epoch": 0.9520571234274057, "grad_norm": 2.0214824467624606, "learning_rate": 6.018741701190767e-08, "loss": 0.8774, "step": 2800 }, { "epoch": 0.9523971438286297, "grad_norm": 2.046012942264478, "learning_rate": 5.933840883399766e-08, "loss": 0.8185, "step": 2801 }, { "epoch": 0.9527371642298538, "grad_norm": 1.6744820889133898, "learning_rate": 5.8495395608874625e-08, "loss": 0.7855, "step": 2802 }, { "epoch": 0.9530771846310778, "grad_norm": 1.9991594031620603, "learning_rate": 5.7658378359443104e-08, "loss": 0.7734, "step": 2803 }, { "epoch": 0.953417205032302, "grad_norm": 1.8896007551548046, "learning_rate": 5.6827358101331774e-08, "loss": 0.753, "step": 2804 }, { "epoch": 0.953757225433526, "grad_norm": 2.499578946675649, "learning_rate": 5.600233584289294e-08, "loss": 0.7645, "step": 2805 }, { "epoch": 0.9540972458347501, "grad_norm": 2.0291966720592898, "learning_rate": 5.518331258520138e-08, "loss": 0.7063, "step": 2806 }, { "epoch": 0.9544372662359741, "grad_norm": 2.115533353024901, "learning_rate": 5.437028932205213e-08, "loss": 0.7353, "step": 2807 }, { "epoch": 0.9547772866371982, "grad_norm": 2.230634267024501, "learning_rate": 5.356326703995884e-08, "loss": 0.7527, "step": 2808 }, { "epoch": 0.9551173070384223, "grad_norm": 1.9500468263820696, "learning_rate": 5.276224671815655e-08, "loss": 0.7196, "step": 2809 }, { "epoch": 0.9554573274396464, "grad_norm": 1.9933718903005513, "learning_rate": 5.196722932859499e-08, "loss": 0.7947, "step": 2810 }, { "epoch": 0.9557973478408704, "grad_norm": 3.9589726805428658, "learning_rate": 5.117821583594085e-08, "loss": 0.8693, "step": 2811 }, { "epoch": 0.9561373682420945, "grad_norm": 1.697109589336965, "learning_rate": 5.0395207197575516e-08, "loss": 0.8559, "step": 2812 }, { "epoch": 0.9564773886433185, "grad_norm": 1.7784029867244318, "learning_rate": 4.9618204363595656e-08, "loss": 0.7196, "step": 2813 }, { "epoch": 0.9568174090445427, "grad_norm": 1.7135140822695198, "learning_rate": 4.8847208276808224e-08, "loss": 0.7667, "step": 2814 }, { "epoch": 0.9571574294457668, "grad_norm": 2.0635249556468676, "learning_rate": 4.808221987273265e-08, "loss": 0.7779, "step": 2815 }, { "epoch": 0.9574974498469908, "grad_norm": 1.7283710035195046, "learning_rate": 4.732324007959921e-08, "loss": 0.7874, "step": 2816 }, { "epoch": 0.9578374702482149, "grad_norm": 2.1716890834730247, "learning_rate": 4.657026981834623e-08, "loss": 0.776, "step": 2817 }, { "epoch": 0.9581774906494389, "grad_norm": 1.8440991529889135, "learning_rate": 4.5823310002621745e-08, "loss": 0.7675, "step": 2818 }, { "epoch": 0.9585175110506631, "grad_norm": 1.9867632028220694, "learning_rate": 4.5082361538779095e-08, "loss": 0.8159, "step": 2819 }, { "epoch": 0.9588575314518871, "grad_norm": 1.6735880869924653, "learning_rate": 4.434742532587855e-08, "loss": 0.8242, "step": 2820 }, { "epoch": 0.9591975518531112, "grad_norm": 1.9489756840557746, "learning_rate": 4.3618502255684533e-08, "loss": 0.7493, "step": 2821 }, { "epoch": 0.9595375722543352, "grad_norm": 1.8908904594168388, "learning_rate": 4.289559321266623e-08, "loss": 0.7569, "step": 2822 }, { "epoch": 0.9598775926555594, "grad_norm": 2.0307318453443663, "learning_rate": 4.2178699073994744e-08, "loss": 0.8468, "step": 2823 }, { "epoch": 0.9602176130567834, "grad_norm": 2.155622909096085, "learning_rate": 4.1467820709541474e-08, "loss": 0.7998, "step": 2824 }, { "epoch": 0.9605576334580075, "grad_norm": 3.045935542421655, "learning_rate": 4.0762958981880876e-08, "loss": 0.6656, "step": 2825 }, { "epoch": 0.9608976538592315, "grad_norm": 1.462100425448403, "learning_rate": 4.006411474628491e-08, "loss": 0.7495, "step": 2826 }, { "epoch": 0.9612376742604556, "grad_norm": 2.0395632850495873, "learning_rate": 3.937128885072528e-08, "loss": 0.7343, "step": 2827 }, { "epoch": 0.9615776946616797, "grad_norm": 1.4943247088061522, "learning_rate": 3.868448213587006e-08, "loss": 0.7365, "step": 2828 }, { "epoch": 0.9619177150629038, "grad_norm": 1.6462875432934096, "learning_rate": 3.800369543508431e-08, "loss": 0.7184, "step": 2829 }, { "epoch": 0.9622577354641279, "grad_norm": 1.7317699680459382, "learning_rate": 3.7328929574428354e-08, "loss": 0.7887, "step": 2830 }, { "epoch": 0.9625977558653519, "grad_norm": 2.0339479815422683, "learning_rate": 3.6660185372656144e-08, "loss": 0.6979, "step": 2831 }, { "epoch": 0.962937776266576, "grad_norm": 2.024001193970488, "learning_rate": 3.5997463641216925e-08, "loss": 0.7664, "step": 2832 }, { "epoch": 0.9632777966678001, "grad_norm": 2.7918239147503123, "learning_rate": 3.534076518424967e-08, "loss": 0.7498, "step": 2833 }, { "epoch": 0.9636178170690242, "grad_norm": 1.8084785912714854, "learning_rate": 3.469009079858698e-08, "loss": 0.5801, "step": 2834 }, { "epoch": 0.9639578374702482, "grad_norm": 1.6988220138175392, "learning_rate": 3.404544127375064e-08, "loss": 0.7183, "step": 2835 }, { "epoch": 0.9642978578714723, "grad_norm": 1.7500355302390025, "learning_rate": 3.340681739195328e-08, "loss": 0.8591, "step": 2836 }, { "epoch": 0.9646378782726963, "grad_norm": 2.0915983335526542, "learning_rate": 3.277421992809448e-08, "loss": 0.7898, "step": 2837 }, { "epoch": 0.9649778986739205, "grad_norm": 1.9905149738010572, "learning_rate": 3.2147649649761914e-08, "loss": 0.7486, "step": 2838 }, { "epoch": 0.9653179190751445, "grad_norm": 1.8204792814385986, "learning_rate": 3.152710731723019e-08, "loss": 0.7907, "step": 2839 }, { "epoch": 0.9656579394763686, "grad_norm": 2.2038325461595125, "learning_rate": 3.0912593683460336e-08, "loss": 0.8221, "step": 2840 }, { "epoch": 0.9659979598775926, "grad_norm": 2.432522698561901, "learning_rate": 3.030410949409701e-08, "loss": 0.8842, "step": 2841 }, { "epoch": 0.9663379802788167, "grad_norm": 1.7618523901465664, "learning_rate": 2.9701655487469062e-08, "loss": 0.7653, "step": 2842 }, { "epoch": 0.9666780006800408, "grad_norm": 2.0644672247287055, "learning_rate": 2.9105232394588955e-08, "loss": 0.7707, "step": 2843 }, { "epoch": 0.9670180210812649, "grad_norm": 1.63818941154093, "learning_rate": 2.8514840939150023e-08, "loss": 0.8413, "step": 2844 }, { "epoch": 0.9673580414824889, "grad_norm": 1.973884901127744, "learning_rate": 2.793048183752922e-08, "loss": 0.7836, "step": 2845 }, { "epoch": 0.967698061883713, "grad_norm": 2.4371181181523354, "learning_rate": 2.735215579878159e-08, "loss": 0.7305, "step": 2846 }, { "epoch": 0.9680380822849372, "grad_norm": 1.7853562497323494, "learning_rate": 2.6779863524642458e-08, "loss": 0.7943, "step": 2847 }, { "epoch": 0.9683781026861612, "grad_norm": 1.6858828008976603, "learning_rate": 2.6213605709525803e-08, "loss": 0.7232, "step": 2848 }, { "epoch": 0.9687181230873853, "grad_norm": 2.158947668013324, "learning_rate": 2.5653383040524228e-08, "loss": 0.8468, "step": 2849 }, { "epoch": 0.9690581434886093, "grad_norm": 1.9632625315112604, "learning_rate": 2.509919619740675e-08, "loss": 0.7163, "step": 2850 }, { "epoch": 0.9693981638898334, "grad_norm": 2.1695249086671757, "learning_rate": 2.4551045852617694e-08, "loss": 0.6472, "step": 2851 }, { "epoch": 0.9697381842910575, "grad_norm": 2.7117847345646107, "learning_rate": 2.4008932671277795e-08, "loss": 0.8502, "step": 2852 }, { "epoch": 0.9700782046922816, "grad_norm": 6.101355782708873, "learning_rate": 2.3472857311183095e-08, "loss": 0.8528, "step": 2853 }, { "epoch": 0.9704182250935056, "grad_norm": 2.254936461781147, "learning_rate": 2.294282042280105e-08, "loss": 0.7906, "step": 2854 }, { "epoch": 0.9707582454947297, "grad_norm": 1.8108204204000768, "learning_rate": 2.2418822649274974e-08, "loss": 0.7968, "step": 2855 }, { "epoch": 0.9710982658959537, "grad_norm": 2.6951434301051402, "learning_rate": 2.1900864626417385e-08, "loss": 0.776, "step": 2856 }, { "epoch": 0.9714382862971779, "grad_norm": 1.6020976275419296, "learning_rate": 2.1388946982714986e-08, "loss": 0.7704, "step": 2857 }, { "epoch": 0.9717783066984019, "grad_norm": 2.4901873723353556, "learning_rate": 2.088307033932313e-08, "loss": 0.7724, "step": 2858 }, { "epoch": 0.972118327099626, "grad_norm": 1.9510473315864627, "learning_rate": 2.0383235310068027e-08, "loss": 0.7161, "step": 2859 }, { "epoch": 0.97245834750085, "grad_norm": 1.7537820967663633, "learning_rate": 1.9889442501444533e-08, "loss": 0.7845, "step": 2860 }, { "epoch": 0.9727983679020741, "grad_norm": 2.770379816331149, "learning_rate": 1.9401692512617254e-08, "loss": 0.7648, "step": 2861 }, { "epoch": 0.9731383883032982, "grad_norm": 1.559159771690986, "learning_rate": 1.891998593541611e-08, "loss": 0.8496, "step": 2862 }, { "epoch": 0.9734784087045223, "grad_norm": 1.8239680223638182, "learning_rate": 1.8444323354340765e-08, "loss": 0.8466, "step": 2863 }, { "epoch": 0.9738184291057463, "grad_norm": 2.097607953123935, "learning_rate": 1.7974705346554543e-08, "loss": 0.7707, "step": 2864 }, { "epoch": 0.9741584495069704, "grad_norm": 15.595515977393774, "learning_rate": 1.7511132481888293e-08, "loss": 0.8303, "step": 2865 }, { "epoch": 0.9744984699081944, "grad_norm": 2.2442397263635923, "learning_rate": 1.7053605322837064e-08, "loss": 0.8331, "step": 2866 }, { "epoch": 0.9748384903094186, "grad_norm": 1.7730283098392012, "learning_rate": 1.6602124424558998e-08, "loss": 0.8179, "step": 2867 }, { "epoch": 0.9751785107106427, "grad_norm": 1.9544407790331693, "learning_rate": 1.6156690334878655e-08, "loss": 0.8524, "step": 2868 }, { "epoch": 0.9755185311118667, "grad_norm": 1.8253723434879328, "learning_rate": 1.571730359427981e-08, "loss": 0.7918, "step": 2869 }, { "epoch": 0.9758585515130908, "grad_norm": 2.0309536185191175, "learning_rate": 1.5283964735911537e-08, "loss": 0.6513, "step": 2870 }, { "epoch": 0.9761985719143148, "grad_norm": 1.838769072190465, "learning_rate": 1.4856674285582128e-08, "loss": 0.8031, "step": 2871 }, { "epoch": 0.976538592315539, "grad_norm": 2.0950822778973692, "learning_rate": 1.4435432761762958e-08, "loss": 0.7676, "step": 2872 }, { "epoch": 0.976878612716763, "grad_norm": 2.0893793425469873, "learning_rate": 1.4020240675583496e-08, "loss": 0.7125, "step": 2873 }, { "epoch": 0.9772186331179871, "grad_norm": 2.4146069116338076, "learning_rate": 1.3611098530834643e-08, "loss": 0.7995, "step": 2874 }, { "epoch": 0.9775586535192111, "grad_norm": 1.6888893378242318, "learning_rate": 1.3208006823965391e-08, "loss": 0.7872, "step": 2875 }, { "epoch": 0.9778986739204353, "grad_norm": 1.8282106707284866, "learning_rate": 1.2810966044083384e-08, "loss": 0.7569, "step": 2876 }, { "epoch": 0.9782386943216593, "grad_norm": 3.0888469815054056, "learning_rate": 1.241997667295436e-08, "loss": 0.7845, "step": 2877 }, { "epoch": 0.9785787147228834, "grad_norm": 2.6085632259240135, "learning_rate": 1.2035039185001595e-08, "loss": 0.6891, "step": 2878 }, { "epoch": 0.9789187351241074, "grad_norm": 1.6548350534770722, "learning_rate": 1.1656154047303691e-08, "loss": 0.776, "step": 2879 }, { "epoch": 0.9792587555253315, "grad_norm": 2.2479317743891163, "learning_rate": 1.128332171959734e-08, "loss": 0.6986, "step": 2880 }, { "epoch": 0.9795987759265556, "grad_norm": 1.5207555564257944, "learning_rate": 1.0916542654273443e-08, "loss": 0.7656, "step": 2881 }, { "epoch": 0.9799387963277797, "grad_norm": 1.7179799866128536, "learning_rate": 1.0555817296378223e-08, "loss": 0.7789, "step": 2882 }, { "epoch": 0.9802788167290037, "grad_norm": 3.5457830250161146, "learning_rate": 1.0201146083612113e-08, "loss": 0.6575, "step": 2883 }, { "epoch": 0.9806188371302278, "grad_norm": 1.66796935458302, "learning_rate": 9.852529446330306e-09, "loss": 0.8078, "step": 2884 }, { "epoch": 0.9809588575314518, "grad_norm": 1.6131158456304904, "learning_rate": 9.509967807541098e-09, "loss": 0.8186, "step": 2885 }, { "epoch": 0.981298877932676, "grad_norm": 1.82468180774407, "learning_rate": 9.17346158290533e-09, "loss": 0.739, "step": 2886 }, { "epoch": 0.9816388983339001, "grad_norm": 1.9688243210497778, "learning_rate": 8.843011180736383e-09, "loss": 0.7168, "step": 2887 }, { "epoch": 0.9819789187351241, "grad_norm": 1.5022510538028564, "learning_rate": 8.518617002000184e-09, "loss": 0.7572, "step": 2888 }, { "epoch": 0.9823189391363482, "grad_norm": 1.727052969368207, "learning_rate": 8.200279440313541e-09, "loss": 0.8521, "step": 2889 }, { "epoch": 0.9826589595375722, "grad_norm": 2.2067244626842633, "learning_rate": 7.88799888194358e-09, "loss": 0.8, "step": 2890 }, { "epoch": 0.9829989799387964, "grad_norm": 1.7065083718462186, "learning_rate": 7.581775705809424e-09, "loss": 0.7378, "step": 2891 }, { "epoch": 0.9833390003400204, "grad_norm": 2.9586498581307064, "learning_rate": 7.281610283479401e-09, "loss": 0.6801, "step": 2892 }, { "epoch": 0.9836790207412445, "grad_norm": 5.171238278863206, "learning_rate": 6.987502979170502e-09, "loss": 0.7629, "step": 2893 }, { "epoch": 0.9840190411424685, "grad_norm": 1.90520126690055, "learning_rate": 6.69945414975115e-09, "loss": 0.8113, "step": 2894 }, { "epoch": 0.9843590615436926, "grad_norm": 2.2503726394658616, "learning_rate": 6.417464144736208e-09, "loss": 0.7513, "step": 2895 }, { "epoch": 0.9846990819449167, "grad_norm": 2.153027725123987, "learning_rate": 6.141533306289749e-09, "loss": 0.8205, "step": 2896 }, { "epoch": 0.9850391023461408, "grad_norm": 1.736801672039933, "learning_rate": 5.871661969223951e-09, "loss": 0.7942, "step": 2897 }, { "epoch": 0.9853791227473648, "grad_norm": 2.3675066320350475, "learning_rate": 5.6078504609979874e-09, "loss": 0.7259, "step": 2898 }, { "epoch": 0.9857191431485889, "grad_norm": 2.017743519753625, "learning_rate": 5.350099101718575e-09, "loss": 0.7131, "step": 2899 }, { "epoch": 0.9860591635498129, "grad_norm": 2.7453562229219703, "learning_rate": 5.098408204138872e-09, "loss": 0.7977, "step": 2900 }, { "epoch": 0.9863991839510371, "grad_norm": 1.6134295512276415, "learning_rate": 4.852778073657361e-09, "loss": 0.7135, "step": 2901 }, { "epoch": 0.9867392043522611, "grad_norm": 2.0405012351886276, "learning_rate": 4.613209008320629e-09, "loss": 0.7537, "step": 2902 }, { "epoch": 0.9870792247534852, "grad_norm": 1.4871414756238235, "learning_rate": 4.379701298818928e-09, "loss": 0.7353, "step": 2903 }, { "epoch": 0.9874192451547092, "grad_norm": 2.093132630125787, "learning_rate": 4.152255228487834e-09, "loss": 0.7546, "step": 2904 }, { "epoch": 0.9877592655559334, "grad_norm": 1.7225547695335006, "learning_rate": 3.9308710733093616e-09, "loss": 0.7336, "step": 2905 }, { "epoch": 0.9880992859571575, "grad_norm": 2.05442384710289, "learning_rate": 3.715549101908633e-09, "loss": 0.7587, "step": 2906 }, { "epoch": 0.9884393063583815, "grad_norm": 5.175498096828514, "learning_rate": 3.5062895755544337e-09, "loss": 0.7976, "step": 2907 }, { "epoch": 0.9887793267596056, "grad_norm": 2.1043805063081678, "learning_rate": 3.3030927481614294e-09, "loss": 0.8033, "step": 2908 }, { "epoch": 0.9891193471608296, "grad_norm": 2.0514526782549884, "learning_rate": 3.10595886628684e-09, "loss": 0.7745, "step": 2909 }, { "epoch": 0.9894593675620538, "grad_norm": 1.8077379168303007, "learning_rate": 2.9148881691298812e-09, "loss": 0.8329, "step": 2910 }, { "epoch": 0.9897993879632778, "grad_norm": 2.7491338803401852, "learning_rate": 2.7298808885350968e-09, "loss": 0.7437, "step": 2911 }, { "epoch": 0.9901394083645019, "grad_norm": 1.8239821715791795, "learning_rate": 2.550937248987917e-09, "loss": 0.752, "step": 2912 }, { "epoch": 0.9904794287657259, "grad_norm": 1.9332024110813362, "learning_rate": 2.378057467617434e-09, "loss": 0.7285, "step": 2913 }, { "epoch": 0.99081944916695, "grad_norm": 2.1222346021287284, "learning_rate": 2.211241754193627e-09, "loss": 0.7529, "step": 2914 }, { "epoch": 0.9911594695681741, "grad_norm": 2.949500732515013, "learning_rate": 2.050490311130138e-09, "loss": 0.7769, "step": 2915 }, { "epoch": 0.9914994899693982, "grad_norm": 2.2876307190277387, "learning_rate": 1.8958033334803837e-09, "loss": 0.7332, "step": 2916 }, { "epoch": 0.9918395103706222, "grad_norm": 1.8659398649610075, "learning_rate": 1.7471810089403352e-09, "loss": 0.7737, "step": 2917 }, { "epoch": 0.9921795307718463, "grad_norm": 4.208386889103378, "learning_rate": 1.6046235178474034e-09, "loss": 0.6483, "step": 2918 }, { "epoch": 0.9925195511730703, "grad_norm": 2.7900149310075437, "learning_rate": 1.4681310331787767e-09, "loss": 0.7181, "step": 2919 }, { "epoch": 0.9928595715742945, "grad_norm": 3.2570105193883836, "learning_rate": 1.3377037205541954e-09, "loss": 0.745, "step": 2920 }, { "epoch": 0.9931995919755185, "grad_norm": 2.253063484591565, "learning_rate": 1.2133417382320656e-09, "loss": 0.7365, "step": 2921 }, { "epoch": 0.9935396123767426, "grad_norm": 1.6986142527464747, "learning_rate": 1.0950452371116805e-09, "loss": 0.7249, "step": 2922 }, { "epoch": 0.9938796327779666, "grad_norm": 2.1769362876424783, "learning_rate": 9.828143607343298e-10, "loss": 0.675, "step": 2923 }, { "epoch": 0.9942196531791907, "grad_norm": 1.8623993129979777, "learning_rate": 8.766492452783048e-10, "loss": 0.7031, "step": 2924 }, { "epoch": 0.9945596735804149, "grad_norm": 2.8223920826039577, "learning_rate": 7.765500195650034e-10, "loss": 0.7948, "step": 2925 }, { "epoch": 0.9948996939816389, "grad_norm": 1.8686593078473168, "learning_rate": 6.825168050528241e-10, "loss": 0.727, "step": 2926 }, { "epoch": 0.995239714382863, "grad_norm": 2.162089701353925, "learning_rate": 5.945497158404979e-10, "loss": 0.7878, "step": 2927 }, { "epoch": 0.995579734784087, "grad_norm": 2.0618636520713265, "learning_rate": 5.126488586676414e-10, "loss": 0.681, "step": 2928 }, { "epoch": 0.9959197551853112, "grad_norm": 2.0577534585653656, "learning_rate": 4.368143329114283e-10, "loss": 0.7572, "step": 2929 }, { "epoch": 0.9962597755865352, "grad_norm": 1.9033317775907852, "learning_rate": 3.6704623058825275e-10, "loss": 0.725, "step": 2930 }, { "epoch": 0.9965997959877593, "grad_norm": 1.7966473182708045, "learning_rate": 3.033446363548409e-10, "loss": 0.7165, "step": 2931 }, { "epoch": 0.9969398163889833, "grad_norm": 1.9439610520049038, "learning_rate": 2.4570962750547487e-10, "loss": 0.806, "step": 2932 }, { "epoch": 0.9972798367902074, "grad_norm": 1.8447978817356736, "learning_rate": 1.9414127397476834e-10, "loss": 0.6776, "step": 2933 }, { "epoch": 0.9976198571914315, "grad_norm": 1.8689492566426245, "learning_rate": 1.486396383343358e-10, "loss": 0.7369, "step": 2934 }, { "epoch": 0.9979598775926556, "grad_norm": 2.3593280079362984, "learning_rate": 1.0920477579612342e-10, "loss": 0.737, "step": 2935 }, { "epoch": 0.9982998979938796, "grad_norm": 1.8130326623567172, "learning_rate": 7.583673420963333e-11, "loss": 0.7728, "step": 2936 }, { "epoch": 0.9986399183951037, "grad_norm": 1.8422384775818308, "learning_rate": 4.8535554063589006e-11, "loss": 0.7864, "step": 2937 }, { "epoch": 0.9989799387963277, "grad_norm": 2.6330920946601504, "learning_rate": 2.7301268484825062e-11, "loss": 0.7965, "step": 2938 }, { "epoch": 0.9993199591975519, "grad_norm": 1.4728667367747035, "learning_rate": 1.2133903238842337e-11, "loss": 0.6569, "step": 2939 }, { "epoch": 0.999659979598776, "grad_norm": 2.449775970829956, "learning_rate": 3.033476729807916e-12, "loss": 0.7428, "step": 2940 }, { "epoch": 1.0, "grad_norm": 1.9956841641574536, "learning_rate": 0.0, "loss": 0.7876, "step": 2941 }, { "epoch": 1.0, "step": 2941, "total_flos": 3322796039995392.0, "train_loss": 0.8056760676374892, "train_runtime": 111614.1149, "train_samples_per_second": 0.843, "train_steps_per_second": 0.026 } ], "logging_steps": 1.0, "max_steps": 2941, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3322796039995392.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }