{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 100, "global_step": 24030, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008322929671244279, "grad_norm": 0.0830078125, "learning_rate": 2.08073241781107e-08, "loss": 0.2994, "step": 20 }, { "epoch": 0.016645859342488557, "grad_norm": 0.0849609375, "learning_rate": 4.16146483562214e-08, "loss": 0.2962, "step": 40 }, { "epoch": 0.024968789013732832, "grad_norm": 0.0947265625, "learning_rate": 6.242197253433209e-08, "loss": 0.2974, "step": 60 }, { "epoch": 0.033291718684977115, "grad_norm": 0.08544921875, "learning_rate": 8.32292967124428e-08, "loss": 0.3034, "step": 80 }, { "epoch": 0.04161464835622139, "grad_norm": 0.07861328125, "learning_rate": 1.0403662089055348e-07, "loss": 0.2985, "step": 100 }, { "epoch": 0.04161464835622139, "eval_main_loss": 0.3075891137123108, "eval_main_runtime": 6.3529, "eval_main_samples_per_second": 29.908, "eval_main_steps_per_second": 3.778, "step": 100 }, { "epoch": 0.04161464835622139, "eval_anatomy_loss": 2.9698662757873535, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.496, "eval_anatomy_steps_per_second": 3.748, "step": 100 }, { "epoch": 0.04161464835622139, "eval_college_mathematics_loss": 2.178929567337036, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.511, "eval_college_mathematics_steps_per_second": 3.755, "step": 100 }, { "epoch": 0.04161464835622139, "eval_international_law_loss": 3.19402813911438, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.493, "eval_international_law_steps_per_second": 3.746, "step": 100 }, { "epoch": 0.049937578027465665, "grad_norm": 0.076171875, "learning_rate": 1.2484394506866418e-07, "loss": 0.2971, "step": 120 }, { "epoch": 0.05826050769870995, "grad_norm": 0.08349609375, "learning_rate": 1.4565126924677488e-07, "loss": 0.2983, "step": 140 }, { "epoch": 0.06658343736995423, "grad_norm": 0.083984375, "learning_rate": 1.664585934248856e-07, "loss": 0.2998, "step": 160 }, { "epoch": 0.0749063670411985, "grad_norm": 0.0791015625, "learning_rate": 1.8726591760299626e-07, "loss": 0.2974, "step": 180 }, { "epoch": 0.08322929671244278, "grad_norm": 0.07568359375, "learning_rate": 2.0807324178110696e-07, "loss": 0.2981, "step": 200 }, { "epoch": 0.08322929671244278, "eval_main_loss": 0.307395339012146, "eval_main_runtime": 6.3616, "eval_main_samples_per_second": 29.866, "eval_main_steps_per_second": 3.773, "step": 200 }, { "epoch": 0.08322929671244278, "eval_anatomy_loss": 2.9686527252197266, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.48, "eval_anatomy_steps_per_second": 3.74, "step": 200 }, { "epoch": 0.08322929671244278, "eval_college_mathematics_loss": 2.1776983737945557, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.499, "eval_college_mathematics_steps_per_second": 3.75, "step": 200 }, { "epoch": 0.08322929671244278, "eval_international_law_loss": 3.1930840015411377, "eval_international_law_runtime": 0.268, "eval_international_law_samples_per_second": 7.463, "eval_international_law_steps_per_second": 3.732, "step": 200 }, { "epoch": 0.09155222638368705, "grad_norm": 0.08251953125, "learning_rate": 2.2888056595921765e-07, "loss": 0.3004, "step": 220 }, { "epoch": 0.09987515605493133, "grad_norm": 0.08837890625, "learning_rate": 2.4968789013732837e-07, "loss": 0.3023, "step": 240 }, { "epoch": 0.10819808572617562, "grad_norm": 0.0888671875, "learning_rate": 2.704952143154391e-07, "loss": 0.3018, "step": 260 }, { "epoch": 0.1165210153974199, "grad_norm": 0.09130859375, "learning_rate": 2.9130253849354976e-07, "loss": 0.3018, "step": 280 }, { "epoch": 0.12484394506866417, "grad_norm": 0.08642578125, "learning_rate": 3.121098626716604e-07, "loss": 0.2971, "step": 300 }, { "epoch": 0.12484394506866417, "eval_main_loss": 0.3072234094142914, "eval_main_runtime": 6.363, "eval_main_samples_per_second": 29.86, "eval_main_steps_per_second": 3.772, "step": 300 }, { "epoch": 0.12484394506866417, "eval_anatomy_loss": 2.9682118892669678, "eval_anatomy_runtime": 0.2675, "eval_anatomy_samples_per_second": 7.476, "eval_anatomy_steps_per_second": 3.738, "step": 300 }, { "epoch": 0.12484394506866417, "eval_college_mathematics_loss": 2.1782639026641846, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.492, "eval_college_mathematics_steps_per_second": 3.746, "step": 300 }, { "epoch": 0.12484394506866417, "eval_international_law_loss": 3.1912684440612793, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.505, "eval_international_law_steps_per_second": 3.752, "step": 300 }, { "epoch": 0.13316687473990846, "grad_norm": 0.087890625, "learning_rate": 3.329171868497712e-07, "loss": 0.3034, "step": 320 }, { "epoch": 0.14148980441115272, "grad_norm": 0.08837890625, "learning_rate": 3.537245110278818e-07, "loss": 0.299, "step": 340 }, { "epoch": 0.149812734082397, "grad_norm": 0.08251953125, "learning_rate": 3.7453183520599253e-07, "loss": 0.2994, "step": 360 }, { "epoch": 0.15813566375364127, "grad_norm": 0.09326171875, "learning_rate": 3.953391593841032e-07, "loss": 0.3001, "step": 380 }, { "epoch": 0.16645859342488556, "grad_norm": 0.1025390625, "learning_rate": 4.161464835622139e-07, "loss": 0.2971, "step": 400 }, { "epoch": 0.16645859342488556, "eval_main_loss": 0.3068091869354248, "eval_main_runtime": 6.3609, "eval_main_samples_per_second": 29.87, "eval_main_steps_per_second": 3.773, "step": 400 }, { "epoch": 0.16645859342488556, "eval_anatomy_loss": 2.969444990158081, "eval_anatomy_runtime": 0.2677, "eval_anatomy_samples_per_second": 7.472, "eval_anatomy_steps_per_second": 3.736, "step": 400 }, { "epoch": 0.16645859342488556, "eval_college_mathematics_loss": 2.1761982440948486, "eval_college_mathematics_runtime": 0.2684, "eval_college_mathematics_samples_per_second": 7.45, "eval_college_mathematics_steps_per_second": 3.725, "step": 400 }, { "epoch": 0.16645859342488556, "eval_international_law_loss": 3.1895108222961426, "eval_international_law_runtime": 0.2681, "eval_international_law_samples_per_second": 7.461, "eval_international_law_steps_per_second": 3.731, "step": 400 }, { "epoch": 0.17478152309612985, "grad_norm": 0.11279296875, "learning_rate": 4.3695380774032463e-07, "loss": 0.2971, "step": 420 }, { "epoch": 0.1831044527673741, "grad_norm": 0.10400390625, "learning_rate": 4.577611319184353e-07, "loss": 0.3002, "step": 440 }, { "epoch": 0.1914273824386184, "grad_norm": 0.10498046875, "learning_rate": 4.785684560965461e-07, "loss": 0.3001, "step": 460 }, { "epoch": 0.19975031210986266, "grad_norm": 0.1171875, "learning_rate": 4.993757802746567e-07, "loss": 0.2933, "step": 480 }, { "epoch": 0.20807324178110695, "grad_norm": 0.11767578125, "learning_rate": 5.201831044527674e-07, "loss": 0.2953, "step": 500 }, { "epoch": 0.20807324178110695, "eval_main_loss": 0.3060111999511719, "eval_main_runtime": 6.3306, "eval_main_samples_per_second": 30.013, "eval_main_steps_per_second": 3.791, "step": 500 }, { "epoch": 0.20807324178110695, "eval_anatomy_loss": 2.966276168823242, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.515, "eval_anatomy_steps_per_second": 3.758, "step": 500 }, { "epoch": 0.20807324178110695, "eval_college_mathematics_loss": 2.173703193664551, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.498, "eval_college_mathematics_steps_per_second": 3.749, "step": 500 }, { "epoch": 0.20807324178110695, "eval_international_law_loss": 3.1900217533111572, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.496, "eval_international_law_steps_per_second": 3.748, "step": 500 }, { "epoch": 0.21639617145235124, "grad_norm": 0.11962890625, "learning_rate": 5.409904286308782e-07, "loss": 0.3011, "step": 520 }, { "epoch": 0.2247191011235955, "grad_norm": 0.12890625, "learning_rate": 5.617977528089888e-07, "loss": 0.2986, "step": 540 }, { "epoch": 0.2330420307948398, "grad_norm": 0.142578125, "learning_rate": 5.826050769870995e-07, "loss": 0.3017, "step": 560 }, { "epoch": 0.24136496046608405, "grad_norm": 0.1455078125, "learning_rate": 6.034124011652102e-07, "loss": 0.2957, "step": 580 }, { "epoch": 0.24968789013732834, "grad_norm": 0.166015625, "learning_rate": 6.242197253433208e-07, "loss": 0.2969, "step": 600 }, { "epoch": 0.24968789013732834, "eval_main_loss": 0.3044416904449463, "eval_main_runtime": 6.333, "eval_main_samples_per_second": 30.002, "eval_main_steps_per_second": 3.79, "step": 600 }, { "epoch": 0.24968789013732834, "eval_anatomy_loss": 2.9617347717285156, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.505, "eval_anatomy_steps_per_second": 3.753, "step": 600 }, { "epoch": 0.24968789013732834, "eval_college_mathematics_loss": 2.169576644897461, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.534, "eval_college_mathematics_steps_per_second": 3.767, "step": 600 }, { "epoch": 0.24968789013732834, "eval_international_law_loss": 3.184370756149292, "eval_international_law_runtime": 0.2656, "eval_international_law_samples_per_second": 7.53, "eval_international_law_steps_per_second": 3.765, "step": 600 }, { "epoch": 0.2580108198085726, "grad_norm": 0.16796875, "learning_rate": 6.450270495214315e-07, "loss": 0.2953, "step": 620 }, { "epoch": 0.2663337494798169, "grad_norm": 0.1650390625, "learning_rate": 6.658343736995424e-07, "loss": 0.2994, "step": 640 }, { "epoch": 0.2746566791510612, "grad_norm": 0.1796875, "learning_rate": 6.86641697877653e-07, "loss": 0.2941, "step": 660 }, { "epoch": 0.28297960882230544, "grad_norm": 0.1611328125, "learning_rate": 7.074490220557636e-07, "loss": 0.2982, "step": 680 }, { "epoch": 0.29130253849354976, "grad_norm": 0.1845703125, "learning_rate": 7.282563462338745e-07, "loss": 0.2956, "step": 700 }, { "epoch": 0.29130253849354976, "eval_main_loss": 0.3035320043563843, "eval_main_runtime": 6.3633, "eval_main_samples_per_second": 29.859, "eval_main_steps_per_second": 3.772, "step": 700 }, { "epoch": 0.29130253849354976, "eval_anatomy_loss": 2.9591825008392334, "eval_anatomy_runtime": 0.2671, "eval_anatomy_samples_per_second": 7.487, "eval_anatomy_steps_per_second": 3.744, "step": 700 }, { "epoch": 0.29130253849354976, "eval_college_mathematics_loss": 2.1641225814819336, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.491, "eval_college_mathematics_steps_per_second": 3.746, "step": 700 }, { "epoch": 0.29130253849354976, "eval_international_law_loss": 3.183608293533325, "eval_international_law_runtime": 0.2677, "eval_international_law_samples_per_second": 7.47, "eval_international_law_steps_per_second": 3.735, "step": 700 }, { "epoch": 0.299625468164794, "grad_norm": 0.1806640625, "learning_rate": 7.490636704119851e-07, "loss": 0.2968, "step": 720 }, { "epoch": 0.3079483978360383, "grad_norm": 0.1748046875, "learning_rate": 7.698709945900957e-07, "loss": 0.2924, "step": 740 }, { "epoch": 0.31627132750728254, "grad_norm": 0.1787109375, "learning_rate": 7.906783187682064e-07, "loss": 0.2948, "step": 760 }, { "epoch": 0.32459425717852686, "grad_norm": 0.173828125, "learning_rate": 8.114856429463172e-07, "loss": 0.2987, "step": 780 }, { "epoch": 0.3329171868497711, "grad_norm": 0.16015625, "learning_rate": 8.322929671244278e-07, "loss": 0.3004, "step": 800 }, { "epoch": 0.3329171868497711, "eval_main_loss": 0.3030804991722107, "eval_main_runtime": 6.3678, "eval_main_samples_per_second": 29.838, "eval_main_steps_per_second": 3.769, "step": 800 }, { "epoch": 0.3329171868497711, "eval_anatomy_loss": 2.958465337753296, "eval_anatomy_runtime": 0.2675, "eval_anatomy_samples_per_second": 7.476, "eval_anatomy_steps_per_second": 3.738, "step": 800 }, { "epoch": 0.3329171868497711, "eval_college_mathematics_loss": 2.1627776622772217, "eval_college_mathematics_runtime": 0.2676, "eval_college_mathematics_samples_per_second": 7.474, "eval_college_mathematics_steps_per_second": 3.737, "step": 800 }, { "epoch": 0.3329171868497711, "eval_international_law_loss": 3.1812000274658203, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.507, "eval_international_law_steps_per_second": 3.754, "step": 800 }, { "epoch": 0.3412401165210154, "grad_norm": 0.1650390625, "learning_rate": 8.531002913025385e-07, "loss": 0.2941, "step": 820 }, { "epoch": 0.3495630461922597, "grad_norm": 0.1806640625, "learning_rate": 8.739076154806493e-07, "loss": 0.2961, "step": 840 }, { "epoch": 0.35788597586350396, "grad_norm": 0.189453125, "learning_rate": 8.947149396587599e-07, "loss": 0.2957, "step": 860 }, { "epoch": 0.3662089055347482, "grad_norm": 0.1708984375, "learning_rate": 9.155222638368706e-07, "loss": 0.2941, "step": 880 }, { "epoch": 0.37453183520599254, "grad_norm": 0.203125, "learning_rate": 9.363295880149814e-07, "loss": 0.2956, "step": 900 }, { "epoch": 0.37453183520599254, "eval_main_loss": 0.30232974886894226, "eval_main_runtime": 6.3643, "eval_main_samples_per_second": 29.854, "eval_main_steps_per_second": 3.771, "step": 900 }, { "epoch": 0.37453183520599254, "eval_anatomy_loss": 2.9550912380218506, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.478, "eval_anatomy_steps_per_second": 3.739, "step": 900 }, { "epoch": 0.37453183520599254, "eval_college_mathematics_loss": 2.162787914276123, "eval_college_mathematics_runtime": 0.266, "eval_college_mathematics_samples_per_second": 7.518, "eval_college_mathematics_steps_per_second": 3.759, "step": 900 }, { "epoch": 0.37453183520599254, "eval_international_law_loss": 3.1821956634521484, "eval_international_law_runtime": 0.2683, "eval_international_law_samples_per_second": 7.456, "eval_international_law_steps_per_second": 3.728, "step": 900 }, { "epoch": 0.3828547648772368, "grad_norm": 0.21875, "learning_rate": 9.571369121930921e-07, "loss": 0.2989, "step": 920 }, { "epoch": 0.39117769454848106, "grad_norm": 0.23046875, "learning_rate": 9.779442363712028e-07, "loss": 0.2965, "step": 940 }, { "epoch": 0.3995006242197253, "grad_norm": 0.2001953125, "learning_rate": 9.987515605493135e-07, "loss": 0.2969, "step": 960 }, { "epoch": 0.40782355389096964, "grad_norm": 0.2275390625, "learning_rate": 1.0195588847274241e-06, "loss": 0.2913, "step": 980 }, { "epoch": 0.4161464835622139, "grad_norm": 0.275390625, "learning_rate": 1.0403662089055348e-06, "loss": 0.2945, "step": 1000 }, { "epoch": 0.4161464835622139, "eval_main_loss": 0.3009730279445648, "eval_main_runtime": 6.3379, "eval_main_samples_per_second": 29.978, "eval_main_steps_per_second": 3.787, "step": 1000 }, { "epoch": 0.4161464835622139, "eval_anatomy_loss": 2.9502501487731934, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.512, "eval_anatomy_steps_per_second": 3.756, "step": 1000 }, { "epoch": 0.4161464835622139, "eval_college_mathematics_loss": 2.1539924144744873, "eval_college_mathematics_runtime": 0.2669, "eval_college_mathematics_samples_per_second": 7.493, "eval_college_mathematics_steps_per_second": 3.746, "step": 1000 }, { "epoch": 0.4161464835622139, "eval_international_law_loss": 3.1735572814941406, "eval_international_law_runtime": 0.268, "eval_international_law_samples_per_second": 7.464, "eval_international_law_steps_per_second": 3.732, "step": 1000 }, { "epoch": 0.42446941323345816, "grad_norm": 0.2294921875, "learning_rate": 1.0611735330836455e-06, "loss": 0.2908, "step": 1020 }, { "epoch": 0.4327923429047025, "grad_norm": 0.26953125, "learning_rate": 1.0819808572617564e-06, "loss": 0.2923, "step": 1040 }, { "epoch": 0.44111527257594674, "grad_norm": 0.25390625, "learning_rate": 1.102788181439867e-06, "loss": 0.291, "step": 1060 }, { "epoch": 0.449438202247191, "grad_norm": 0.255859375, "learning_rate": 1.1235955056179777e-06, "loss": 0.2914, "step": 1080 }, { "epoch": 0.4577611319184353, "grad_norm": 0.275390625, "learning_rate": 1.1444028297960884e-06, "loss": 0.2921, "step": 1100 }, { "epoch": 0.4577611319184353, "eval_main_loss": 0.29984337091445923, "eval_main_runtime": 6.3525, "eval_main_samples_per_second": 29.909, "eval_main_steps_per_second": 3.778, "step": 1100 }, { "epoch": 0.4577611319184353, "eval_anatomy_loss": 2.947199821472168, "eval_anatomy_runtime": 0.27, "eval_anatomy_samples_per_second": 7.407, "eval_anatomy_steps_per_second": 3.703, "step": 1100 }, { "epoch": 0.4577611319184353, "eval_college_mathematics_loss": 2.1513562202453613, "eval_college_mathematics_runtime": 0.2683, "eval_college_mathematics_samples_per_second": 7.454, "eval_college_mathematics_steps_per_second": 3.727, "step": 1100 }, { "epoch": 0.4577611319184353, "eval_international_law_loss": 3.1728124618530273, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.507, "eval_international_law_steps_per_second": 3.754, "step": 1100 }, { "epoch": 0.4660840615896796, "grad_norm": 0.271484375, "learning_rate": 1.165210153974199e-06, "loss": 0.2917, "step": 1120 }, { "epoch": 0.47440699126092384, "grad_norm": 0.30078125, "learning_rate": 1.1860174781523097e-06, "loss": 0.2874, "step": 1140 }, { "epoch": 0.4827299209321681, "grad_norm": 0.298828125, "learning_rate": 1.2068248023304204e-06, "loss": 0.2926, "step": 1160 }, { "epoch": 0.4910528506034124, "grad_norm": 0.294921875, "learning_rate": 1.2276321265085312e-06, "loss": 0.2913, "step": 1180 }, { "epoch": 0.4993757802746567, "grad_norm": 0.2890625, "learning_rate": 1.2484394506866417e-06, "loss": 0.2926, "step": 1200 }, { "epoch": 0.4993757802746567, "eval_main_loss": 0.29899466037750244, "eval_main_runtime": 6.3585, "eval_main_samples_per_second": 29.881, "eval_main_steps_per_second": 3.774, "step": 1200 }, { "epoch": 0.4993757802746567, "eval_anatomy_loss": 2.9453182220458984, "eval_anatomy_runtime": 0.2675, "eval_anatomy_samples_per_second": 7.477, "eval_anatomy_steps_per_second": 3.738, "step": 1200 }, { "epoch": 0.4993757802746567, "eval_college_mathematics_loss": 2.1483311653137207, "eval_college_mathematics_runtime": 0.2675, "eval_college_mathematics_samples_per_second": 7.477, "eval_college_mathematics_steps_per_second": 3.739, "step": 1200 }, { "epoch": 0.4993757802746567, "eval_international_law_loss": 3.1692144870758057, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.493, "eval_international_law_steps_per_second": 3.746, "step": 1200 }, { "epoch": 0.5076987099459009, "grad_norm": 0.294921875, "learning_rate": 1.2692467748647524e-06, "loss": 0.2921, "step": 1220 }, { "epoch": 0.5160216396171452, "grad_norm": 0.29296875, "learning_rate": 1.290054099042863e-06, "loss": 0.2888, "step": 1240 }, { "epoch": 0.5243445692883895, "grad_norm": 0.2734375, "learning_rate": 1.3108614232209737e-06, "loss": 0.2914, "step": 1260 }, { "epoch": 0.5326674989596338, "grad_norm": 0.306640625, "learning_rate": 1.3316687473990848e-06, "loss": 0.2912, "step": 1280 }, { "epoch": 0.5409904286308781, "grad_norm": 0.2890625, "learning_rate": 1.3524760715771954e-06, "loss": 0.2909, "step": 1300 }, { "epoch": 0.5409904286308781, "eval_main_loss": 0.29816609621047974, "eval_main_runtime": 6.3672, "eval_main_samples_per_second": 29.84, "eval_main_steps_per_second": 3.769, "step": 1300 }, { "epoch": 0.5409904286308781, "eval_anatomy_loss": 2.942121744155884, "eval_anatomy_runtime": 0.2697, "eval_anatomy_samples_per_second": 7.416, "eval_anatomy_steps_per_second": 3.708, "step": 1300 }, { "epoch": 0.5409904286308781, "eval_college_mathematics_loss": 2.1458935737609863, "eval_college_mathematics_runtime": 0.2675, "eval_college_mathematics_samples_per_second": 7.476, "eval_college_mathematics_steps_per_second": 3.738, "step": 1300 }, { "epoch": 0.5409904286308781, "eval_international_law_loss": 3.1678075790405273, "eval_international_law_runtime": 0.2673, "eval_international_law_samples_per_second": 7.482, "eval_international_law_steps_per_second": 3.741, "step": 1300 }, { "epoch": 0.5493133583021224, "grad_norm": 0.291015625, "learning_rate": 1.373283395755306e-06, "loss": 0.2928, "step": 1320 }, { "epoch": 0.5576362879733666, "grad_norm": 0.263671875, "learning_rate": 1.3940907199334166e-06, "loss": 0.2915, "step": 1340 }, { "epoch": 0.5659592176446109, "grad_norm": 0.26953125, "learning_rate": 1.4148980441115272e-06, "loss": 0.2893, "step": 1360 }, { "epoch": 0.5742821473158551, "grad_norm": 0.283203125, "learning_rate": 1.435705368289638e-06, "loss": 0.2907, "step": 1380 }, { "epoch": 0.5826050769870995, "grad_norm": 0.28125, "learning_rate": 1.456512692467749e-06, "loss": 0.2938, "step": 1400 }, { "epoch": 0.5826050769870995, "eval_main_loss": 0.2975090444087982, "eval_main_runtime": 6.3608, "eval_main_samples_per_second": 29.87, "eval_main_steps_per_second": 3.773, "step": 1400 }, { "epoch": 0.5826050769870995, "eval_anatomy_loss": 2.940919876098633, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.495, "eval_anatomy_steps_per_second": 3.747, "step": 1400 }, { "epoch": 0.5826050769870995, "eval_college_mathematics_loss": 2.1454479694366455, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.495, "eval_college_mathematics_steps_per_second": 3.748, "step": 1400 }, { "epoch": 0.5826050769870995, "eval_international_law_loss": 3.1660165786743164, "eval_international_law_runtime": 0.2674, "eval_international_law_samples_per_second": 7.48, "eval_international_law_steps_per_second": 3.74, "step": 1400 }, { "epoch": 0.5909280066583438, "grad_norm": 0.310546875, "learning_rate": 1.4773200166458597e-06, "loss": 0.2922, "step": 1420 }, { "epoch": 0.599250936329588, "grad_norm": 0.29296875, "learning_rate": 1.4981273408239701e-06, "loss": 0.2957, "step": 1440 }, { "epoch": 0.6075738660008323, "grad_norm": 0.302734375, "learning_rate": 1.5189346650020808e-06, "loss": 0.2891, "step": 1460 }, { "epoch": 0.6158967956720766, "grad_norm": 0.291015625, "learning_rate": 1.5397419891801914e-06, "loss": 0.2913, "step": 1480 }, { "epoch": 0.6242197253433208, "grad_norm": 0.3125, "learning_rate": 1.5605493133583021e-06, "loss": 0.2864, "step": 1500 }, { "epoch": 0.6242197253433208, "eval_main_loss": 0.29658398032188416, "eval_main_runtime": 6.3423, "eval_main_samples_per_second": 29.958, "eval_main_steps_per_second": 3.784, "step": 1500 }, { "epoch": 0.6242197253433208, "eval_anatomy_loss": 2.939924478530884, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.496, "eval_anatomy_steps_per_second": 3.748, "step": 1500 }, { "epoch": 0.6242197253433208, "eval_college_mathematics_loss": 2.138369560241699, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.505, "eval_college_mathematics_steps_per_second": 3.753, "step": 1500 }, { "epoch": 0.6242197253433208, "eval_international_law_loss": 3.16491436958313, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.512, "eval_international_law_steps_per_second": 3.756, "step": 1500 }, { "epoch": 0.6325426550145651, "grad_norm": 0.294921875, "learning_rate": 1.5813566375364128e-06, "loss": 0.2904, "step": 1520 }, { "epoch": 0.6408655846858095, "grad_norm": 0.294921875, "learning_rate": 1.6021639617145237e-06, "loss": 0.291, "step": 1540 }, { "epoch": 0.6491885143570537, "grad_norm": 0.3203125, "learning_rate": 1.6229712858926343e-06, "loss": 0.287, "step": 1560 }, { "epoch": 0.657511444028298, "grad_norm": 0.369140625, "learning_rate": 1.643778610070745e-06, "loss": 0.2892, "step": 1580 }, { "epoch": 0.6658343736995422, "grad_norm": 0.328125, "learning_rate": 1.6645859342488557e-06, "loss": 0.2882, "step": 1600 }, { "epoch": 0.6658343736995422, "eval_main_loss": 0.29542383551597595, "eval_main_runtime": 6.3441, "eval_main_samples_per_second": 29.949, "eval_main_steps_per_second": 3.783, "step": 1600 }, { "epoch": 0.6658343736995422, "eval_anatomy_loss": 2.932673692703247, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.508, "eval_anatomy_steps_per_second": 3.754, "step": 1600 }, { "epoch": 0.6658343736995422, "eval_college_mathematics_loss": 2.137777328491211, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.505, "eval_college_mathematics_steps_per_second": 3.753, "step": 1600 }, { "epoch": 0.6658343736995422, "eval_international_law_loss": 3.161029577255249, "eval_international_law_runtime": 0.2659, "eval_international_law_samples_per_second": 7.523, "eval_international_law_steps_per_second": 3.761, "step": 1600 }, { "epoch": 0.6741573033707865, "grad_norm": 0.302734375, "learning_rate": 1.6853932584269663e-06, "loss": 0.2896, "step": 1620 }, { "epoch": 0.6824802330420308, "grad_norm": 0.349609375, "learning_rate": 1.706200582605077e-06, "loss": 0.29, "step": 1640 }, { "epoch": 0.690803162713275, "grad_norm": 0.3671875, "learning_rate": 1.7270079067831877e-06, "loss": 0.2867, "step": 1660 }, { "epoch": 0.6991260923845194, "grad_norm": 0.3671875, "learning_rate": 1.7478152309612985e-06, "loss": 0.2883, "step": 1680 }, { "epoch": 0.7074490220557637, "grad_norm": 0.341796875, "learning_rate": 1.7686225551394092e-06, "loss": 0.2843, "step": 1700 }, { "epoch": 0.7074490220557637, "eval_main_loss": 0.29412609338760376, "eval_main_runtime": 6.3391, "eval_main_samples_per_second": 29.973, "eval_main_steps_per_second": 3.786, "step": 1700 }, { "epoch": 0.7074490220557637, "eval_anatomy_loss": 2.92899751663208, "eval_anatomy_runtime": 0.267, "eval_anatomy_samples_per_second": 7.49, "eval_anatomy_steps_per_second": 3.745, "step": 1700 }, { "epoch": 0.7074490220557637, "eval_college_mathematics_loss": 2.132533311843872, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.532, "eval_college_mathematics_steps_per_second": 3.766, "step": 1700 }, { "epoch": 0.7074490220557637, "eval_international_law_loss": 3.1562838554382324, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.516, "eval_international_law_steps_per_second": 3.758, "step": 1700 }, { "epoch": 0.7157719517270079, "grad_norm": 0.373046875, "learning_rate": 1.7894298793175199e-06, "loss": 0.2916, "step": 1720 }, { "epoch": 0.7240948813982522, "grad_norm": 0.373046875, "learning_rate": 1.8102372034956305e-06, "loss": 0.2844, "step": 1740 }, { "epoch": 0.7324178110694964, "grad_norm": 0.408203125, "learning_rate": 1.8310445276737412e-06, "loss": 0.2877, "step": 1760 }, { "epoch": 0.7407407407407407, "grad_norm": 0.3984375, "learning_rate": 1.8518518518518519e-06, "loss": 0.2862, "step": 1780 }, { "epoch": 0.7490636704119851, "grad_norm": 0.421875, "learning_rate": 1.8726591760299627e-06, "loss": 0.2826, "step": 1800 }, { "epoch": 0.7490636704119851, "eval_main_loss": 0.2929327189922333, "eval_main_runtime": 6.3377, "eval_main_samples_per_second": 29.979, "eval_main_steps_per_second": 3.787, "step": 1800 }, { "epoch": 0.7490636704119851, "eval_anatomy_loss": 2.923774480819702, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.527, "eval_anatomy_steps_per_second": 3.763, "step": 1800 }, { "epoch": 0.7490636704119851, "eval_college_mathematics_loss": 2.1297199726104736, "eval_college_mathematics_runtime": 0.2661, "eval_college_mathematics_samples_per_second": 7.516, "eval_college_mathematics_steps_per_second": 3.758, "step": 1800 }, { "epoch": 0.7490636704119851, "eval_international_law_loss": 3.153568744659424, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.505, "eval_international_law_steps_per_second": 3.753, "step": 1800 }, { "epoch": 0.7573866000832293, "grad_norm": 0.36328125, "learning_rate": 1.8934665002080734e-06, "loss": 0.2883, "step": 1820 }, { "epoch": 0.7657095297544736, "grad_norm": 0.4140625, "learning_rate": 1.9142738243861843e-06, "loss": 0.2827, "step": 1840 }, { "epoch": 0.7740324594257179, "grad_norm": 0.45703125, "learning_rate": 1.9350811485642947e-06, "loss": 0.2892, "step": 1860 }, { "epoch": 0.7823553890969621, "grad_norm": 0.380859375, "learning_rate": 1.9558884727424056e-06, "loss": 0.2816, "step": 1880 }, { "epoch": 0.7906783187682064, "grad_norm": 0.3671875, "learning_rate": 1.976695796920516e-06, "loss": 0.2859, "step": 1900 }, { "epoch": 0.7906783187682064, "eval_main_loss": 0.2918354272842407, "eval_main_runtime": 6.3351, "eval_main_samples_per_second": 29.992, "eval_main_steps_per_second": 3.788, "step": 1900 }, { "epoch": 0.7906783187682064, "eval_anatomy_loss": 2.922013759613037, "eval_anatomy_runtime": 0.2669, "eval_anatomy_samples_per_second": 7.495, "eval_anatomy_steps_per_second": 3.747, "step": 1900 }, { "epoch": 0.7906783187682064, "eval_college_mathematics_loss": 2.127079963684082, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.498, "eval_college_mathematics_steps_per_second": 3.749, "step": 1900 }, { "epoch": 0.7906783187682064, "eval_international_law_loss": 3.1497292518615723, "eval_international_law_runtime": 0.2681, "eval_international_law_samples_per_second": 7.459, "eval_international_law_steps_per_second": 3.729, "step": 1900 }, { "epoch": 0.7990012484394506, "grad_norm": 0.44140625, "learning_rate": 1.997503121098627e-06, "loss": 0.2827, "step": 1920 }, { "epoch": 0.807324178110695, "grad_norm": 0.396484375, "learning_rate": 2.018310445276738e-06, "loss": 0.2835, "step": 1940 }, { "epoch": 0.8156471077819393, "grad_norm": 0.4375, "learning_rate": 2.0391177694548483e-06, "loss": 0.2824, "step": 1960 }, { "epoch": 0.8239700374531835, "grad_norm": 0.4453125, "learning_rate": 2.059925093632959e-06, "loss": 0.2834, "step": 1980 }, { "epoch": 0.8322929671244278, "grad_norm": 0.3984375, "learning_rate": 2.0807324178110696e-06, "loss": 0.285, "step": 2000 }, { "epoch": 0.8322929671244278, "eval_main_loss": 0.29074886441230774, "eval_main_runtime": 6.3397, "eval_main_samples_per_second": 29.97, "eval_main_steps_per_second": 3.786, "step": 2000 }, { "epoch": 0.8322929671244278, "eval_anatomy_loss": 2.92093825340271, "eval_anatomy_runtime": 0.2669, "eval_anatomy_samples_per_second": 7.494, "eval_anatomy_steps_per_second": 3.747, "step": 2000 }, { "epoch": 0.8322929671244278, "eval_college_mathematics_loss": 2.1216940879821777, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.511, "eval_college_mathematics_steps_per_second": 3.756, "step": 2000 }, { "epoch": 0.8322929671244278, "eval_international_law_loss": 3.146960973739624, "eval_international_law_runtime": 0.2679, "eval_international_law_samples_per_second": 7.465, "eval_international_law_steps_per_second": 3.733, "step": 2000 }, { "epoch": 0.8406158967956721, "grad_norm": 0.41015625, "learning_rate": 2.1015397419891805e-06, "loss": 0.2793, "step": 2020 }, { "epoch": 0.8489388264669163, "grad_norm": 0.443359375, "learning_rate": 2.122347066167291e-06, "loss": 0.2855, "step": 2040 }, { "epoch": 0.8572617561381606, "grad_norm": 0.45703125, "learning_rate": 2.1431543903454014e-06, "loss": 0.2789, "step": 2060 }, { "epoch": 0.865584685809405, "grad_norm": 0.4765625, "learning_rate": 2.1639617145235127e-06, "loss": 0.2807, "step": 2080 }, { "epoch": 0.8739076154806492, "grad_norm": 0.431640625, "learning_rate": 2.184769038701623e-06, "loss": 0.2851, "step": 2100 }, { "epoch": 0.8739076154806492, "eval_main_loss": 0.2896941602230072, "eval_main_runtime": 6.3421, "eval_main_samples_per_second": 29.958, "eval_main_steps_per_second": 3.784, "step": 2100 }, { "epoch": 0.8739076154806492, "eval_anatomy_loss": 2.916968822479248, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.481, "eval_anatomy_steps_per_second": 3.741, "step": 2100 }, { "epoch": 0.8739076154806492, "eval_college_mathematics_loss": 2.119070053100586, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.498, "eval_college_mathematics_steps_per_second": 3.749, "step": 2100 }, { "epoch": 0.8739076154806492, "eval_international_law_loss": 3.1451635360717773, "eval_international_law_runtime": 0.2673, "eval_international_law_samples_per_second": 7.481, "eval_international_law_steps_per_second": 3.74, "step": 2100 }, { "epoch": 0.8822305451518935, "grad_norm": 0.455078125, "learning_rate": 2.205576362879734e-06, "loss": 0.2894, "step": 2120 }, { "epoch": 0.8905534748231377, "grad_norm": 0.431640625, "learning_rate": 2.2263836870578445e-06, "loss": 0.2872, "step": 2140 }, { "epoch": 0.898876404494382, "grad_norm": 0.455078125, "learning_rate": 2.2471910112359554e-06, "loss": 0.2842, "step": 2160 }, { "epoch": 0.9071993341656263, "grad_norm": 0.42578125, "learning_rate": 2.267998335414066e-06, "loss": 0.282, "step": 2180 }, { "epoch": 0.9155222638368706, "grad_norm": 0.435546875, "learning_rate": 2.2888056595921767e-06, "loss": 0.2787, "step": 2200 }, { "epoch": 0.9155222638368706, "eval_main_loss": 0.28874531388282776, "eval_main_runtime": 6.3297, "eval_main_samples_per_second": 30.017, "eval_main_steps_per_second": 3.792, "step": 2200 }, { "epoch": 0.9155222638368706, "eval_anatomy_loss": 2.9137964248657227, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.522, "eval_anatomy_steps_per_second": 3.761, "step": 2200 }, { "epoch": 0.9155222638368706, "eval_college_mathematics_loss": 2.115910530090332, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.492, "eval_college_mathematics_steps_per_second": 3.746, "step": 2200 }, { "epoch": 0.9155222638368706, "eval_international_law_loss": 3.14198637008667, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.497, "eval_international_law_steps_per_second": 3.749, "step": 2200 }, { "epoch": 0.9238451935081149, "grad_norm": 0.474609375, "learning_rate": 2.3096129837702876e-06, "loss": 0.2844, "step": 2220 }, { "epoch": 0.9321681231793592, "grad_norm": 0.44921875, "learning_rate": 2.330420307948398e-06, "loss": 0.2822, "step": 2240 }, { "epoch": 0.9404910528506034, "grad_norm": 0.466796875, "learning_rate": 2.351227632126509e-06, "loss": 0.2776, "step": 2260 }, { "epoch": 0.9488139825218477, "grad_norm": 0.388671875, "learning_rate": 2.3720349563046194e-06, "loss": 0.2788, "step": 2280 }, { "epoch": 0.9571369121930919, "grad_norm": 0.486328125, "learning_rate": 2.39284228048273e-06, "loss": 0.2806, "step": 2300 }, { "epoch": 0.9571369121930919, "eval_main_loss": 0.2877212464809418, "eval_main_runtime": 6.3349, "eval_main_samples_per_second": 29.993, "eval_main_steps_per_second": 3.789, "step": 2300 }, { "epoch": 0.9571369121930919, "eval_anatomy_loss": 2.9125289916992188, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.507, "eval_anatomy_steps_per_second": 3.753, "step": 2300 }, { "epoch": 0.9571369121930919, "eval_college_mathematics_loss": 2.1152408123016357, "eval_college_mathematics_runtime": 0.2681, "eval_college_mathematics_samples_per_second": 7.459, "eval_college_mathematics_steps_per_second": 3.729, "step": 2300 }, { "epoch": 0.9571369121930919, "eval_international_law_loss": 3.1422488689422607, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.516, "eval_international_law_steps_per_second": 3.758, "step": 2300 }, { "epoch": 0.9654598418643362, "grad_norm": 0.490234375, "learning_rate": 2.4136496046608407e-06, "loss": 0.2781, "step": 2320 }, { "epoch": 0.9737827715355806, "grad_norm": 0.4296875, "learning_rate": 2.4344569288389516e-06, "loss": 0.2808, "step": 2340 }, { "epoch": 0.9821057012068248, "grad_norm": 0.4609375, "learning_rate": 2.4552642530170625e-06, "loss": 0.2829, "step": 2360 }, { "epoch": 0.9904286308780691, "grad_norm": 0.466796875, "learning_rate": 2.476071577195173e-06, "loss": 0.2789, "step": 2380 }, { "epoch": 0.9987515605493134, "grad_norm": 0.44921875, "learning_rate": 2.4968789013732834e-06, "loss": 0.283, "step": 2400 }, { "epoch": 0.9987515605493134, "eval_main_loss": 0.2861831784248352, "eval_main_runtime": 6.3371, "eval_main_samples_per_second": 29.982, "eval_main_steps_per_second": 3.787, "step": 2400 }, { "epoch": 0.9987515605493134, "eval_anatomy_loss": 2.9071733951568604, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.513, "eval_anatomy_steps_per_second": 3.756, "step": 2400 }, { "epoch": 0.9987515605493134, "eval_college_mathematics_loss": 2.1126503944396973, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.503, "eval_college_mathematics_steps_per_second": 3.752, "step": 2400 }, { "epoch": 0.9987515605493134, "eval_international_law_loss": 3.1375622749328613, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.492, "eval_international_law_steps_per_second": 3.746, "step": 2400 }, { "epoch": 1.0070744902205577, "grad_norm": 0.474609375, "learning_rate": 2.5176862255513947e-06, "loss": 0.2779, "step": 2420 }, { "epoch": 1.0153974198918019, "grad_norm": 0.51171875, "learning_rate": 2.5384935497295047e-06, "loss": 0.282, "step": 2440 }, { "epoch": 1.0237203495630463, "grad_norm": 0.435546875, "learning_rate": 2.559300873907616e-06, "loss": 0.2757, "step": 2460 }, { "epoch": 1.0320432792342904, "grad_norm": 0.431640625, "learning_rate": 2.580108198085726e-06, "loss": 0.2803, "step": 2480 }, { "epoch": 1.0403662089055348, "grad_norm": 0.46484375, "learning_rate": 2.6009155222638374e-06, "loss": 0.2768, "step": 2500 }, { "epoch": 1.0403662089055348, "eval_main_loss": 0.283886194229126, "eval_main_runtime": 6.3349, "eval_main_samples_per_second": 29.992, "eval_main_steps_per_second": 3.789, "step": 2500 }, { "epoch": 1.0403662089055348, "eval_anatomy_loss": 2.9029533863067627, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.519, "eval_anatomy_steps_per_second": 3.76, "step": 2500 }, { "epoch": 1.0403662089055348, "eval_college_mathematics_loss": 2.1050500869750977, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.51, "eval_college_mathematics_steps_per_second": 3.755, "step": 2500 }, { "epoch": 1.0403662089055348, "eval_international_law_loss": 3.1294405460357666, "eval_international_law_runtime": 0.2672, "eval_international_law_samples_per_second": 7.484, "eval_international_law_steps_per_second": 3.742, "step": 2500 }, { "epoch": 1.048689138576779, "grad_norm": 0.462890625, "learning_rate": 2.6217228464419474e-06, "loss": 0.2817, "step": 2520 }, { "epoch": 1.0570120682480233, "grad_norm": 0.5078125, "learning_rate": 2.6425301706200583e-06, "loss": 0.2763, "step": 2540 }, { "epoch": 1.0653349979192677, "grad_norm": 0.46484375, "learning_rate": 2.6633374947981696e-06, "loss": 0.2757, "step": 2560 }, { "epoch": 1.0736579275905118, "grad_norm": 0.51953125, "learning_rate": 2.6841448189762796e-06, "loss": 0.2764, "step": 2580 }, { "epoch": 1.0819808572617562, "grad_norm": 0.48828125, "learning_rate": 2.704952143154391e-06, "loss": 0.2747, "step": 2600 }, { "epoch": 1.0819808572617562, "eval_main_loss": 0.2814878225326538, "eval_main_runtime": 6.3334, "eval_main_samples_per_second": 30.0, "eval_main_steps_per_second": 3.789, "step": 2600 }, { "epoch": 1.0819808572617562, "eval_anatomy_loss": 2.898045778274536, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.503, "eval_anatomy_steps_per_second": 3.752, "step": 2600 }, { "epoch": 1.0819808572617562, "eval_college_mathematics_loss": 2.1027755737304688, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.509, "eval_college_mathematics_steps_per_second": 3.754, "step": 2600 }, { "epoch": 1.0819808572617562, "eval_international_law_loss": 3.1270911693573, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.503, "eval_international_law_steps_per_second": 3.751, "step": 2600 }, { "epoch": 1.0903037869330003, "grad_norm": 0.474609375, "learning_rate": 2.725759467332501e-06, "loss": 0.2752, "step": 2620 }, { "epoch": 1.0986267166042447, "grad_norm": 0.49609375, "learning_rate": 2.746566791510612e-06, "loss": 0.2709, "step": 2640 }, { "epoch": 1.1069496462754889, "grad_norm": 0.490234375, "learning_rate": 2.7673741156887223e-06, "loss": 0.2741, "step": 2660 }, { "epoch": 1.1152725759467332, "grad_norm": 0.44140625, "learning_rate": 2.788181439866833e-06, "loss": 0.2693, "step": 2680 }, { "epoch": 1.1235955056179776, "grad_norm": 0.474609375, "learning_rate": 2.8089887640449444e-06, "loss": 0.2796, "step": 2700 }, { "epoch": 1.1235955056179776, "eval_main_loss": 0.2786170542240143, "eval_main_runtime": 6.3383, "eval_main_samples_per_second": 29.977, "eval_main_steps_per_second": 3.787, "step": 2700 }, { "epoch": 1.1235955056179776, "eval_anatomy_loss": 2.8903613090515137, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.517, "eval_anatomy_steps_per_second": 3.758, "step": 2700 }, { "epoch": 1.1235955056179776, "eval_college_mathematics_loss": 2.0952742099761963, "eval_college_mathematics_runtime": 0.2653, "eval_college_mathematics_samples_per_second": 7.537, "eval_college_mathematics_steps_per_second": 3.769, "step": 2700 }, { "epoch": 1.1235955056179776, "eval_international_law_loss": 3.120177745819092, "eval_international_law_runtime": 0.2677, "eval_international_law_samples_per_second": 7.47, "eval_international_law_steps_per_second": 3.735, "step": 2700 }, { "epoch": 1.1319184352892218, "grad_norm": 0.50390625, "learning_rate": 2.8297960882230545e-06, "loss": 0.2717, "step": 2720 }, { "epoch": 1.1402413649604661, "grad_norm": 0.498046875, "learning_rate": 2.8506034124011653e-06, "loss": 0.2704, "step": 2740 }, { "epoch": 1.1485642946317103, "grad_norm": 0.4765625, "learning_rate": 2.871410736579276e-06, "loss": 0.2688, "step": 2760 }, { "epoch": 1.1568872243029547, "grad_norm": 0.458984375, "learning_rate": 2.8922180607573867e-06, "loss": 0.2654, "step": 2780 }, { "epoch": 1.1652101539741988, "grad_norm": 0.419921875, "learning_rate": 2.913025384935498e-06, "loss": 0.2646, "step": 2800 }, { "epoch": 1.1652101539741988, "eval_main_loss": 0.27353334426879883, "eval_main_runtime": 6.3411, "eval_main_samples_per_second": 29.963, "eval_main_steps_per_second": 3.785, "step": 2800 }, { "epoch": 1.1652101539741988, "eval_anatomy_loss": 2.8842265605926514, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.521, "eval_anatomy_steps_per_second": 3.76, "step": 2800 }, { "epoch": 1.1652101539741988, "eval_college_mathematics_loss": 2.086782693862915, "eval_college_mathematics_runtime": 0.2656, "eval_college_mathematics_samples_per_second": 7.531, "eval_college_mathematics_steps_per_second": 3.766, "step": 2800 }, { "epoch": 1.1652101539741988, "eval_international_law_loss": 3.11441707611084, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.501, "eval_international_law_steps_per_second": 3.75, "step": 2800 }, { "epoch": 1.1735330836454432, "grad_norm": 0.43359375, "learning_rate": 2.933832709113608e-06, "loss": 0.2618, "step": 2820 }, { "epoch": 1.1818560133166875, "grad_norm": 0.453125, "learning_rate": 2.9546400332917193e-06, "loss": 0.2636, "step": 2840 }, { "epoch": 1.1901789429879317, "grad_norm": 0.453125, "learning_rate": 2.9754473574698293e-06, "loss": 0.2654, "step": 2860 }, { "epoch": 1.198501872659176, "grad_norm": 0.46484375, "learning_rate": 2.9962546816479402e-06, "loss": 0.2586, "step": 2880 }, { "epoch": 1.2068248023304202, "grad_norm": 0.462890625, "learning_rate": 3.0170620058260507e-06, "loss": 0.2576, "step": 2900 }, { "epoch": 1.2068248023304202, "eval_main_loss": 0.2663625478744507, "eval_main_runtime": 6.3386, "eval_main_samples_per_second": 29.975, "eval_main_steps_per_second": 3.786, "step": 2900 }, { "epoch": 1.2068248023304202, "eval_anatomy_loss": 2.876462936401367, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.503, "eval_anatomy_steps_per_second": 3.752, "step": 2900 }, { "epoch": 1.2068248023304202, "eval_college_mathematics_loss": 2.0814149379730225, "eval_college_mathematics_runtime": 0.2676, "eval_college_mathematics_samples_per_second": 7.473, "eval_college_mathematics_steps_per_second": 3.736, "step": 2900 }, { "epoch": 1.2068248023304202, "eval_international_law_loss": 3.108351707458496, "eval_international_law_runtime": 0.2657, "eval_international_law_samples_per_second": 7.527, "eval_international_law_steps_per_second": 3.764, "step": 2900 }, { "epoch": 1.2151477320016646, "grad_norm": 0.41015625, "learning_rate": 3.0378693300041616e-06, "loss": 0.2599, "step": 2920 }, { "epoch": 1.2234706616729087, "grad_norm": 0.46875, "learning_rate": 3.058676654182273e-06, "loss": 0.2602, "step": 2940 }, { "epoch": 1.2317935913441531, "grad_norm": 0.4609375, "learning_rate": 3.079483978360383e-06, "loss": 0.2564, "step": 2960 }, { "epoch": 1.2401165210153975, "grad_norm": 0.42578125, "learning_rate": 3.1002913025384938e-06, "loss": 0.2551, "step": 2980 }, { "epoch": 1.2484394506866416, "grad_norm": 0.416015625, "learning_rate": 3.1210986267166042e-06, "loss": 0.2551, "step": 3000 }, { "epoch": 1.2484394506866416, "eval_main_loss": 0.2596937417984009, "eval_main_runtime": 6.3395, "eval_main_samples_per_second": 29.971, "eval_main_steps_per_second": 3.786, "step": 3000 }, { "epoch": 1.2484394506866416, "eval_anatomy_loss": 2.8675546646118164, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.501, "eval_anatomy_steps_per_second": 3.751, "step": 3000 }, { "epoch": 1.2484394506866416, "eval_college_mathematics_loss": 2.0764455795288086, "eval_college_mathematics_runtime": 0.2661, "eval_college_mathematics_samples_per_second": 7.515, "eval_college_mathematics_steps_per_second": 3.758, "step": 3000 }, { "epoch": 1.2484394506866416, "eval_international_law_loss": 3.1014602184295654, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.505, "eval_international_law_steps_per_second": 3.752, "step": 3000 }, { "epoch": 1.256762380357886, "grad_norm": 0.423828125, "learning_rate": 3.141905950894715e-06, "loss": 0.2528, "step": 3020 }, { "epoch": 1.2650853100291304, "grad_norm": 0.408203125, "learning_rate": 3.1627132750728256e-06, "loss": 0.2555, "step": 3040 }, { "epoch": 1.2734082397003745, "grad_norm": 0.404296875, "learning_rate": 3.1835205992509364e-06, "loss": 0.2527, "step": 3060 }, { "epoch": 1.2817311693716187, "grad_norm": 0.41796875, "learning_rate": 3.2043279234290473e-06, "loss": 0.2523, "step": 3080 }, { "epoch": 1.290054099042863, "grad_norm": 0.4453125, "learning_rate": 3.2251352476071578e-06, "loss": 0.2501, "step": 3100 }, { "epoch": 1.290054099042863, "eval_main_loss": 0.2549309730529785, "eval_main_runtime": 6.3572, "eval_main_samples_per_second": 29.887, "eval_main_steps_per_second": 3.775, "step": 3100 }, { "epoch": 1.290054099042863, "eval_anatomy_loss": 2.862455368041992, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.507, "eval_anatomy_steps_per_second": 3.753, "step": 3100 }, { "epoch": 1.290054099042863, "eval_college_mathematics_loss": 2.0720179080963135, "eval_college_mathematics_runtime": 0.2673, "eval_college_mathematics_samples_per_second": 7.482, "eval_college_mathematics_steps_per_second": 3.741, "step": 3100 }, { "epoch": 1.290054099042863, "eval_international_law_loss": 3.095749855041504, "eval_international_law_runtime": 0.2682, "eval_international_law_samples_per_second": 7.457, "eval_international_law_steps_per_second": 3.728, "step": 3100 }, { "epoch": 1.2983770287141074, "grad_norm": 0.423828125, "learning_rate": 3.2459425717852687e-06, "loss": 0.2479, "step": 3120 }, { "epoch": 1.3066999583853516, "grad_norm": 0.376953125, "learning_rate": 3.266749895963379e-06, "loss": 0.2454, "step": 3140 }, { "epoch": 1.315022888056596, "grad_norm": 0.41796875, "learning_rate": 3.28755722014149e-06, "loss": 0.2428, "step": 3160 }, { "epoch": 1.3233458177278403, "grad_norm": 0.404296875, "learning_rate": 3.3083645443196004e-06, "loss": 0.243, "step": 3180 }, { "epoch": 1.3316687473990845, "grad_norm": 0.38671875, "learning_rate": 3.3291718684977113e-06, "loss": 0.2489, "step": 3200 }, { "epoch": 1.3316687473990845, "eval_main_loss": 0.2516440749168396, "eval_main_runtime": 6.3553, "eval_main_samples_per_second": 29.896, "eval_main_steps_per_second": 3.776, "step": 3200 }, { "epoch": 1.3316687473990845, "eval_anatomy_loss": 2.8593392372131348, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.481, "eval_anatomy_steps_per_second": 3.741, "step": 3200 }, { "epoch": 1.3316687473990845, "eval_college_mathematics_loss": 2.0700876712799072, "eval_college_mathematics_runtime": 0.2677, "eval_college_mathematics_samples_per_second": 7.471, "eval_college_mathematics_steps_per_second": 3.735, "step": 3200 }, { "epoch": 1.3316687473990845, "eval_international_law_loss": 3.094395875930786, "eval_international_law_runtime": 0.2675, "eval_international_law_samples_per_second": 7.478, "eval_international_law_steps_per_second": 3.739, "step": 3200 }, { "epoch": 1.3399916770703286, "grad_norm": 0.34765625, "learning_rate": 3.349979192675822e-06, "loss": 0.2518, "step": 3220 }, { "epoch": 1.348314606741573, "grad_norm": 0.412109375, "learning_rate": 3.3707865168539327e-06, "loss": 0.243, "step": 3240 }, { "epoch": 1.3566375364128174, "grad_norm": 0.431640625, "learning_rate": 3.3915938410320435e-06, "loss": 0.246, "step": 3260 }, { "epoch": 1.3649604660840615, "grad_norm": 0.322265625, "learning_rate": 3.412401165210154e-06, "loss": 0.249, "step": 3280 }, { "epoch": 1.373283395755306, "grad_norm": 0.37890625, "learning_rate": 3.433208489388265e-06, "loss": 0.2439, "step": 3300 }, { "epoch": 1.373283395755306, "eval_main_loss": 0.24985496699810028, "eval_main_runtime": 6.3475, "eval_main_samples_per_second": 29.933, "eval_main_steps_per_second": 3.781, "step": 3300 }, { "epoch": 1.373283395755306, "eval_anatomy_loss": 2.8591318130493164, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.514, "eval_anatomy_steps_per_second": 3.757, "step": 3300 }, { "epoch": 1.373283395755306, "eval_college_mathematics_loss": 2.067070722579956, "eval_college_mathematics_runtime": 0.2676, "eval_college_mathematics_samples_per_second": 7.475, "eval_college_mathematics_steps_per_second": 3.738, "step": 3300 }, { "epoch": 1.373283395755306, "eval_international_law_loss": 3.0916695594787598, "eval_international_law_runtime": 0.2676, "eval_international_law_samples_per_second": 7.475, "eval_international_law_steps_per_second": 3.738, "step": 3300 }, { "epoch": 1.3816063254265503, "grad_norm": 0.369140625, "learning_rate": 3.4540158135663753e-06, "loss": 0.2431, "step": 3320 }, { "epoch": 1.3899292550977944, "grad_norm": 0.416015625, "learning_rate": 3.474823137744486e-06, "loss": 0.2437, "step": 3340 }, { "epoch": 1.3982521847690386, "grad_norm": 0.353515625, "learning_rate": 3.495630461922597e-06, "loss": 0.2434, "step": 3360 }, { "epoch": 1.406575114440283, "grad_norm": 0.376953125, "learning_rate": 3.5164377861007075e-06, "loss": 0.243, "step": 3380 }, { "epoch": 1.4148980441115273, "grad_norm": 0.40625, "learning_rate": 3.5372451102788184e-06, "loss": 0.2447, "step": 3400 }, { "epoch": 1.4148980441115273, "eval_main_loss": 0.24850161373615265, "eval_main_runtime": 6.3478, "eval_main_samples_per_second": 29.931, "eval_main_steps_per_second": 3.781, "step": 3400 }, { "epoch": 1.4148980441115273, "eval_anatomy_loss": 2.85494327545166, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.505, "eval_anatomy_steps_per_second": 3.753, "step": 3400 }, { "epoch": 1.4148980441115273, "eval_college_mathematics_loss": 2.071016550064087, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.496, "eval_college_mathematics_steps_per_second": 3.748, "step": 3400 }, { "epoch": 1.4148980441115273, "eval_international_law_loss": 3.0913243293762207, "eval_international_law_runtime": 0.2682, "eval_international_law_samples_per_second": 7.457, "eval_international_law_steps_per_second": 3.729, "step": 3400 }, { "epoch": 1.4232209737827715, "grad_norm": 0.3828125, "learning_rate": 3.558052434456929e-06, "loss": 0.2432, "step": 3420 }, { "epoch": 1.4315439034540158, "grad_norm": 0.37890625, "learning_rate": 3.5788597586350397e-06, "loss": 0.2367, "step": 3440 }, { "epoch": 1.4398668331252602, "grad_norm": 0.357421875, "learning_rate": 3.59966708281315e-06, "loss": 0.242, "step": 3460 }, { "epoch": 1.4481897627965044, "grad_norm": 0.369140625, "learning_rate": 3.620474406991261e-06, "loss": 0.2382, "step": 3480 }, { "epoch": 1.4565126924677487, "grad_norm": 0.34765625, "learning_rate": 3.641281731169372e-06, "loss": 0.2412, "step": 3500 }, { "epoch": 1.4565126924677487, "eval_main_loss": 0.24757333099842072, "eval_main_runtime": 6.3261, "eval_main_samples_per_second": 30.034, "eval_main_steps_per_second": 3.794, "step": 3500 }, { "epoch": 1.4565126924677487, "eval_anatomy_loss": 2.8548152446746826, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.525, "eval_anatomy_steps_per_second": 3.763, "step": 3500 }, { "epoch": 1.4565126924677487, "eval_college_mathematics_loss": 2.0659983158111572, "eval_college_mathematics_runtime": 0.2656, "eval_college_mathematics_samples_per_second": 7.531, "eval_college_mathematics_steps_per_second": 3.766, "step": 3500 }, { "epoch": 1.4565126924677487, "eval_international_law_loss": 3.090118646621704, "eval_international_law_runtime": 0.2671, "eval_international_law_samples_per_second": 7.489, "eval_international_law_steps_per_second": 3.745, "step": 3500 }, { "epoch": 1.4648356221389929, "grad_norm": 0.375, "learning_rate": 3.6620890553474824e-06, "loss": 0.2393, "step": 3520 }, { "epoch": 1.4731585518102372, "grad_norm": 0.365234375, "learning_rate": 3.6828963795255933e-06, "loss": 0.2408, "step": 3540 }, { "epoch": 1.4814814814814814, "grad_norm": 0.322265625, "learning_rate": 3.7037037037037037e-06, "loss": 0.2421, "step": 3560 }, { "epoch": 1.4898044111527258, "grad_norm": 0.333984375, "learning_rate": 3.7245110278818146e-06, "loss": 0.2396, "step": 3580 }, { "epoch": 1.4981273408239701, "grad_norm": 0.390625, "learning_rate": 3.7453183520599255e-06, "loss": 0.2409, "step": 3600 }, { "epoch": 1.4981273408239701, "eval_main_loss": 0.24677349627017975, "eval_main_runtime": 6.3243, "eval_main_samples_per_second": 30.043, "eval_main_steps_per_second": 3.795, "step": 3600 }, { "epoch": 1.4981273408239701, "eval_anatomy_loss": 2.8548572063446045, "eval_anatomy_runtime": 0.2652, "eval_anatomy_samples_per_second": 7.542, "eval_anatomy_steps_per_second": 3.771, "step": 3600 }, { "epoch": 1.4981273408239701, "eval_college_mathematics_loss": 2.067040205001831, "eval_college_mathematics_runtime": 0.2656, "eval_college_mathematics_samples_per_second": 7.529, "eval_college_mathematics_steps_per_second": 3.764, "step": 3600 }, { "epoch": 1.4981273408239701, "eval_international_law_loss": 3.08817720413208, "eval_international_law_runtime": 0.2659, "eval_international_law_samples_per_second": 7.523, "eval_international_law_steps_per_second": 3.761, "step": 3600 }, { "epoch": 1.5064502704952143, "grad_norm": 0.3828125, "learning_rate": 3.766125676238036e-06, "loss": 0.24, "step": 3620 }, { "epoch": 1.5147732001664584, "grad_norm": 0.37109375, "learning_rate": 3.786933000416147e-06, "loss": 0.2414, "step": 3640 }, { "epoch": 1.5230961298377028, "grad_norm": 0.37109375, "learning_rate": 3.8077403245942573e-06, "loss": 0.2401, "step": 3660 }, { "epoch": 1.5314190595089472, "grad_norm": 0.37890625, "learning_rate": 3.828547648772369e-06, "loss": 0.2373, "step": 3680 }, { "epoch": 1.5397419891801913, "grad_norm": 0.353515625, "learning_rate": 3.849354972950479e-06, "loss": 0.2404, "step": 3700 }, { "epoch": 1.5397419891801913, "eval_main_loss": 0.24623610079288483, "eval_main_runtime": 6.3272, "eval_main_samples_per_second": 30.029, "eval_main_steps_per_second": 3.793, "step": 3700 }, { "epoch": 1.5397419891801913, "eval_anatomy_loss": 2.8530216217041016, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.518, "eval_anatomy_steps_per_second": 3.759, "step": 3700 }, { "epoch": 1.5397419891801913, "eval_college_mathematics_loss": 2.065014123916626, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.528, "eval_college_mathematics_steps_per_second": 3.764, "step": 3700 }, { "epoch": 1.5397419891801913, "eval_international_law_loss": 3.0873329639434814, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.51, "eval_international_law_steps_per_second": 3.755, "step": 3700 }, { "epoch": 1.5480649188514357, "grad_norm": 0.34765625, "learning_rate": 3.8701622971285895e-06, "loss": 0.2414, "step": 3720 }, { "epoch": 1.55638784852268, "grad_norm": 0.384765625, "learning_rate": 3.890969621306701e-06, "loss": 0.2402, "step": 3740 }, { "epoch": 1.5647107781939242, "grad_norm": 0.400390625, "learning_rate": 3.911776945484811e-06, "loss": 0.2394, "step": 3760 }, { "epoch": 1.5730337078651684, "grad_norm": 0.328125, "learning_rate": 3.932584269662922e-06, "loss": 0.239, "step": 3780 }, { "epoch": 1.581356637536413, "grad_norm": 0.384765625, "learning_rate": 3.953391593841032e-06, "loss": 0.2383, "step": 3800 }, { "epoch": 1.581356637536413, "eval_main_loss": 0.24581117928028107, "eval_main_runtime": 6.3278, "eval_main_samples_per_second": 30.026, "eval_main_steps_per_second": 3.793, "step": 3800 }, { "epoch": 1.581356637536413, "eval_anatomy_loss": 2.8533191680908203, "eval_anatomy_runtime": 0.2681, "eval_anatomy_samples_per_second": 7.459, "eval_anatomy_steps_per_second": 3.729, "step": 3800 }, { "epoch": 1.581356637536413, "eval_college_mathematics_loss": 2.0673060417175293, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.526, "eval_college_mathematics_steps_per_second": 3.763, "step": 3800 }, { "epoch": 1.581356637536413, "eval_international_law_loss": 3.0861918926239014, "eval_international_law_runtime": 0.2673, "eval_international_law_samples_per_second": 7.482, "eval_international_law_steps_per_second": 3.741, "step": 3800 }, { "epoch": 1.5896795672076571, "grad_norm": 0.34375, "learning_rate": 3.9741989180191435e-06, "loss": 0.2399, "step": 3820 }, { "epoch": 1.5980024968789013, "grad_norm": 0.33984375, "learning_rate": 3.995006242197254e-06, "loss": 0.2298, "step": 3840 }, { "epoch": 1.6063254265501457, "grad_norm": 0.37109375, "learning_rate": 4.015813566375364e-06, "loss": 0.2409, "step": 3860 }, { "epoch": 1.61464835622139, "grad_norm": 0.36328125, "learning_rate": 4.036620890553476e-06, "loss": 0.2397, "step": 3880 }, { "epoch": 1.6229712858926342, "grad_norm": 0.35546875, "learning_rate": 4.057428214731586e-06, "loss": 0.2373, "step": 3900 }, { "epoch": 1.6229712858926342, "eval_main_loss": 0.2453879863023758, "eval_main_runtime": 6.3211, "eval_main_samples_per_second": 30.058, "eval_main_steps_per_second": 3.797, "step": 3900 }, { "epoch": 1.6229712858926342, "eval_anatomy_loss": 2.8509092330932617, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.525, "eval_anatomy_steps_per_second": 3.762, "step": 3900 }, { "epoch": 1.6229712858926342, "eval_college_mathematics_loss": 2.0684049129486084, "eval_college_mathematics_runtime": 0.2654, "eval_college_mathematics_samples_per_second": 7.535, "eval_college_mathematics_steps_per_second": 3.767, "step": 3900 }, { "epoch": 1.6229712858926342, "eval_international_law_loss": 3.0845108032226562, "eval_international_law_runtime": 0.2652, "eval_international_law_samples_per_second": 7.542, "eval_international_law_steps_per_second": 3.771, "step": 3900 }, { "epoch": 1.6312942155638783, "grad_norm": 0.3203125, "learning_rate": 4.078235538909697e-06, "loss": 0.2356, "step": 3920 }, { "epoch": 1.639617145235123, "grad_norm": 0.36328125, "learning_rate": 4.099042863087807e-06, "loss": 0.238, "step": 3940 }, { "epoch": 1.647940074906367, "grad_norm": 0.3359375, "learning_rate": 4.119850187265918e-06, "loss": 0.2421, "step": 3960 }, { "epoch": 1.6562630045776112, "grad_norm": 0.380859375, "learning_rate": 4.140657511444029e-06, "loss": 0.2401, "step": 3980 }, { "epoch": 1.6645859342488556, "grad_norm": 0.341796875, "learning_rate": 4.161464835622139e-06, "loss": 0.2372, "step": 4000 }, { "epoch": 1.6645859342488556, "eval_main_loss": 0.2449522763490677, "eval_main_runtime": 6.3294, "eval_main_samples_per_second": 30.019, "eval_main_steps_per_second": 3.792, "step": 4000 }, { "epoch": 1.6645859342488556, "eval_anatomy_loss": 2.8499295711517334, "eval_anatomy_runtime": 0.2671, "eval_anatomy_samples_per_second": 7.487, "eval_anatomy_steps_per_second": 3.744, "step": 4000 }, { "epoch": 1.6645859342488556, "eval_college_mathematics_loss": 2.0638344287872314, "eval_college_mathematics_runtime": 0.2662, "eval_college_mathematics_samples_per_second": 7.514, "eval_college_mathematics_steps_per_second": 3.757, "step": 4000 }, { "epoch": 1.6645859342488556, "eval_international_law_loss": 3.08413028717041, "eval_international_law_runtime": 0.2655, "eval_international_law_samples_per_second": 7.534, "eval_international_law_steps_per_second": 3.767, "step": 4000 }, { "epoch": 1.6729088639201, "grad_norm": 0.361328125, "learning_rate": 4.1822721598002506e-06, "loss": 0.2415, "step": 4020 }, { "epoch": 1.6812317935913441, "grad_norm": 0.349609375, "learning_rate": 4.203079483978361e-06, "loss": 0.244, "step": 4040 }, { "epoch": 1.6895547232625883, "grad_norm": 0.33984375, "learning_rate": 4.2238868081564715e-06, "loss": 0.2371, "step": 4060 }, { "epoch": 1.6978776529338329, "grad_norm": 0.353515625, "learning_rate": 4.244694132334582e-06, "loss": 0.2371, "step": 4080 }, { "epoch": 1.706200582605077, "grad_norm": 0.318359375, "learning_rate": 4.265501456512693e-06, "loss": 0.2389, "step": 4100 }, { "epoch": 1.706200582605077, "eval_main_loss": 0.24446672201156616, "eval_main_runtime": 6.3267, "eval_main_samples_per_second": 30.031, "eval_main_steps_per_second": 3.793, "step": 4100 }, { "epoch": 1.706200582605077, "eval_anatomy_loss": 2.8472187519073486, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.508, "eval_anatomy_steps_per_second": 3.754, "step": 4100 }, { "epoch": 1.706200582605077, "eval_college_mathematics_loss": 2.0650761127471924, "eval_college_mathematics_runtime": 0.2662, "eval_college_mathematics_samples_per_second": 7.513, "eval_college_mathematics_steps_per_second": 3.757, "step": 4100 }, { "epoch": 1.706200582605077, "eval_international_law_loss": 3.085548162460327, "eval_international_law_runtime": 0.2656, "eval_international_law_samples_per_second": 7.529, "eval_international_law_steps_per_second": 3.765, "step": 4100 }, { "epoch": 1.7145235122763212, "grad_norm": 0.265625, "learning_rate": 4.286308780690803e-06, "loss": 0.2373, "step": 4120 }, { "epoch": 1.7228464419475655, "grad_norm": 0.330078125, "learning_rate": 4.307116104868914e-06, "loss": 0.2394, "step": 4140 }, { "epoch": 1.73116937161881, "grad_norm": 0.3515625, "learning_rate": 4.3279234290470254e-06, "loss": 0.2376, "step": 4160 }, { "epoch": 1.739492301290054, "grad_norm": 0.330078125, "learning_rate": 4.348730753225136e-06, "loss": 0.2348, "step": 4180 }, { "epoch": 1.7478152309612984, "grad_norm": 0.392578125, "learning_rate": 4.369538077403246e-06, "loss": 0.2377, "step": 4200 }, { "epoch": 1.7478152309612984, "eval_main_loss": 0.2439066469669342, "eval_main_runtime": 6.3271, "eval_main_samples_per_second": 30.03, "eval_main_steps_per_second": 3.793, "step": 4200 }, { "epoch": 1.7478152309612984, "eval_anatomy_loss": 2.8513143062591553, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.517, "eval_anatomy_steps_per_second": 3.758, "step": 4200 }, { "epoch": 1.7478152309612984, "eval_college_mathematics_loss": 2.062347412109375, "eval_college_mathematics_runtime": 0.2653, "eval_college_mathematics_samples_per_second": 7.539, "eval_college_mathematics_steps_per_second": 3.769, "step": 4200 }, { "epoch": 1.7478152309612984, "eval_international_law_loss": 3.082612991333008, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.504, "eval_international_law_steps_per_second": 3.752, "step": 4200 }, { "epoch": 1.7561381606325428, "grad_norm": 0.322265625, "learning_rate": 4.390345401581357e-06, "loss": 0.2359, "step": 4220 }, { "epoch": 1.764461090303787, "grad_norm": 0.392578125, "learning_rate": 4.411152725759468e-06, "loss": 0.2387, "step": 4240 }, { "epoch": 1.772784019975031, "grad_norm": 0.330078125, "learning_rate": 4.431960049937578e-06, "loss": 0.2356, "step": 4260 }, { "epoch": 1.7811069496462755, "grad_norm": 0.326171875, "learning_rate": 4.452767374115689e-06, "loss": 0.236, "step": 4280 }, { "epoch": 1.7894298793175198, "grad_norm": 0.35546875, "learning_rate": 4.4735746982938e-06, "loss": 0.2359, "step": 4300 }, { "epoch": 1.7894298793175198, "eval_main_loss": 0.2437724471092224, "eval_main_runtime": 6.33, "eval_main_samples_per_second": 30.016, "eval_main_steps_per_second": 3.791, "step": 4300 }, { "epoch": 1.7894298793175198, "eval_anatomy_loss": 2.847775936126709, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.505, "eval_anatomy_steps_per_second": 3.752, "step": 4300 }, { "epoch": 1.7894298793175198, "eval_college_mathematics_loss": 2.0655105113983154, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.534, "eval_college_mathematics_steps_per_second": 3.767, "step": 4300 }, { "epoch": 1.7894298793175198, "eval_international_law_loss": 3.084287166595459, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.515, "eval_international_law_steps_per_second": 3.757, "step": 4300 }, { "epoch": 1.797752808988764, "grad_norm": 0.322265625, "learning_rate": 4.494382022471911e-06, "loss": 0.2378, "step": 4320 }, { "epoch": 1.8060757386600084, "grad_norm": 0.322265625, "learning_rate": 4.515189346650021e-06, "loss": 0.2356, "step": 4340 }, { "epoch": 1.8143986683312527, "grad_norm": 0.369140625, "learning_rate": 4.535996670828132e-06, "loss": 0.2358, "step": 4360 }, { "epoch": 1.822721598002497, "grad_norm": 0.34375, "learning_rate": 4.556803995006243e-06, "loss": 0.2379, "step": 4380 }, { "epoch": 1.831044527673741, "grad_norm": 0.33984375, "learning_rate": 4.5776113191843534e-06, "loss": 0.2395, "step": 4400 }, { "epoch": 1.831044527673741, "eval_main_loss": 0.24319638311862946, "eval_main_runtime": 6.3267, "eval_main_samples_per_second": 30.032, "eval_main_steps_per_second": 3.793, "step": 4400 }, { "epoch": 1.831044527673741, "eval_anatomy_loss": 2.846129894256592, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.501, "eval_anatomy_steps_per_second": 3.751, "step": 4400 }, { "epoch": 1.831044527673741, "eval_college_mathematics_loss": 2.0631587505340576, "eval_college_mathematics_runtime": 0.2651, "eval_college_mathematics_samples_per_second": 7.545, "eval_college_mathematics_steps_per_second": 3.773, "step": 4400 }, { "epoch": 1.831044527673741, "eval_international_law_loss": 3.0830471515655518, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.498, "eval_international_law_steps_per_second": 3.749, "step": 4400 }, { "epoch": 1.8393674573449854, "grad_norm": 0.29296875, "learning_rate": 4.598418643362464e-06, "loss": 0.2367, "step": 4420 }, { "epoch": 1.8476903870162298, "grad_norm": 0.31640625, "learning_rate": 4.619225967540575e-06, "loss": 0.2356, "step": 4440 }, { "epoch": 1.856013316687474, "grad_norm": 0.3359375, "learning_rate": 4.640033291718685e-06, "loss": 0.2386, "step": 4460 }, { "epoch": 1.8643362463587183, "grad_norm": 0.337890625, "learning_rate": 4.660840615896796e-06, "loss": 0.236, "step": 4480 }, { "epoch": 1.8726591760299627, "grad_norm": 0.32421875, "learning_rate": 4.6816479400749066e-06, "loss": 0.24, "step": 4500 }, { "epoch": 1.8726591760299627, "eval_main_loss": 0.2428365796804428, "eval_main_runtime": 6.3298, "eval_main_samples_per_second": 30.017, "eval_main_steps_per_second": 3.792, "step": 4500 }, { "epoch": 1.8726591760299627, "eval_anatomy_loss": 2.846830368041992, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.526, "eval_anatomy_steps_per_second": 3.763, "step": 4500 }, { "epoch": 1.8726591760299627, "eval_college_mathematics_loss": 2.061974287033081, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.509, "eval_college_mathematics_steps_per_second": 3.755, "step": 4500 }, { "epoch": 1.8726591760299627, "eval_international_law_loss": 3.0783193111419678, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.492, "eval_international_law_steps_per_second": 3.746, "step": 4500 }, { "epoch": 1.8809821057012068, "grad_norm": 0.353515625, "learning_rate": 4.702455264253018e-06, "loss": 0.2359, "step": 4520 }, { "epoch": 1.889305035372451, "grad_norm": 0.310546875, "learning_rate": 4.723262588431128e-06, "loss": 0.2419, "step": 4540 }, { "epoch": 1.8976279650436954, "grad_norm": 0.294921875, "learning_rate": 4.744069912609239e-06, "loss": 0.2353, "step": 4560 }, { "epoch": 1.9059508947149397, "grad_norm": 0.361328125, "learning_rate": 4.76487723678735e-06, "loss": 0.2364, "step": 4580 }, { "epoch": 1.9142738243861839, "grad_norm": 0.330078125, "learning_rate": 4.78568456096546e-06, "loss": 0.2397, "step": 4600 }, { "epoch": 1.9142738243861839, "eval_main_loss": 0.24256636202335358, "eval_main_runtime": 6.328, "eval_main_samples_per_second": 30.025, "eval_main_steps_per_second": 3.793, "step": 4600 }, { "epoch": 1.9142738243861839, "eval_anatomy_loss": 2.8468308448791504, "eval_anatomy_runtime": 0.2667, "eval_anatomy_samples_per_second": 7.498, "eval_anatomy_steps_per_second": 3.749, "step": 4600 }, { "epoch": 1.9142738243861839, "eval_college_mathematics_loss": 2.06019926071167, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.534, "eval_college_mathematics_steps_per_second": 3.767, "step": 4600 }, { "epoch": 1.9142738243861839, "eval_international_law_loss": 3.079383373260498, "eval_international_law_runtime": 0.266, "eval_international_law_samples_per_second": 7.518, "eval_international_law_steps_per_second": 3.759, "step": 4600 }, { "epoch": 1.9225967540574282, "grad_norm": 0.328125, "learning_rate": 4.806491885143571e-06, "loss": 0.2299, "step": 4620 }, { "epoch": 1.9309196837286726, "grad_norm": 0.314453125, "learning_rate": 4.8272992093216814e-06, "loss": 0.2363, "step": 4640 }, { "epoch": 1.9392426133999168, "grad_norm": 0.298828125, "learning_rate": 4.848106533499793e-06, "loss": 0.2336, "step": 4660 }, { "epoch": 1.947565543071161, "grad_norm": 0.326171875, "learning_rate": 4.868913857677903e-06, "loss": 0.2344, "step": 4680 }, { "epoch": 1.9558884727424053, "grad_norm": 0.296875, "learning_rate": 4.889721181856014e-06, "loss": 0.2373, "step": 4700 }, { "epoch": 1.9558884727424053, "eval_main_loss": 0.2421763837337494, "eval_main_runtime": 6.3346, "eval_main_samples_per_second": 29.994, "eval_main_steps_per_second": 3.789, "step": 4700 }, { "epoch": 1.9558884727424053, "eval_anatomy_loss": 2.8438565731048584, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.483, "eval_anatomy_steps_per_second": 3.742, "step": 4700 }, { "epoch": 1.9558884727424053, "eval_college_mathematics_loss": 2.0591747760772705, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.501, "eval_college_mathematics_steps_per_second": 3.75, "step": 4700 }, { "epoch": 1.9558884727424053, "eval_international_law_loss": 3.0781655311584473, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.512, "eval_international_law_steps_per_second": 3.756, "step": 4700 }, { "epoch": 1.9642114024136497, "grad_norm": 0.333984375, "learning_rate": 4.910528506034125e-06, "loss": 0.2338, "step": 4720 }, { "epoch": 1.9725343320848938, "grad_norm": 0.3359375, "learning_rate": 4.9313358302122346e-06, "loss": 0.2396, "step": 4740 }, { "epoch": 1.9808572617561382, "grad_norm": 0.35546875, "learning_rate": 4.952143154390346e-06, "loss": 0.2303, "step": 4760 }, { "epoch": 1.9891801914273826, "grad_norm": 0.310546875, "learning_rate": 4.972950478568456e-06, "loss": 0.2332, "step": 4780 }, { "epoch": 1.9975031210986267, "grad_norm": 0.283203125, "learning_rate": 4.993757802746567e-06, "loss": 0.2409, "step": 4800 }, { "epoch": 1.9975031210986267, "eval_main_loss": 0.24170413613319397, "eval_main_runtime": 6.3319, "eval_main_samples_per_second": 30.007, "eval_main_steps_per_second": 3.79, "step": 4800 }, { "epoch": 1.9975031210986267, "eval_anatomy_loss": 2.843282699584961, "eval_anatomy_runtime": 0.267, "eval_anatomy_samples_per_second": 7.491, "eval_anatomy_steps_per_second": 3.745, "step": 4800 }, { "epoch": 1.9975031210986267, "eval_college_mathematics_loss": 2.059091329574585, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.525, "eval_college_mathematics_steps_per_second": 3.762, "step": 4800 }, { "epoch": 1.9975031210986267, "eval_international_law_loss": 3.0776219367980957, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.511, "eval_international_law_steps_per_second": 3.756, "step": 4800 }, { "epoch": 2.005826050769871, "grad_norm": 0.296875, "learning_rate": 4.999993456981855e-06, "loss": 0.2335, "step": 4820 }, { "epoch": 2.0141489804411155, "grad_norm": 0.3203125, "learning_rate": 4.999961409628488e-06, "loss": 0.2309, "step": 4840 }, { "epoch": 2.0224719101123596, "grad_norm": 0.3515625, "learning_rate": 4.999902656502973e-06, "loss": 0.2327, "step": 4860 }, { "epoch": 2.0307948397836038, "grad_norm": 0.296875, "learning_rate": 4.99981719823294e-06, "loss": 0.2379, "step": 4880 }, { "epoch": 2.039117769454848, "grad_norm": 0.28515625, "learning_rate": 4.999705035731294e-06, "loss": 0.2372, "step": 4900 }, { "epoch": 2.039117769454848, "eval_main_loss": 0.24145889282226562, "eval_main_runtime": 6.3292, "eval_main_samples_per_second": 30.02, "eval_main_steps_per_second": 3.792, "step": 4900 }, { "epoch": 2.039117769454848, "eval_anatomy_loss": 2.841543197631836, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.506, "eval_anatomy_steps_per_second": 3.753, "step": 4900 }, { "epoch": 2.039117769454848, "eval_college_mathematics_loss": 2.058622360229492, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.503, "eval_college_mathematics_steps_per_second": 3.752, "step": 4900 }, { "epoch": 2.039117769454848, "eval_international_law_loss": 3.0742015838623047, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.49, "eval_international_law_steps_per_second": 3.745, "step": 4900 }, { "epoch": 2.0474406991260925, "grad_norm": 0.3203125, "learning_rate": 4.999566170196208e-06, "loss": 0.236, "step": 4920 }, { "epoch": 2.0557636287973367, "grad_norm": 0.26171875, "learning_rate": 4.999400603111109e-06, "loss": 0.2362, "step": 4940 }, { "epoch": 2.064086558468581, "grad_norm": 0.279296875, "learning_rate": 4.999208336244664e-06, "loss": 0.2338, "step": 4960 }, { "epoch": 2.0724094881398254, "grad_norm": 0.28515625, "learning_rate": 4.998989371650758e-06, "loss": 0.2336, "step": 4980 }, { "epoch": 2.0807324178110695, "grad_norm": 0.30078125, "learning_rate": 4.998743711668475e-06, "loss": 0.233, "step": 5000 }, { "epoch": 2.0807324178110695, "eval_main_loss": 0.2409505397081375, "eval_main_runtime": 6.3309, "eval_main_samples_per_second": 30.012, "eval_main_steps_per_second": 3.791, "step": 5000 }, { "epoch": 2.0807324178110695, "eval_anatomy_loss": 2.8420751094818115, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.527, "eval_anatomy_steps_per_second": 3.764, "step": 5000 }, { "epoch": 2.0807324178110695, "eval_college_mathematics_loss": 2.0578598976135254, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.534, "eval_college_mathematics_steps_per_second": 3.767, "step": 5000 }, { "epoch": 2.0807324178110695, "eval_international_law_loss": 3.0756309032440186, "eval_international_law_runtime": 0.2656, "eval_international_law_samples_per_second": 7.529, "eval_international_law_steps_per_second": 3.765, "step": 5000 }, { "epoch": 2.0890553474823137, "grad_norm": 0.291015625, "learning_rate": 4.998471358922071e-06, "loss": 0.227, "step": 5020 }, { "epoch": 2.097378277153558, "grad_norm": 0.275390625, "learning_rate": 4.998172316320947e-06, "loss": 0.2354, "step": 5040 }, { "epoch": 2.1057012068248024, "grad_norm": 0.322265625, "learning_rate": 4.997846587059618e-06, "loss": 0.2314, "step": 5060 }, { "epoch": 2.1140241364960466, "grad_norm": 0.328125, "learning_rate": 4.997494174617679e-06, "loss": 0.2381, "step": 5080 }, { "epoch": 2.1223470661672907, "grad_norm": 0.28515625, "learning_rate": 4.997115082759764e-06, "loss": 0.233, "step": 5100 }, { "epoch": 2.1223470661672907, "eval_main_loss": 0.2407059371471405, "eval_main_runtime": 6.33, "eval_main_samples_per_second": 30.016, "eval_main_steps_per_second": 3.791, "step": 5100 }, { "epoch": 2.1223470661672907, "eval_anatomy_loss": 2.8402063846588135, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.503, "eval_anatomy_steps_per_second": 3.752, "step": 5100 }, { "epoch": 2.1223470661672907, "eval_college_mathematics_loss": 2.0568246841430664, "eval_college_mathematics_runtime": 0.2661, "eval_college_mathematics_samples_per_second": 7.517, "eval_college_mathematics_steps_per_second": 3.758, "step": 5100 }, { "epoch": 2.1223470661672907, "eval_international_law_loss": 3.073444128036499, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.514, "eval_international_law_steps_per_second": 3.757, "step": 5100 }, { "epoch": 2.1306699958385353, "grad_norm": 0.322265625, "learning_rate": 4.996709315535515e-06, "loss": 0.2334, "step": 5120 }, { "epoch": 2.1389929255097795, "grad_norm": 0.2578125, "learning_rate": 4.9962768772795274e-06, "loss": 0.235, "step": 5140 }, { "epoch": 2.1473158551810236, "grad_norm": 0.31640625, "learning_rate": 4.995817772611314e-06, "loss": 0.232, "step": 5160 }, { "epoch": 2.1556387848522682, "grad_norm": 0.3046875, "learning_rate": 4.995332006435246e-06, "loss": 0.2399, "step": 5180 }, { "epoch": 2.1639617145235124, "grad_norm": 0.294921875, "learning_rate": 4.9948195839405085e-06, "loss": 0.2345, "step": 5200 }, { "epoch": 2.1639617145235124, "eval_main_loss": 0.2404136210680008, "eval_main_runtime": 6.3338, "eval_main_samples_per_second": 29.998, "eval_main_steps_per_second": 3.789, "step": 5200 }, { "epoch": 2.1639617145235124, "eval_anatomy_loss": 2.838599443435669, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.527, "eval_anatomy_steps_per_second": 3.763, "step": 5200 }, { "epoch": 2.1639617145235124, "eval_college_mathematics_loss": 2.056016683578491, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.51, "eval_college_mathematics_steps_per_second": 3.755, "step": 5200 }, { "epoch": 2.1639617145235124, "eval_international_law_loss": 3.0730698108673096, "eval_international_law_runtime": 0.2655, "eval_international_law_samples_per_second": 7.533, "eval_international_law_steps_per_second": 3.767, "step": 5200 }, { "epoch": 2.1722846441947565, "grad_norm": 0.333984375, "learning_rate": 4.9942805106010415e-06, "loss": 0.2301, "step": 5220 }, { "epoch": 2.1806075738660007, "grad_norm": 0.296875, "learning_rate": 4.993714792175483e-06, "loss": 0.2344, "step": 5240 }, { "epoch": 2.1889305035372453, "grad_norm": 0.3125, "learning_rate": 4.993122434707103e-06, "loss": 0.2359, "step": 5260 }, { "epoch": 2.1972534332084894, "grad_norm": 0.34375, "learning_rate": 4.992503444523746e-06, "loss": 0.2366, "step": 5280 }, { "epoch": 2.2055763628797336, "grad_norm": 0.302734375, "learning_rate": 4.991857828237757e-06, "loss": 0.2369, "step": 5300 }, { "epoch": 2.2055763628797336, "eval_main_loss": 0.24032773077487946, "eval_main_runtime": 6.3258, "eval_main_samples_per_second": 30.036, "eval_main_steps_per_second": 3.794, "step": 5300 }, { "epoch": 2.2055763628797336, "eval_anatomy_loss": 2.839557409286499, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.515, "eval_anatomy_steps_per_second": 3.758, "step": 5300 }, { "epoch": 2.2055763628797336, "eval_college_mathematics_loss": 2.0543038845062256, "eval_college_mathematics_runtime": 0.2669, "eval_college_mathematics_samples_per_second": 7.494, "eval_college_mathematics_steps_per_second": 3.747, "step": 5300 }, { "epoch": 2.2055763628797336, "eval_international_law_loss": 3.0714683532714844, "eval_international_law_runtime": 0.2671, "eval_international_law_samples_per_second": 7.487, "eval_international_law_steps_per_second": 3.743, "step": 5300 }, { "epoch": 2.2138992925509777, "grad_norm": 0.287109375, "learning_rate": 4.9911855927459175e-06, "loss": 0.2338, "step": 5320 }, { "epoch": 2.2222222222222223, "grad_norm": 0.3359375, "learning_rate": 4.990486745229364e-06, "loss": 0.2324, "step": 5340 }, { "epoch": 2.2305451518934665, "grad_norm": 0.337890625, "learning_rate": 4.989761293153516e-06, "loss": 0.2362, "step": 5360 }, { "epoch": 2.2388680815647106, "grad_norm": 0.2734375, "learning_rate": 4.989009244267998e-06, "loss": 0.2331, "step": 5380 }, { "epoch": 2.247191011235955, "grad_norm": 0.322265625, "learning_rate": 4.988230606606552e-06, "loss": 0.235, "step": 5400 }, { "epoch": 2.247191011235955, "eval_main_loss": 0.2399168312549591, "eval_main_runtime": 6.3585, "eval_main_samples_per_second": 29.881, "eval_main_steps_per_second": 3.774, "step": 5400 }, { "epoch": 2.247191011235955, "eval_anatomy_loss": 2.83823823928833, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.505, "eval_anatomy_steps_per_second": 3.752, "step": 5400 }, { "epoch": 2.247191011235955, "eval_college_mathematics_loss": 2.0524001121520996, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.497, "eval_college_mathematics_steps_per_second": 3.748, "step": 5400 }, { "epoch": 2.247191011235955, "eval_international_law_loss": 3.073040246963501, "eval_international_law_runtime": 0.2681, "eval_international_law_samples_per_second": 7.461, "eval_international_law_steps_per_second": 3.73, "step": 5400 }, { "epoch": 2.2555139409071994, "grad_norm": 0.29296875, "learning_rate": 4.987425388486953e-06, "loss": 0.2338, "step": 5420 }, { "epoch": 2.2638368705784435, "grad_norm": 0.29296875, "learning_rate": 4.986593598510924e-06, "loss": 0.2309, "step": 5440 }, { "epoch": 2.272159800249688, "grad_norm": 0.271484375, "learning_rate": 4.985735245564039e-06, "loss": 0.2325, "step": 5460 }, { "epoch": 2.2804827299209323, "grad_norm": 0.34375, "learning_rate": 4.984850338815631e-06, "loss": 0.2319, "step": 5480 }, { "epoch": 2.2888056595921764, "grad_norm": 0.333984375, "learning_rate": 4.983938887718692e-06, "loss": 0.2414, "step": 5500 }, { "epoch": 2.2888056595921764, "eval_main_loss": 0.23984655737876892, "eval_main_runtime": 6.3511, "eval_main_samples_per_second": 29.916, "eval_main_steps_per_second": 3.779, "step": 5500 }, { "epoch": 2.2888056595921764, "eval_anatomy_loss": 2.8364694118499756, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.483, "eval_anatomy_steps_per_second": 3.741, "step": 5500 }, { "epoch": 2.2888056595921764, "eval_college_mathematics_loss": 2.051426410675049, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.496, "eval_college_mathematics_steps_per_second": 3.748, "step": 5500 }, { "epoch": 2.2888056595921764, "eval_international_law_loss": 3.0698070526123047, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.502, "eval_international_law_steps_per_second": 3.751, "step": 5500 }, { "epoch": 2.2971285892634206, "grad_norm": 0.30078125, "learning_rate": 4.983000902009776e-06, "loss": 0.2384, "step": 5520 }, { "epoch": 2.305451518934665, "grad_norm": 0.306640625, "learning_rate": 4.982036391708891e-06, "loss": 0.2342, "step": 5540 }, { "epoch": 2.3137744486059093, "grad_norm": 0.2734375, "learning_rate": 4.98104536711939e-06, "loss": 0.2402, "step": 5560 }, { "epoch": 2.3220973782771535, "grad_norm": 0.26953125, "learning_rate": 4.9800278388278715e-06, "loss": 0.231, "step": 5580 }, { "epoch": 2.3304203079483976, "grad_norm": 0.294921875, "learning_rate": 4.978983817704051e-06, "loss": 0.2428, "step": 5600 }, { "epoch": 2.3304203079483976, "eval_main_loss": 0.23956555128097534, "eval_main_runtime": 6.334, "eval_main_samples_per_second": 29.997, "eval_main_steps_per_second": 3.789, "step": 5600 }, { "epoch": 2.3304203079483976, "eval_anatomy_loss": 2.8366129398345947, "eval_anatomy_runtime": 0.268, "eval_anatomy_samples_per_second": 7.464, "eval_anatomy_steps_per_second": 3.732, "step": 5600 }, { "epoch": 2.3304203079483976, "eval_college_mathematics_loss": 2.0517213344573975, "eval_college_mathematics_runtime": 0.2675, "eval_college_mathematics_samples_per_second": 7.477, "eval_college_mathematics_steps_per_second": 3.738, "step": 5600 }, { "epoch": 2.3304203079483976, "eval_international_law_loss": 3.0719730854034424, "eval_international_law_runtime": 0.2653, "eval_international_law_samples_per_second": 7.538, "eval_international_law_steps_per_second": 3.769, "step": 5600 }, { "epoch": 2.338743237619642, "grad_norm": 0.259765625, "learning_rate": 4.977913314900659e-06, "loss": 0.228, "step": 5620 }, { "epoch": 2.3470661672908864, "grad_norm": 0.265625, "learning_rate": 4.976816341853312e-06, "loss": 0.2344, "step": 5640 }, { "epoch": 2.3553890969621305, "grad_norm": 0.294921875, "learning_rate": 4.975692910280397e-06, "loss": 0.2305, "step": 5660 }, { "epoch": 2.363712026633375, "grad_norm": 0.279296875, "learning_rate": 4.974543032182943e-06, "loss": 0.2372, "step": 5680 }, { "epoch": 2.3720349563046192, "grad_norm": 0.294921875, "learning_rate": 4.973366719844491e-06, "loss": 0.2319, "step": 5700 }, { "epoch": 2.3720349563046192, "eval_main_loss": 0.23942260444164276, "eval_main_runtime": 6.3369, "eval_main_samples_per_second": 29.983, "eval_main_steps_per_second": 3.787, "step": 5700 }, { "epoch": 2.3720349563046192, "eval_anatomy_loss": 2.8341376781463623, "eval_anatomy_runtime": 0.2654, "eval_anatomy_samples_per_second": 7.537, "eval_anatomy_steps_per_second": 3.768, "step": 5700 }, { "epoch": 2.3720349563046192, "eval_college_mathematics_loss": 2.0530576705932617, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.512, "eval_college_mathematics_steps_per_second": 3.756, "step": 5700 }, { "epoch": 2.3720349563046192, "eval_international_law_loss": 3.070324420928955, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.513, "eval_international_law_steps_per_second": 3.757, "step": 5700 }, { "epoch": 2.3803578859758634, "grad_norm": 0.30859375, "learning_rate": 4.972163985830967e-06, "loss": 0.2326, "step": 5720 }, { "epoch": 2.388680815647108, "grad_norm": 0.296875, "learning_rate": 4.970934842990546e-06, "loss": 0.234, "step": 5740 }, { "epoch": 2.397003745318352, "grad_norm": 0.28515625, "learning_rate": 4.969679304453513e-06, "loss": 0.231, "step": 5760 }, { "epoch": 2.4053266749895963, "grad_norm": 0.33984375, "learning_rate": 4.968397383632127e-06, "loss": 0.2322, "step": 5780 }, { "epoch": 2.4136496046608404, "grad_norm": 0.263671875, "learning_rate": 4.967089094220473e-06, "loss": 0.2276, "step": 5800 }, { "epoch": 2.4136496046608404, "eval_main_loss": 0.23935824632644653, "eval_main_runtime": 6.3314, "eval_main_samples_per_second": 30.009, "eval_main_steps_per_second": 3.791, "step": 5800 }, { "epoch": 2.4136496046608404, "eval_anatomy_loss": 2.8363730907440186, "eval_anatomy_runtime": 0.2672, "eval_anatomy_samples_per_second": 7.484, "eval_anatomy_steps_per_second": 3.742, "step": 5800 }, { "epoch": 2.4136496046608404, "eval_college_mathematics_loss": 2.04972767829895, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.506, "eval_college_mathematics_steps_per_second": 3.753, "step": 5800 }, { "epoch": 2.4136496046608404, "eval_international_law_loss": 3.0698299407958984, "eval_international_law_runtime": 0.2654, "eval_international_law_samples_per_second": 7.536, "eval_international_law_steps_per_second": 3.768, "step": 5800 }, { "epoch": 2.421972534332085, "grad_norm": 0.2734375, "learning_rate": 4.9657544501943175e-06, "loss": 0.2349, "step": 5820 }, { "epoch": 2.430295464003329, "grad_norm": 0.28125, "learning_rate": 4.964393465810963e-06, "loss": 0.236, "step": 5840 }, { "epoch": 2.4386183936745733, "grad_norm": 0.248046875, "learning_rate": 4.9630061556090855e-06, "loss": 0.2362, "step": 5860 }, { "epoch": 2.4469413233458175, "grad_norm": 0.240234375, "learning_rate": 4.961592534408592e-06, "loss": 0.2285, "step": 5880 }, { "epoch": 2.455264253017062, "grad_norm": 0.30859375, "learning_rate": 4.9601526173104544e-06, "loss": 0.2346, "step": 5900 }, { "epoch": 2.455264253017062, "eval_main_loss": 0.2392611801624298, "eval_main_runtime": 6.3213, "eval_main_samples_per_second": 30.057, "eval_main_steps_per_second": 3.797, "step": 5900 }, { "epoch": 2.455264253017062, "eval_anatomy_loss": 2.8343961238861084, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.505, "eval_anatomy_steps_per_second": 3.753, "step": 5900 }, { "epoch": 2.455264253017062, "eval_college_mathematics_loss": 2.0529263019561768, "eval_college_mathematics_runtime": 0.2662, "eval_college_mathematics_samples_per_second": 7.514, "eval_college_mathematics_steps_per_second": 3.757, "step": 5900 }, { "epoch": 2.455264253017062, "eval_international_law_loss": 3.068134069442749, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.507, "eval_international_law_steps_per_second": 3.753, "step": 5900 }, { "epoch": 2.4635871826883062, "grad_norm": 0.291015625, "learning_rate": 4.958686419696548e-06, "loss": 0.228, "step": 5920 }, { "epoch": 2.4719101123595504, "grad_norm": 0.265625, "learning_rate": 4.9571939572294914e-06, "loss": 0.233, "step": 5940 }, { "epoch": 2.480233042030795, "grad_norm": 0.259765625, "learning_rate": 4.955675245852475e-06, "loss": 0.2316, "step": 5960 }, { "epoch": 2.488555971702039, "grad_norm": 0.33984375, "learning_rate": 4.954130301789093e-06, "loss": 0.2342, "step": 5980 }, { "epoch": 2.4968789013732833, "grad_norm": 0.28125, "learning_rate": 4.952559141543171e-06, "loss": 0.233, "step": 6000 }, { "epoch": 2.4968789013732833, "eval_main_loss": 0.23919633030891418, "eval_main_runtime": 6.3217, "eval_main_samples_per_second": 30.055, "eval_main_steps_per_second": 3.796, "step": 6000 }, { "epoch": 2.4968789013732833, "eval_anatomy_loss": 2.835357427597046, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.515, "eval_anatomy_steps_per_second": 3.758, "step": 6000 }, { "epoch": 2.4968789013732833, "eval_college_mathematics_loss": 2.0497984886169434, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.508, "eval_college_mathematics_steps_per_second": 3.754, "step": 6000 }, { "epoch": 2.4968789013732833, "eval_international_law_loss": 3.0689234733581543, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.494, "eval_international_law_steps_per_second": 3.747, "step": 6000 }, { "epoch": 2.505201831044528, "grad_norm": 0.271484375, "learning_rate": 4.950961781898586e-06, "loss": 0.2345, "step": 6020 }, { "epoch": 2.513524760715772, "grad_norm": 0.2578125, "learning_rate": 4.94933823991909e-06, "loss": 0.2327, "step": 6040 }, { "epoch": 2.521847690387016, "grad_norm": 0.34765625, "learning_rate": 4.947688532948129e-06, "loss": 0.2286, "step": 6060 }, { "epoch": 2.5301706200582608, "grad_norm": 0.283203125, "learning_rate": 4.9460126786086535e-06, "loss": 0.2313, "step": 6080 }, { "epoch": 2.538493549729505, "grad_norm": 0.25, "learning_rate": 4.944310694802935e-06, "loss": 0.2343, "step": 6100 }, { "epoch": 2.538493549729505, "eval_main_loss": 0.23911188542842865, "eval_main_runtime": 6.33, "eval_main_samples_per_second": 30.016, "eval_main_steps_per_second": 3.791, "step": 6100 }, { "epoch": 2.538493549729505, "eval_anatomy_loss": 2.834503650665283, "eval_anatomy_runtime": 0.2651, "eval_anatomy_samples_per_second": 7.545, "eval_anatomy_steps_per_second": 3.772, "step": 6100 }, { "epoch": 2.538493549729505, "eval_college_mathematics_loss": 2.049175977706909, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.5, "eval_college_mathematics_steps_per_second": 3.75, "step": 6100 }, { "epoch": 2.538493549729505, "eval_international_law_loss": 3.0691072940826416, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.502, "eval_international_law_steps_per_second": 3.751, "step": 6100 }, { "epoch": 2.546816479400749, "grad_norm": 0.29296875, "learning_rate": 4.942582599712369e-06, "loss": 0.2356, "step": 6120 }, { "epoch": 2.555139409071993, "grad_norm": 0.29296875, "learning_rate": 4.940828411797287e-06, "loss": 0.2369, "step": 6140 }, { "epoch": 2.5634623387432374, "grad_norm": 0.306640625, "learning_rate": 4.9390481497967545e-06, "loss": 0.2287, "step": 6160 }, { "epoch": 2.571785268414482, "grad_norm": 0.298828125, "learning_rate": 4.937241832728373e-06, "loss": 0.2288, "step": 6180 }, { "epoch": 2.580108198085726, "grad_norm": 0.2890625, "learning_rate": 4.9354094798880806e-06, "loss": 0.2301, "step": 6200 }, { "epoch": 2.580108198085726, "eval_main_loss": 0.23901519179344177, "eval_main_runtime": 6.3573, "eval_main_samples_per_second": 29.887, "eval_main_steps_per_second": 3.775, "step": 6200 }, { "epoch": 2.580108198085726, "eval_anatomy_loss": 2.8348350524902344, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.513, "eval_anatomy_steps_per_second": 3.757, "step": 6200 }, { "epoch": 2.580108198085726, "eval_college_mathematics_loss": 2.048042058944702, "eval_college_mathematics_runtime": 0.2678, "eval_college_mathematics_samples_per_second": 7.467, "eval_college_mathematics_steps_per_second": 3.734, "step": 6200 }, { "epoch": 2.580108198085726, "eval_international_law_loss": 3.0679852962493896, "eval_international_law_runtime": 0.2673, "eval_international_law_samples_per_second": 7.483, "eval_international_law_steps_per_second": 3.742, "step": 6200 }, { "epoch": 2.5884311277569703, "grad_norm": 0.251953125, "learning_rate": 4.9335511108499344e-06, "loss": 0.2333, "step": 6220 }, { "epoch": 2.596754057428215, "grad_norm": 0.291015625, "learning_rate": 4.931666745465915e-06, "loss": 0.2275, "step": 6240 }, { "epoch": 2.605076987099459, "grad_norm": 0.26953125, "learning_rate": 4.929756403865706e-06, "loss": 0.2303, "step": 6260 }, { "epoch": 2.613399916770703, "grad_norm": 0.251953125, "learning_rate": 4.927820106456481e-06, "loss": 0.2314, "step": 6280 }, { "epoch": 2.6217228464419478, "grad_norm": 0.2734375, "learning_rate": 4.925857873922686e-06, "loss": 0.2344, "step": 6300 }, { "epoch": 2.6217228464419478, "eval_main_loss": 0.2388763278722763, "eval_main_runtime": 6.3519, "eval_main_samples_per_second": 29.912, "eval_main_steps_per_second": 3.778, "step": 6300 }, { "epoch": 2.6217228464419478, "eval_anatomy_loss": 2.8354506492614746, "eval_anatomy_runtime": 0.2675, "eval_anatomy_samples_per_second": 7.478, "eval_anatomy_steps_per_second": 3.739, "step": 6300 }, { "epoch": 2.6217228464419478, "eval_college_mathematics_loss": 2.0530946254730225, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.508, "eval_college_mathematics_steps_per_second": 3.754, "step": 6300 }, { "epoch": 2.6217228464419478, "eval_international_law_loss": 3.0681755542755127, "eval_international_law_runtime": 0.2672, "eval_international_law_samples_per_second": 7.485, "eval_international_law_steps_per_second": 3.742, "step": 6300 }, { "epoch": 2.630045776113192, "grad_norm": 0.287109375, "learning_rate": 4.923869727225819e-06, "loss": 0.23, "step": 6320 }, { "epoch": 2.638368705784436, "grad_norm": 0.259765625, "learning_rate": 4.921855687604206e-06, "loss": 0.2337, "step": 6340 }, { "epoch": 2.6466916354556806, "grad_norm": 0.267578125, "learning_rate": 4.91981577657277e-06, "loss": 0.2337, "step": 6360 }, { "epoch": 2.655014565126925, "grad_norm": 0.2412109375, "learning_rate": 4.917750015922809e-06, "loss": 0.2363, "step": 6380 }, { "epoch": 2.663337494798169, "grad_norm": 0.29296875, "learning_rate": 4.915658427721755e-06, "loss": 0.2317, "step": 6400 }, { "epoch": 2.663337494798169, "eval_main_loss": 0.23897981643676758, "eval_main_runtime": 6.3411, "eval_main_samples_per_second": 29.963, "eval_main_steps_per_second": 3.785, "step": 6400 }, { "epoch": 2.663337494798169, "eval_anatomy_loss": 2.8342771530151367, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.495, "eval_anatomy_steps_per_second": 3.748, "step": 6400 }, { "epoch": 2.663337494798169, "eval_college_mathematics_loss": 2.0512688159942627, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.51, "eval_college_mathematics_steps_per_second": 3.755, "step": 6400 }, { "epoch": 2.663337494798169, "eval_international_law_loss": 3.0684025287628174, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.504, "eval_international_law_steps_per_second": 3.752, "step": 6400 }, { "epoch": 2.671660424469413, "grad_norm": 0.310546875, "learning_rate": 4.9135410343129465e-06, "loss": 0.2327, "step": 6420 }, { "epoch": 2.6799833541406572, "grad_norm": 0.306640625, "learning_rate": 4.911397858315382e-06, "loss": 0.2279, "step": 6440 }, { "epoch": 2.688306283811902, "grad_norm": 0.326171875, "learning_rate": 4.909228922623482e-06, "loss": 0.2287, "step": 6460 }, { "epoch": 2.696629213483146, "grad_norm": 0.337890625, "learning_rate": 4.907034250406846e-06, "loss": 0.2288, "step": 6480 }, { "epoch": 2.70495214315439, "grad_norm": 0.294921875, "learning_rate": 4.904813865110002e-06, "loss": 0.2308, "step": 6500 }, { "epoch": 2.70495214315439, "eval_main_loss": 0.23884011805057526, "eval_main_runtime": 6.3205, "eval_main_samples_per_second": 30.061, "eval_main_steps_per_second": 3.797, "step": 6500 }, { "epoch": 2.70495214315439, "eval_anatomy_loss": 2.8351662158966064, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.51, "eval_anatomy_steps_per_second": 3.755, "step": 6500 }, { "epoch": 2.70495214315439, "eval_college_mathematics_loss": 2.0514416694641113, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.496, "eval_college_mathematics_steps_per_second": 3.748, "step": 6500 }, { "epoch": 2.70495214315439, "eval_international_law_loss": 3.0670132637023926, "eval_international_law_runtime": 0.265, "eval_international_law_samples_per_second": 7.546, "eval_international_law_steps_per_second": 3.773, "step": 6500 }, { "epoch": 2.7132750728256347, "grad_norm": 0.302734375, "learning_rate": 4.902567790452158e-06, "loss": 0.2308, "step": 6520 }, { "epoch": 2.721598002496879, "grad_norm": 0.263671875, "learning_rate": 4.900296050426947e-06, "loss": 0.2373, "step": 6540 }, { "epoch": 2.729920932168123, "grad_norm": 0.2197265625, "learning_rate": 4.897998669302173e-06, "loss": 0.2291, "step": 6560 }, { "epoch": 2.7382438618393676, "grad_norm": 0.36328125, "learning_rate": 4.895675671619549e-06, "loss": 0.2369, "step": 6580 }, { "epoch": 2.746566791510612, "grad_norm": 0.2578125, "learning_rate": 4.893327082194436e-06, "loss": 0.2353, "step": 6600 }, { "epoch": 2.746566791510612, "eval_main_loss": 0.23877017199993134, "eval_main_runtime": 6.35, "eval_main_samples_per_second": 29.921, "eval_main_steps_per_second": 3.78, "step": 6600 }, { "epoch": 2.746566791510612, "eval_anatomy_loss": 2.832685947418213, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.502, "eval_anatomy_steps_per_second": 3.751, "step": 6600 }, { "epoch": 2.746566791510612, "eval_college_mathematics_loss": 2.0484695434570312, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.489, "eval_college_mathematics_steps_per_second": 3.744, "step": 6600 }, { "epoch": 2.746566791510612, "eval_international_law_loss": 3.0677947998046875, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.493, "eval_international_law_steps_per_second": 3.746, "step": 6600 }, { "epoch": 2.754889721181856, "grad_norm": 0.267578125, "learning_rate": 4.890952926115581e-06, "loss": 0.2308, "step": 6620 }, { "epoch": 2.7632126508531005, "grad_norm": 0.2890625, "learning_rate": 4.888553228744842e-06, "loss": 0.233, "step": 6640 }, { "epoch": 2.7715355805243447, "grad_norm": 0.306640625, "learning_rate": 4.886128015716925e-06, "loss": 0.2397, "step": 6660 }, { "epoch": 2.779858510195589, "grad_norm": 0.3203125, "learning_rate": 4.883677312939103e-06, "loss": 0.2345, "step": 6680 }, { "epoch": 2.788181439866833, "grad_norm": 0.291015625, "learning_rate": 4.881201146590945e-06, "loss": 0.2314, "step": 6700 }, { "epoch": 2.788181439866833, "eval_main_loss": 0.23880185186862946, "eval_main_runtime": 6.3571, "eval_main_samples_per_second": 29.888, "eval_main_steps_per_second": 3.775, "step": 6700 }, { "epoch": 2.788181439866833, "eval_anatomy_loss": 2.83292555809021, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.483, "eval_anatomy_steps_per_second": 3.741, "step": 6700 }, { "epoch": 2.788181439866833, "eval_college_mathematics_loss": 2.048858880996704, "eval_college_mathematics_runtime": 0.2675, "eval_college_mathematics_samples_per_second": 7.478, "eval_college_mathematics_steps_per_second": 3.739, "step": 6700 }, { "epoch": 2.788181439866833, "eval_international_law_loss": 3.066660165786743, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.505, "eval_international_law_steps_per_second": 3.752, "step": 6700 }, { "epoch": 2.796504369538077, "grad_norm": 0.251953125, "learning_rate": 4.878699543124031e-06, "loss": 0.2328, "step": 6720 }, { "epoch": 2.8048272992093217, "grad_norm": 0.291015625, "learning_rate": 4.876172529261678e-06, "loss": 0.2334, "step": 6740 }, { "epoch": 2.813150228880566, "grad_norm": 0.28515625, "learning_rate": 4.873620131998642e-06, "loss": 0.2327, "step": 6760 }, { "epoch": 2.82147315855181, "grad_norm": 0.314453125, "learning_rate": 4.871042378600842e-06, "loss": 0.2309, "step": 6780 }, { "epoch": 2.8297960882230546, "grad_norm": 0.2890625, "learning_rate": 4.8684392966050594e-06, "loss": 0.2307, "step": 6800 }, { "epoch": 2.8297960882230546, "eval_main_loss": 0.2386612743139267, "eval_main_runtime": 6.3547, "eval_main_samples_per_second": 29.899, "eval_main_steps_per_second": 3.777, "step": 6800 }, { "epoch": 2.8297960882230546, "eval_anatomy_loss": 2.83203125, "eval_anatomy_runtime": 0.268, "eval_anatomy_samples_per_second": 7.464, "eval_anatomy_steps_per_second": 3.732, "step": 6800 }, { "epoch": 2.8297960882230546, "eval_college_mathematics_loss": 2.0493810176849365, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.487, "eval_college_mathematics_steps_per_second": 3.743, "step": 6800 }, { "epoch": 2.8297960882230546, "eval_international_law_loss": 3.065821409225464, "eval_international_law_runtime": 0.2667, "eval_international_law_samples_per_second": 7.498, "eval_international_law_steps_per_second": 3.749, "step": 6800 }, { "epoch": 2.8381190178942988, "grad_norm": 0.302734375, "learning_rate": 4.865810913818651e-06, "loss": 0.2294, "step": 6820 }, { "epoch": 2.846441947565543, "grad_norm": 0.291015625, "learning_rate": 4.863157258319245e-06, "loss": 0.2324, "step": 6840 }, { "epoch": 2.8547648772367875, "grad_norm": 0.27734375, "learning_rate": 4.8604783584544475e-06, "loss": 0.2313, "step": 6860 }, { "epoch": 2.8630878069080317, "grad_norm": 0.28515625, "learning_rate": 4.857774242841536e-06, "loss": 0.2281, "step": 6880 }, { "epoch": 2.871410736579276, "grad_norm": 0.296875, "learning_rate": 4.855044940367155e-06, "loss": 0.2329, "step": 6900 }, { "epoch": 2.871410736579276, "eval_main_loss": 0.23859336972236633, "eval_main_runtime": 6.3286, "eval_main_samples_per_second": 30.022, "eval_main_steps_per_second": 3.792, "step": 6900 }, { "epoch": 2.871410736579276, "eval_anatomy_loss": 2.833575487136841, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.507, "eval_anatomy_steps_per_second": 3.753, "step": 6900 }, { "epoch": 2.871410736579276, "eval_college_mathematics_loss": 2.0479042530059814, "eval_college_mathematics_runtime": 0.2652, "eval_college_mathematics_samples_per_second": 7.542, "eval_college_mathematics_steps_per_second": 3.771, "step": 6900 }, { "epoch": 2.871410736579276, "eval_international_law_loss": 3.0679168701171875, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.516, "eval_international_law_steps_per_second": 3.758, "step": 6900 }, { "epoch": 2.8797336662505204, "grad_norm": 0.275390625, "learning_rate": 4.8522904801870065e-06, "loss": 0.2309, "step": 6920 }, { "epoch": 2.8880565959217646, "grad_norm": 0.330078125, "learning_rate": 4.8495108917255385e-06, "loss": 0.2347, "step": 6940 }, { "epoch": 2.8963795255930087, "grad_norm": 0.296875, "learning_rate": 4.846706204675632e-06, "loss": 0.2301, "step": 6960 }, { "epoch": 2.904702455264253, "grad_norm": 0.35546875, "learning_rate": 4.843876448998283e-06, "loss": 0.2354, "step": 6980 }, { "epoch": 2.9130253849354975, "grad_norm": 0.314453125, "learning_rate": 4.841021654922281e-06, "loss": 0.2359, "step": 7000 }, { "epoch": 2.9130253849354975, "eval_main_loss": 0.23874352872371674, "eval_main_runtime": 6.316, "eval_main_samples_per_second": 30.083, "eval_main_steps_per_second": 3.8, "step": 7000 }, { "epoch": 2.9130253849354975, "eval_anatomy_loss": 2.832608699798584, "eval_anatomy_runtime": 0.2648, "eval_anatomy_samples_per_second": 7.552, "eval_anatomy_steps_per_second": 3.776, "step": 7000 }, { "epoch": 2.9130253849354975, "eval_college_mathematics_loss": 2.0510120391845703, "eval_college_mathematics_runtime": 0.2649, "eval_college_mathematics_samples_per_second": 7.55, "eval_college_mathematics_steps_per_second": 3.775, "step": 7000 }, { "epoch": 2.9130253849354975, "eval_international_law_loss": 3.0677924156188965, "eval_international_law_runtime": 0.2642, "eval_international_law_samples_per_second": 7.57, "eval_international_law_steps_per_second": 3.785, "step": 7000 }, { "epoch": 2.9213483146067416, "grad_norm": 0.298828125, "learning_rate": 4.838141852943891e-06, "loss": 0.2289, "step": 7020 }, { "epoch": 2.9296712442779858, "grad_norm": 0.330078125, "learning_rate": 4.835237073826521e-06, "loss": 0.2305, "step": 7040 }, { "epoch": 2.93799417394923, "grad_norm": 0.279296875, "learning_rate": 4.8323073486003976e-06, "loss": 0.2312, "step": 7060 }, { "epoch": 2.9463171036204745, "grad_norm": 0.28515625, "learning_rate": 4.829352708562233e-06, "loss": 0.2321, "step": 7080 }, { "epoch": 2.9546400332917186, "grad_norm": 0.296875, "learning_rate": 4.826373185274893e-06, "loss": 0.2336, "step": 7100 }, { "epoch": 2.9546400332917186, "eval_main_loss": 0.23865433037281036, "eval_main_runtime": 6.3269, "eval_main_samples_per_second": 30.031, "eval_main_steps_per_second": 3.793, "step": 7100 }, { "epoch": 2.9546400332917186, "eval_anatomy_loss": 2.8344128131866455, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.513, "eval_anatomy_steps_per_second": 3.756, "step": 7100 }, { "epoch": 2.9546400332917186, "eval_college_mathematics_loss": 2.0521576404571533, "eval_college_mathematics_runtime": 0.2649, "eval_college_mathematics_samples_per_second": 7.55, "eval_college_mathematics_steps_per_second": 3.775, "step": 7100 }, { "epoch": 2.9546400332917186, "eval_international_law_loss": 3.0670950412750244, "eval_international_law_runtime": 0.2651, "eval_international_law_samples_per_second": 7.545, "eval_international_law_steps_per_second": 3.772, "step": 7100 }, { "epoch": 2.962962962962963, "grad_norm": 0.267578125, "learning_rate": 4.823368810567056e-06, "loss": 0.2332, "step": 7120 }, { "epoch": 2.9712858926342074, "grad_norm": 0.31640625, "learning_rate": 4.820339616532878e-06, "loss": 0.2297, "step": 7140 }, { "epoch": 2.9796088223054515, "grad_norm": 0.283203125, "learning_rate": 4.817285635531641e-06, "loss": 0.2311, "step": 7160 }, { "epoch": 2.9879317519766957, "grad_norm": 0.30859375, "learning_rate": 4.81420690018742e-06, "loss": 0.231, "step": 7180 }, { "epoch": 2.9962546816479403, "grad_norm": 0.31640625, "learning_rate": 4.811103443388724e-06, "loss": 0.2332, "step": 7200 }, { "epoch": 2.9962546816479403, "eval_main_loss": 0.23862618207931519, "eval_main_runtime": 6.3548, "eval_main_samples_per_second": 29.899, "eval_main_steps_per_second": 3.777, "step": 7200 }, { "epoch": 2.9962546816479403, "eval_anatomy_loss": 2.834916114807129, "eval_anatomy_runtime": 0.2679, "eval_anatomy_samples_per_second": 7.464, "eval_anatomy_steps_per_second": 3.732, "step": 7200 }, { "epoch": 2.9962546816479403, "eval_college_mathematics_loss": 2.0544211864471436, "eval_college_mathematics_runtime": 0.2676, "eval_college_mathematics_samples_per_second": 7.475, "eval_college_mathematics_steps_per_second": 3.737, "step": 7200 }, { "epoch": 2.9962546816479403, "eval_international_law_loss": 3.067124843597412, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.49, "eval_international_law_steps_per_second": 3.745, "step": 7200 }, { "epoch": 3.0045776113191844, "grad_norm": 0.3125, "learning_rate": 4.807975298288149e-06, "loss": 0.233, "step": 7220 }, { "epoch": 3.0129005409904286, "grad_norm": 0.271484375, "learning_rate": 4.804822498302021e-06, "loss": 0.2302, "step": 7240 }, { "epoch": 3.0212234706616727, "grad_norm": 0.244140625, "learning_rate": 4.8016450771100455e-06, "loss": 0.2335, "step": 7260 }, { "epoch": 3.0295464003329173, "grad_norm": 0.26171875, "learning_rate": 4.798443068654939e-06, "loss": 0.2322, "step": 7280 }, { "epoch": 3.0378693300041615, "grad_norm": 0.267578125, "learning_rate": 4.795216507142074e-06, "loss": 0.2298, "step": 7300 }, { "epoch": 3.0378693300041615, "eval_main_loss": 0.23858603835105896, "eval_main_runtime": 6.3548, "eval_main_samples_per_second": 29.899, "eval_main_steps_per_second": 3.777, "step": 7300 }, { "epoch": 3.0378693300041615, "eval_anatomy_loss": 2.832512617111206, "eval_anatomy_runtime": 0.2671, "eval_anatomy_samples_per_second": 7.487, "eval_anatomy_steps_per_second": 3.743, "step": 7300 }, { "epoch": 3.0378693300041615, "eval_college_mathematics_loss": 2.0511765480041504, "eval_college_mathematics_runtime": 0.2677, "eval_college_mathematics_samples_per_second": 7.472, "eval_college_mathematics_steps_per_second": 3.736, "step": 7300 }, { "epoch": 3.0378693300041615, "eval_international_law_loss": 3.067012071609497, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.491, "eval_international_law_steps_per_second": 3.745, "step": 7300 }, { "epoch": 3.0461922596754056, "grad_norm": 0.263671875, "learning_rate": 4.791965427039109e-06, "loss": 0.234, "step": 7320 }, { "epoch": 3.0545151893466502, "grad_norm": 0.2734375, "learning_rate": 4.788689863075622e-06, "loss": 0.2319, "step": 7340 }, { "epoch": 3.0628381190178944, "grad_norm": 0.31640625, "learning_rate": 4.785389850242739e-06, "loss": 0.2309, "step": 7360 }, { "epoch": 3.0711610486891385, "grad_norm": 0.240234375, "learning_rate": 4.78206542379276e-06, "loss": 0.2283, "step": 7380 }, { "epoch": 3.0794839783603827, "grad_norm": 0.279296875, "learning_rate": 4.778716619238784e-06, "loss": 0.23, "step": 7400 }, { "epoch": 3.0794839783603827, "eval_main_loss": 0.23865698277950287, "eval_main_runtime": 6.3366, "eval_main_samples_per_second": 29.984, "eval_main_steps_per_second": 3.788, "step": 7400 }, { "epoch": 3.0794839783603827, "eval_anatomy_loss": 2.8329567909240723, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.502, "eval_anatomy_steps_per_second": 3.751, "step": 7400 }, { "epoch": 3.0794839783603827, "eval_college_mathematics_loss": 2.0505635738372803, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.509, "eval_college_mathematics_steps_per_second": 3.755, "step": 7400 }, { "epoch": 3.0794839783603827, "eval_international_law_loss": 3.06803035736084, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.512, "eval_international_law_steps_per_second": 3.756, "step": 7400 }, { "epoch": 3.0878069080316273, "grad_norm": 0.2412109375, "learning_rate": 4.7753434723543266e-06, "loss": 0.2308, "step": 7420 }, { "epoch": 3.0961298377028714, "grad_norm": 0.28515625, "learning_rate": 4.771946019172942e-06, "loss": 0.2331, "step": 7440 }, { "epoch": 3.1044527673741156, "grad_norm": 0.2890625, "learning_rate": 4.768524295987835e-06, "loss": 0.2383, "step": 7460 }, { "epoch": 3.11277569704536, "grad_norm": 0.25, "learning_rate": 4.765078339351472e-06, "loss": 0.2327, "step": 7480 }, { "epoch": 3.1210986267166043, "grad_norm": 0.3046875, "learning_rate": 4.761608186075196e-06, "loss": 0.2312, "step": 7500 }, { "epoch": 3.1210986267166043, "eval_main_loss": 0.2385028600692749, "eval_main_runtime": 6.3337, "eval_main_samples_per_second": 29.998, "eval_main_steps_per_second": 3.789, "step": 7500 }, { "epoch": 3.1210986267166043, "eval_anatomy_loss": 2.834723949432373, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.522, "eval_anatomy_steps_per_second": 3.761, "step": 7500 }, { "epoch": 3.1210986267166043, "eval_college_mathematics_loss": 2.048158884048462, "eval_college_mathematics_runtime": 0.2669, "eval_college_mathematics_samples_per_second": 7.494, "eval_college_mathematics_steps_per_second": 3.747, "step": 7500 }, { "epoch": 3.1210986267166043, "eval_international_law_loss": 3.065969228744507, "eval_international_law_runtime": 0.2652, "eval_international_law_samples_per_second": 7.542, "eval_international_law_steps_per_second": 3.771, "step": 7500 }, { "epoch": 3.1294215563878485, "grad_norm": 0.30078125, "learning_rate": 4.758113873228828e-06, "loss": 0.2361, "step": 7520 }, { "epoch": 3.1377444860590926, "grad_norm": 0.328125, "learning_rate": 4.754595438140272e-06, "loss": 0.2312, "step": 7540 }, { "epoch": 3.146067415730337, "grad_norm": 0.265625, "learning_rate": 4.75105291839512e-06, "loss": 0.232, "step": 7560 }, { "epoch": 3.1543903454015814, "grad_norm": 0.2353515625, "learning_rate": 4.747486351836246e-06, "loss": 0.2308, "step": 7580 }, { "epoch": 3.1627132750728255, "grad_norm": 0.294921875, "learning_rate": 4.743895776563403e-06, "loss": 0.2334, "step": 7600 }, { "epoch": 3.1627132750728255, "eval_main_loss": 0.23862504959106445, "eval_main_runtime": 6.3275, "eval_main_samples_per_second": 30.028, "eval_main_steps_per_second": 3.793, "step": 7600 }, { "epoch": 3.1627132750728255, "eval_anatomy_loss": 2.8308141231536865, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.528, "eval_anatomy_steps_per_second": 3.764, "step": 7600 }, { "epoch": 3.1627132750728255, "eval_college_mathematics_loss": 2.053100347518921, "eval_college_mathematics_runtime": 0.2659, "eval_college_mathematics_samples_per_second": 7.521, "eval_college_mathematics_steps_per_second": 3.761, "step": 7600 }, { "epoch": 3.1627132750728255, "eval_international_law_loss": 3.066927433013916, "eval_international_law_runtime": 0.2659, "eval_international_law_samples_per_second": 7.521, "eval_international_law_steps_per_second": 3.76, "step": 7600 }, { "epoch": 3.17103620474407, "grad_norm": 0.25390625, "learning_rate": 4.740281230932817e-06, "loss": 0.2345, "step": 7620 }, { "epoch": 3.1793591344153143, "grad_norm": 0.236328125, "learning_rate": 4.736642753556777e-06, "loss": 0.2343, "step": 7640 }, { "epoch": 3.1876820640865584, "grad_norm": 0.318359375, "learning_rate": 4.732980383303223e-06, "loss": 0.2327, "step": 7660 }, { "epoch": 3.1960049937578026, "grad_norm": 0.30859375, "learning_rate": 4.729294159295329e-06, "loss": 0.2316, "step": 7680 }, { "epoch": 3.204327923429047, "grad_norm": 0.310546875, "learning_rate": 4.725584120911085e-06, "loss": 0.2324, "step": 7700 }, { "epoch": 3.204327923429047, "eval_main_loss": 0.2384970486164093, "eval_main_runtime": 6.3308, "eval_main_samples_per_second": 30.012, "eval_main_steps_per_second": 3.791, "step": 7700 }, { "epoch": 3.204327923429047, "eval_anatomy_loss": 2.8297290802001953, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.501, "eval_anatomy_steps_per_second": 3.751, "step": 7700 }, { "epoch": 3.204327923429047, "eval_college_mathematics_loss": 2.0536551475524902, "eval_college_mathematics_runtime": 0.2648, "eval_college_mathematics_samples_per_second": 7.552, "eval_college_mathematics_steps_per_second": 3.776, "step": 7700 }, { "epoch": 3.204327923429047, "eval_international_law_loss": 3.0671679973602295, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.513, "eval_international_law_steps_per_second": 3.757, "step": 7700 }, { "epoch": 3.2126508531002913, "grad_norm": 0.296875, "learning_rate": 4.721850307782879e-06, "loss": 0.2278, "step": 7720 }, { "epoch": 3.2209737827715355, "grad_norm": 0.306640625, "learning_rate": 4.718092759797073e-06, "loss": 0.2338, "step": 7740 }, { "epoch": 3.22929671244278, "grad_norm": 0.2734375, "learning_rate": 4.714311517093573e-06, "loss": 0.2315, "step": 7760 }, { "epoch": 3.237619642114024, "grad_norm": 0.2412109375, "learning_rate": 4.710506620065406e-06, "loss": 0.2368, "step": 7780 }, { "epoch": 3.2459425717852683, "grad_norm": 0.291015625, "learning_rate": 4.706678109358285e-06, "loss": 0.2386, "step": 7800 }, { "epoch": 3.2459425717852683, "eval_main_loss": 0.23850186169147491, "eval_main_runtime": 6.3235, "eval_main_samples_per_second": 30.047, "eval_main_steps_per_second": 3.795, "step": 7800 }, { "epoch": 3.2459425717852683, "eval_anatomy_loss": 2.8328044414520264, "eval_anatomy_runtime": 0.265, "eval_anatomy_samples_per_second": 7.548, "eval_anatomy_steps_per_second": 3.774, "step": 7800 }, { "epoch": 3.2459425717852683, "eval_college_mathematics_loss": 2.048779249191284, "eval_college_mathematics_runtime": 0.2654, "eval_college_mathematics_samples_per_second": 7.535, "eval_college_mathematics_steps_per_second": 3.768, "step": 7800 }, { "epoch": 3.2459425717852683, "eval_international_law_loss": 3.0646255016326904, "eval_international_law_runtime": 0.2657, "eval_international_law_samples_per_second": 7.529, "eval_international_law_steps_per_second": 3.764, "step": 7800 }, { "epoch": 3.2542655014565125, "grad_norm": 0.2578125, "learning_rate": 4.702826025870173e-06, "loss": 0.2354, "step": 7820 }, { "epoch": 3.262588431127757, "grad_norm": 0.29296875, "learning_rate": 4.698950410750854e-06, "loss": 0.231, "step": 7840 }, { "epoch": 3.2709113607990012, "grad_norm": 0.275390625, "learning_rate": 4.695051305401483e-06, "loss": 0.232, "step": 7860 }, { "epoch": 3.2792342904702454, "grad_norm": 0.341796875, "learning_rate": 4.691128751474149e-06, "loss": 0.2307, "step": 7880 }, { "epoch": 3.28755722014149, "grad_norm": 0.3046875, "learning_rate": 4.6871827908714345e-06, "loss": 0.2306, "step": 7900 }, { "epoch": 3.28755722014149, "eval_main_loss": 0.23848366737365723, "eval_main_runtime": 6.3519, "eval_main_samples_per_second": 29.912, "eval_main_steps_per_second": 3.778, "step": 7900 }, { "epoch": 3.28755722014149, "eval_anatomy_loss": 2.834224224090576, "eval_anatomy_runtime": 0.2672, "eval_anatomy_samples_per_second": 7.485, "eval_anatomy_steps_per_second": 3.742, "step": 7900 }, { "epoch": 3.28755722014149, "eval_college_mathematics_loss": 2.05056095123291, "eval_college_mathematics_runtime": 0.2672, "eval_college_mathematics_samples_per_second": 7.486, "eval_college_mathematics_steps_per_second": 3.743, "step": 7900 }, { "epoch": 3.28755722014149, "eval_international_law_loss": 3.0639660358428955, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.496, "eval_international_law_steps_per_second": 3.748, "step": 7900 }, { "epoch": 3.295880149812734, "grad_norm": 0.291015625, "learning_rate": 4.6832134657459586e-06, "loss": 0.2349, "step": 7920 }, { "epoch": 3.3042030794839783, "grad_norm": 0.291015625, "learning_rate": 4.679220818499932e-06, "loss": 0.2316, "step": 7940 }, { "epoch": 3.3125260091552224, "grad_norm": 0.236328125, "learning_rate": 4.675204891784706e-06, "loss": 0.2326, "step": 7960 }, { "epoch": 3.320848938826467, "grad_norm": 0.259765625, "learning_rate": 4.671165728500311e-06, "loss": 0.2325, "step": 7980 }, { "epoch": 3.329171868497711, "grad_norm": 0.306640625, "learning_rate": 4.667103371795003e-06, "loss": 0.2305, "step": 8000 }, { "epoch": 3.329171868497711, "eval_main_loss": 0.23847457766532898, "eval_main_runtime": 6.3564, "eval_main_samples_per_second": 29.891, "eval_main_steps_per_second": 3.776, "step": 8000 }, { "epoch": 3.329171868497711, "eval_anatomy_loss": 2.8327555656433105, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.509, "eval_anatomy_steps_per_second": 3.755, "step": 8000 }, { "epoch": 3.329171868497711, "eval_college_mathematics_loss": 2.0528974533081055, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.505, "eval_college_mathematics_steps_per_second": 3.753, "step": 8000 }, { "epoch": 3.329171868497711, "eval_international_law_loss": 3.065342903137207, "eval_international_law_runtime": 0.2694, "eval_international_law_samples_per_second": 7.423, "eval_international_law_steps_per_second": 3.712, "step": 8000 }, { "epoch": 3.3374947981689553, "grad_norm": 0.3203125, "learning_rate": 4.6630178650648005e-06, "loss": 0.226, "step": 8020 }, { "epoch": 3.3458177278402, "grad_norm": 0.28515625, "learning_rate": 4.658909251953023e-06, "loss": 0.2341, "step": 8040 }, { "epoch": 3.354140657511444, "grad_norm": 0.2578125, "learning_rate": 4.654777576349822e-06, "loss": 0.232, "step": 8060 }, { "epoch": 3.3624635871826882, "grad_norm": 0.271484375, "learning_rate": 4.650622882391713e-06, "loss": 0.2266, "step": 8080 }, { "epoch": 3.370786516853933, "grad_norm": 0.271484375, "learning_rate": 4.646445214461105e-06, "loss": 0.2328, "step": 8100 }, { "epoch": 3.370786516853933, "eval_main_loss": 0.23838673532009125, "eval_main_runtime": 6.3504, "eval_main_samples_per_second": 29.919, "eval_main_steps_per_second": 3.779, "step": 8100 }, { "epoch": 3.370786516853933, "eval_anatomy_loss": 2.8322196006774902, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.518, "eval_anatomy_steps_per_second": 3.759, "step": 8100 }, { "epoch": 3.370786516853933, "eval_college_mathematics_loss": 2.0516698360443115, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.499, "eval_college_mathematics_steps_per_second": 3.75, "step": 8100 }, { "epoch": 3.370786516853933, "eval_international_law_loss": 3.0663843154907227, "eval_international_law_runtime": 0.2677, "eval_international_law_samples_per_second": 7.47, "eval_international_law_steps_per_second": 3.735, "step": 8100 }, { "epoch": 3.379109446525177, "grad_norm": 0.30078125, "learning_rate": 4.642244617185827e-06, "loss": 0.231, "step": 8120 }, { "epoch": 3.387432376196421, "grad_norm": 0.267578125, "learning_rate": 4.6380211354386475e-06, "loss": 0.2312, "step": 8140 }, { "epoch": 3.3957553058676653, "grad_norm": 0.263671875, "learning_rate": 4.633774814336801e-06, "loss": 0.2321, "step": 8160 }, { "epoch": 3.40407823553891, "grad_norm": 0.333984375, "learning_rate": 4.6295056992415026e-06, "loss": 0.2298, "step": 8180 }, { "epoch": 3.412401165210154, "grad_norm": 0.3125, "learning_rate": 4.625213835757458e-06, "loss": 0.2348, "step": 8200 }, { "epoch": 3.412401165210154, "eval_main_loss": 0.23835770785808563, "eval_main_runtime": 6.3288, "eval_main_samples_per_second": 30.022, "eval_main_steps_per_second": 3.792, "step": 8200 }, { "epoch": 3.412401165210154, "eval_anatomy_loss": 2.8323183059692383, "eval_anatomy_runtime": 0.2656, "eval_anatomy_samples_per_second": 7.531, "eval_anatomy_steps_per_second": 3.766, "step": 8200 }, { "epoch": 3.412401165210154, "eval_college_mathematics_loss": 2.0520193576812744, "eval_college_mathematics_runtime": 0.2676, "eval_college_mathematics_samples_per_second": 7.474, "eval_college_mathematics_steps_per_second": 3.737, "step": 8200 }, { "epoch": 3.412401165210154, "eval_international_law_loss": 3.0666942596435547, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.502, "eval_international_law_steps_per_second": 3.751, "step": 8200 }, { "epoch": 3.420724094881398, "grad_norm": 0.263671875, "learning_rate": 4.62089926973239e-06, "loss": 0.2312, "step": 8220 }, { "epoch": 3.4290470245526423, "grad_norm": 0.267578125, "learning_rate": 4.616562047256536e-06, "loss": 0.23, "step": 8240 }, { "epoch": 3.437369954223887, "grad_norm": 0.28125, "learning_rate": 4.612202214662161e-06, "loss": 0.2325, "step": 8260 }, { "epoch": 3.445692883895131, "grad_norm": 0.3046875, "learning_rate": 4.6078198185230605e-06, "loss": 0.2323, "step": 8280 }, { "epoch": 3.454015813566375, "grad_norm": 0.28125, "learning_rate": 4.603414905654069e-06, "loss": 0.2306, "step": 8300 }, { "epoch": 3.454015813566375, "eval_main_loss": 0.23845936357975006, "eval_main_runtime": 6.3281, "eval_main_samples_per_second": 30.025, "eval_main_steps_per_second": 3.793, "step": 8300 }, { "epoch": 3.454015813566375, "eval_anatomy_loss": 2.8332934379577637, "eval_anatomy_runtime": 0.2651, "eval_anatomy_samples_per_second": 7.543, "eval_anatomy_steps_per_second": 3.772, "step": 8300 }, { "epoch": 3.454015813566375, "eval_college_mathematics_loss": 2.0525290966033936, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.524, "eval_college_mathematics_steps_per_second": 3.762, "step": 8300 }, { "epoch": 3.454015813566375, "eval_international_law_loss": 3.0639939308166504, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.508, "eval_international_law_steps_per_second": 3.754, "step": 8300 }, { "epoch": 3.46233874323762, "grad_norm": 0.2734375, "learning_rate": 4.5989875231105514e-06, "loss": 0.2328, "step": 8320 }, { "epoch": 3.470661672908864, "grad_norm": 0.25390625, "learning_rate": 4.594537718187906e-06, "loss": 0.2274, "step": 8340 }, { "epoch": 3.478984602580108, "grad_norm": 0.287109375, "learning_rate": 4.590065538421056e-06, "loss": 0.2304, "step": 8360 }, { "epoch": 3.4873075322513527, "grad_norm": 0.255859375, "learning_rate": 4.585571031583946e-06, "loss": 0.2292, "step": 8380 }, { "epoch": 3.495630461922597, "grad_norm": 0.2734375, "learning_rate": 4.581054245689026e-06, "loss": 0.2309, "step": 8400 }, { "epoch": 3.495630461922597, "eval_main_loss": 0.23835162818431854, "eval_main_runtime": 6.3575, "eval_main_samples_per_second": 29.886, "eval_main_steps_per_second": 3.775, "step": 8400 }, { "epoch": 3.495630461922597, "eval_anatomy_loss": 2.8338778018951416, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.516, "eval_anatomy_steps_per_second": 3.758, "step": 8400 }, { "epoch": 3.495630461922597, "eval_college_mathematics_loss": 2.053218126296997, "eval_college_mathematics_runtime": 0.2688, "eval_college_mathematics_samples_per_second": 7.442, "eval_college_mathematics_steps_per_second": 3.721, "step": 8400 }, { "epoch": 3.495630461922597, "eval_international_law_loss": 3.065638303756714, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.508, "eval_international_law_steps_per_second": 3.754, "step": 8400 }, { "epoch": 3.503953391593841, "grad_norm": 0.30859375, "learning_rate": 4.576515228986743e-06, "loss": 0.235, "step": 8420 }, { "epoch": 3.512276321265085, "grad_norm": 0.328125, "learning_rate": 4.571954029965024e-06, "loss": 0.2322, "step": 8440 }, { "epoch": 3.5205992509363297, "grad_norm": 0.322265625, "learning_rate": 4.567370697348759e-06, "loss": 0.2353, "step": 8460 }, { "epoch": 3.528922180607574, "grad_norm": 0.27734375, "learning_rate": 4.5627652800992765e-06, "loss": 0.2312, "step": 8480 }, { "epoch": 3.537245110278818, "grad_norm": 0.283203125, "learning_rate": 4.558137827413825e-06, "loss": 0.2317, "step": 8500 }, { "epoch": 3.537245110278818, "eval_main_loss": 0.23841607570648193, "eval_main_runtime": 6.3538, "eval_main_samples_per_second": 29.903, "eval_main_steps_per_second": 3.777, "step": 8500 }, { "epoch": 3.537245110278818, "eval_anatomy_loss": 2.83097505569458, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.528, "eval_anatomy_steps_per_second": 3.764, "step": 8500 }, { "epoch": 3.537245110278818, "eval_college_mathematics_loss": 2.0496573448181152, "eval_college_mathematics_runtime": 0.2674, "eval_college_mathematics_samples_per_second": 7.48, "eval_college_mathematics_steps_per_second": 3.74, "step": 8500 }, { "epoch": 3.537245110278818, "eval_international_law_loss": 3.065972089767456, "eval_international_law_runtime": 0.2675, "eval_international_law_samples_per_second": 7.475, "eval_international_law_steps_per_second": 3.738, "step": 8500 }, { "epoch": 3.545568039950062, "grad_norm": 0.28515625, "learning_rate": 4.5534883887250495e-06, "loss": 0.235, "step": 8520 }, { "epoch": 3.553890969621307, "grad_norm": 0.28125, "learning_rate": 4.548817013700454e-06, "loss": 0.2341, "step": 8540 }, { "epoch": 3.562213899292551, "grad_norm": 0.294921875, "learning_rate": 4.5441237522418804e-06, "loss": 0.2295, "step": 8560 }, { "epoch": 3.570536828963795, "grad_norm": 0.302734375, "learning_rate": 4.53940865448497e-06, "loss": 0.2318, "step": 8580 }, { "epoch": 3.5788597586350397, "grad_norm": 0.30078125, "learning_rate": 4.534671770798633e-06, "loss": 0.2298, "step": 8600 }, { "epoch": 3.5788597586350397, "eval_main_loss": 0.23844130337238312, "eval_main_runtime": 6.3401, "eval_main_samples_per_second": 29.968, "eval_main_steps_per_second": 3.785, "step": 8600 }, { "epoch": 3.5788597586350397, "eval_anatomy_loss": 2.8311820030212402, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.482, "eval_anatomy_steps_per_second": 3.741, "step": 8600 }, { "epoch": 3.5788597586350397, "eval_college_mathematics_loss": 2.051837921142578, "eval_college_mathematics_runtime": 0.2661, "eval_college_mathematics_samples_per_second": 7.517, "eval_college_mathematics_steps_per_second": 3.758, "step": 8600 }, { "epoch": 3.5788597586350397, "eval_international_law_loss": 3.0655837059020996, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.503, "eval_international_law_steps_per_second": 3.751, "step": 8600 }, { "epoch": 3.587182688306284, "grad_norm": 0.26953125, "learning_rate": 4.529913151784504e-06, "loss": 0.2311, "step": 8620 }, { "epoch": 3.595505617977528, "grad_norm": 0.294921875, "learning_rate": 4.525132848276405e-06, "loss": 0.2341, "step": 8640 }, { "epoch": 3.6038285476487726, "grad_norm": 0.296875, "learning_rate": 4.520330911339805e-06, "loss": 0.2343, "step": 8660 }, { "epoch": 3.6121514773200167, "grad_norm": 0.28515625, "learning_rate": 4.5155073922712665e-06, "loss": 0.2324, "step": 8680 }, { "epoch": 3.620474406991261, "grad_norm": 0.2119140625, "learning_rate": 4.510662342597907e-06, "loss": 0.2335, "step": 8700 }, { "epoch": 3.620474406991261, "eval_main_loss": 0.23842211067676544, "eval_main_runtime": 6.3337, "eval_main_samples_per_second": 29.998, "eval_main_steps_per_second": 3.789, "step": 8700 }, { "epoch": 3.620474406991261, "eval_anatomy_loss": 2.8317272663116455, "eval_anatomy_runtime": 0.2656, "eval_anatomy_samples_per_second": 7.531, "eval_anatomy_steps_per_second": 3.765, "step": 8700 }, { "epoch": 3.620474406991261, "eval_college_mathematics_loss": 2.0518221855163574, "eval_college_mathematics_runtime": 0.2661, "eval_college_mathematics_samples_per_second": 7.515, "eval_college_mathematics_steps_per_second": 3.758, "step": 8700 }, { "epoch": 3.620474406991261, "eval_international_law_loss": 3.06583571434021, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.492, "eval_international_law_steps_per_second": 3.746, "step": 8700 }, { "epoch": 3.628797336662505, "grad_norm": 0.263671875, "learning_rate": 4.505795814076842e-06, "loss": 0.2318, "step": 8720 }, { "epoch": 3.6371202663337496, "grad_norm": 0.2578125, "learning_rate": 4.5009078586946355e-06, "loss": 0.2333, "step": 8740 }, { "epoch": 3.645443196004994, "grad_norm": 0.279296875, "learning_rate": 4.495998528666741e-06, "loss": 0.2311, "step": 8760 }, { "epoch": 3.653766125676238, "grad_norm": 0.259765625, "learning_rate": 4.491067876436949e-06, "loss": 0.2322, "step": 8780 }, { "epoch": 3.662089055347482, "grad_norm": 0.318359375, "learning_rate": 4.486115954676821e-06, "loss": 0.2342, "step": 8800 }, { "epoch": 3.662089055347482, "eval_main_loss": 0.23842394351959229, "eval_main_runtime": 6.3208, "eval_main_samples_per_second": 30.06, "eval_main_steps_per_second": 3.797, "step": 8800 }, { "epoch": 3.662089055347482, "eval_anatomy_loss": 2.8346548080444336, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.526, "eval_anatomy_steps_per_second": 3.763, "step": 8800 }, { "epoch": 3.662089055347482, "eval_college_mathematics_loss": 2.0510027408599854, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.526, "eval_college_mathematics_steps_per_second": 3.763, "step": 8800 }, { "epoch": 3.662089055347482, "eval_international_law_loss": 3.0672590732574463, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.493, "eval_international_law_steps_per_second": 3.747, "step": 8800 }, { "epoch": 3.6704119850187267, "grad_norm": 0.25390625, "learning_rate": 4.481142816285133e-06, "loss": 0.2331, "step": 8820 }, { "epoch": 3.678734914689971, "grad_norm": 0.2275390625, "learning_rate": 4.476148514387305e-06, "loss": 0.2343, "step": 8840 }, { "epoch": 3.687057844361215, "grad_norm": 0.267578125, "learning_rate": 4.471133102334836e-06, "loss": 0.2323, "step": 8860 }, { "epoch": 3.6953807740324596, "grad_norm": 0.310546875, "learning_rate": 4.4660966337047325e-06, "loss": 0.2326, "step": 8880 }, { "epoch": 3.7037037037037037, "grad_norm": 0.26953125, "learning_rate": 4.46103916229894e-06, "loss": 0.235, "step": 8900 }, { "epoch": 3.7037037037037037, "eval_main_loss": 0.2383807897567749, "eval_main_runtime": 6.326, "eval_main_samples_per_second": 30.035, "eval_main_steps_per_second": 3.794, "step": 8900 }, { "epoch": 3.7037037037037037, "eval_anatomy_loss": 2.830998420715332, "eval_anatomy_runtime": 0.2656, "eval_anatomy_samples_per_second": 7.531, "eval_anatomy_steps_per_second": 3.765, "step": 8900 }, { "epoch": 3.7037037037037037, "eval_college_mathematics_loss": 2.051891803741455, "eval_college_mathematics_runtime": 0.2656, "eval_college_mathematics_samples_per_second": 7.53, "eval_college_mathematics_steps_per_second": 3.765, "step": 8900 }, { "epoch": 3.7037037037037037, "eval_international_law_loss": 3.0654711723327637, "eval_international_law_runtime": 0.2652, "eval_international_law_samples_per_second": 7.541, "eval_international_law_steps_per_second": 3.77, "step": 8900 }, { "epoch": 3.712026633374948, "grad_norm": 0.26171875, "learning_rate": 4.455960742143762e-06, "loss": 0.2316, "step": 8920 }, { "epoch": 3.7203495630461925, "grad_norm": 0.2373046875, "learning_rate": 4.450861427489291e-06, "loss": 0.2319, "step": 8940 }, { "epoch": 3.7286724927174366, "grad_norm": 0.248046875, "learning_rate": 4.445741272808821e-06, "loss": 0.2296, "step": 8960 }, { "epoch": 3.7369954223886808, "grad_norm": 0.2275390625, "learning_rate": 4.440600332798269e-06, "loss": 0.2317, "step": 8980 }, { "epoch": 3.7453183520599254, "grad_norm": 0.28515625, "learning_rate": 4.435438662375593e-06, "loss": 0.237, "step": 9000 }, { "epoch": 3.7453183520599254, "eval_main_loss": 0.23835314810276031, "eval_main_runtime": 6.3284, "eval_main_samples_per_second": 30.023, "eval_main_steps_per_second": 3.792, "step": 9000 }, { "epoch": 3.7453183520599254, "eval_anatomy_loss": 2.832582473754883, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.509, "eval_anatomy_steps_per_second": 3.755, "step": 9000 }, { "epoch": 3.7453183520599254, "eval_college_mathematics_loss": 2.0516104698181152, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.526, "eval_college_mathematics_steps_per_second": 3.763, "step": 9000 }, { "epoch": 3.7453183520599254, "eval_international_law_loss": 3.0663774013519287, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.517, "eval_international_law_steps_per_second": 3.758, "step": 9000 }, { "epoch": 3.7536412817311695, "grad_norm": 0.2412109375, "learning_rate": 4.430256316680201e-06, "loss": 0.2296, "step": 9020 }, { "epoch": 3.7619642114024137, "grad_norm": 0.248046875, "learning_rate": 4.425053351072365e-06, "loss": 0.2348, "step": 9040 }, { "epoch": 3.770287141073658, "grad_norm": 0.294921875, "learning_rate": 4.419829821132629e-06, "loss": 0.2318, "step": 9060 }, { "epoch": 3.778610070744902, "grad_norm": 0.306640625, "learning_rate": 4.414585782661215e-06, "loss": 0.2311, "step": 9080 }, { "epoch": 3.7869330004161466, "grad_norm": 0.2890625, "learning_rate": 4.4093212916774245e-06, "loss": 0.2342, "step": 9100 }, { "epoch": 3.7869330004161466, "eval_main_loss": 0.23832525312900543, "eval_main_runtime": 6.3321, "eval_main_samples_per_second": 30.006, "eval_main_steps_per_second": 3.79, "step": 9100 }, { "epoch": 3.7869330004161466, "eval_anatomy_loss": 2.83284854888916, "eval_anatomy_runtime": 0.2678, "eval_anatomy_samples_per_second": 7.468, "eval_anatomy_steps_per_second": 3.734, "step": 9100 }, { "epoch": 3.7869330004161466, "eval_college_mathematics_loss": 2.0502493381500244, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.526, "eval_college_mathematics_steps_per_second": 3.763, "step": 9100 }, { "epoch": 3.7869330004161466, "eval_international_law_loss": 3.0675525665283203, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.507, "eval_international_law_steps_per_second": 3.754, "step": 9100 }, { "epoch": 3.7952559300873907, "grad_norm": 0.279296875, "learning_rate": 4.404036404419045e-06, "loss": 0.2319, "step": 9120 }, { "epoch": 3.803578859758635, "grad_norm": 0.314453125, "learning_rate": 4.398731177341747e-06, "loss": 0.2277, "step": 9140 }, { "epoch": 3.8119017894298795, "grad_norm": 0.2734375, "learning_rate": 4.39340566711848e-06, "loss": 0.2322, "step": 9160 }, { "epoch": 3.8202247191011236, "grad_norm": 0.279296875, "learning_rate": 4.388059930638865e-06, "loss": 0.2358, "step": 9180 }, { "epoch": 3.8285476487723678, "grad_norm": 0.271484375, "learning_rate": 4.3826940250085925e-06, "loss": 0.2254, "step": 9200 }, { "epoch": 3.8285476487723678, "eval_main_loss": 0.2383318841457367, "eval_main_runtime": 6.3299, "eval_main_samples_per_second": 30.016, "eval_main_steps_per_second": 3.792, "step": 9200 }, { "epoch": 3.8285476487723678, "eval_anatomy_loss": 2.833568811416626, "eval_anatomy_runtime": 0.2667, "eval_anatomy_samples_per_second": 7.5, "eval_anatomy_steps_per_second": 3.75, "step": 9200 }, { "epoch": 3.8285476487723678, "eval_college_mathematics_loss": 2.055227756500244, "eval_college_mathematics_runtime": 0.2648, "eval_college_mathematics_samples_per_second": 7.554, "eval_college_mathematics_steps_per_second": 3.777, "step": 9200 }, { "epoch": 3.8285476487723678, "eval_international_law_loss": 3.0670459270477295, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.516, "eval_international_law_steps_per_second": 3.758, "step": 9200 }, { "epoch": 3.8368705784436123, "grad_norm": 0.298828125, "learning_rate": 4.377308007548809e-06, "loss": 0.2281, "step": 9220 }, { "epoch": 3.8451935081148565, "grad_norm": 0.26171875, "learning_rate": 4.371901935795504e-06, "loss": 0.2323, "step": 9240 }, { "epoch": 3.8535164377861006, "grad_norm": 0.2119140625, "learning_rate": 4.3664758674988984e-06, "loss": 0.228, "step": 9260 }, { "epoch": 3.8618393674573452, "grad_norm": 0.2490234375, "learning_rate": 4.361029860622822e-06, "loss": 0.2315, "step": 9280 }, { "epoch": 3.8701622971285894, "grad_norm": 0.263671875, "learning_rate": 4.355563973344104e-06, "loss": 0.231, "step": 9300 }, { "epoch": 3.8701622971285894, "eval_main_loss": 0.23839576542377472, "eval_main_runtime": 6.3285, "eval_main_samples_per_second": 30.023, "eval_main_steps_per_second": 3.792, "step": 9300 }, { "epoch": 3.8701622971285894, "eval_anatomy_loss": 2.8333206176757812, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.516, "eval_anatomy_steps_per_second": 3.758, "step": 9300 }, { "epoch": 3.8701622971285894, "eval_college_mathematics_loss": 2.048659563064575, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.5, "eval_college_mathematics_steps_per_second": 3.75, "step": 9300 }, { "epoch": 3.8701622971285894, "eval_international_law_loss": 3.0655484199523926, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.508, "eval_international_law_steps_per_second": 3.754, "step": 9300 }, { "epoch": 3.8784852267998335, "grad_norm": 0.2373046875, "learning_rate": 4.3500782640519375e-06, "loss": 0.2261, "step": 9320 }, { "epoch": 3.8868081564710777, "grad_norm": 0.294921875, "learning_rate": 4.344572791347272e-06, "loss": 0.2274, "step": 9340 }, { "epoch": 3.895131086142322, "grad_norm": 0.302734375, "learning_rate": 4.339047614042172e-06, "loss": 0.2316, "step": 9360 }, { "epoch": 3.9034540158135664, "grad_norm": 0.33203125, "learning_rate": 4.3335027911592004e-06, "loss": 0.23, "step": 9380 }, { "epoch": 3.9117769454848106, "grad_norm": 0.30078125, "learning_rate": 4.327938381930782e-06, "loss": 0.2318, "step": 9400 }, { "epoch": 3.9117769454848106, "eval_main_loss": 0.23833923041820526, "eval_main_runtime": 6.3193, "eval_main_samples_per_second": 30.067, "eval_main_steps_per_second": 3.798, "step": 9400 }, { "epoch": 3.9117769454848106, "eval_anatomy_loss": 2.8319694995880127, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.52, "eval_anatomy_steps_per_second": 3.76, "step": 9400 }, { "epoch": 3.9117769454848106, "eval_college_mathematics_loss": 2.0500192642211914, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.502, "eval_college_mathematics_steps_per_second": 3.751, "step": 9400 }, { "epoch": 3.9117769454848106, "eval_international_law_loss": 3.0644402503967285, "eval_international_law_runtime": 0.2672, "eval_international_law_samples_per_second": 7.486, "eval_international_law_steps_per_second": 3.743, "step": 9400 }, { "epoch": 3.9200998751560547, "grad_norm": 0.271484375, "learning_rate": 4.3223544457985735e-06, "loss": 0.2315, "step": 9420 }, { "epoch": 3.9284228048272993, "grad_norm": 0.2890625, "learning_rate": 4.316751042412824e-06, "loss": 0.2362, "step": 9440 }, { "epoch": 3.9367457344985435, "grad_norm": 0.271484375, "learning_rate": 4.311128231631745e-06, "loss": 0.2343, "step": 9460 }, { "epoch": 3.9450686641697876, "grad_norm": 0.328125, "learning_rate": 4.305486073520865e-06, "loss": 0.2349, "step": 9480 }, { "epoch": 3.9533915938410322, "grad_norm": 0.275390625, "learning_rate": 4.299824628352387e-06, "loss": 0.2304, "step": 9500 }, { "epoch": 3.9533915938410322, "eval_main_loss": 0.2382751852273941, "eval_main_runtime": 6.3262, "eval_main_samples_per_second": 30.034, "eval_main_steps_per_second": 3.794, "step": 9500 }, { "epoch": 3.9533915938410322, "eval_anatomy_loss": 2.832981586456299, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.518, "eval_anatomy_steps_per_second": 3.759, "step": 9500 }, { "epoch": 3.9533915938410322, "eval_college_mathematics_loss": 2.0524649620056152, "eval_college_mathematics_runtime": 0.2672, "eval_college_mathematics_samples_per_second": 7.484, "eval_college_mathematics_steps_per_second": 3.742, "step": 9500 }, { "epoch": 3.9533915938410322, "eval_international_law_loss": 3.0676116943359375, "eval_international_law_runtime": 0.2658, "eval_international_law_samples_per_second": 7.525, "eval_international_law_steps_per_second": 3.763, "step": 9500 }, { "epoch": 3.9617145235122764, "grad_norm": 0.3125, "learning_rate": 4.2941439566045536e-06, "loss": 0.2308, "step": 9520 }, { "epoch": 3.9700374531835205, "grad_norm": 0.291015625, "learning_rate": 4.2884441189609915e-06, "loss": 0.2317, "step": 9540 }, { "epoch": 3.978360382854765, "grad_norm": 0.37109375, "learning_rate": 4.282725176310065e-06, "loss": 0.2318, "step": 9560 }, { "epoch": 3.9866833125260093, "grad_norm": 0.26171875, "learning_rate": 4.27698718974423e-06, "loss": 0.2343, "step": 9580 }, { "epoch": 3.9950062421972534, "grad_norm": 0.267578125, "learning_rate": 4.271230220559378e-06, "loss": 0.2294, "step": 9600 }, { "epoch": 3.9950062421972534, "eval_main_loss": 0.23832967877388, "eval_main_runtime": 6.3277, "eval_main_samples_per_second": 30.027, "eval_main_steps_per_second": 3.793, "step": 9600 }, { "epoch": 3.9950062421972534, "eval_anatomy_loss": 2.8348305225372314, "eval_anatomy_runtime": 0.2667, "eval_anatomy_samples_per_second": 7.5, "eval_anatomy_steps_per_second": 3.75, "step": 9600 }, { "epoch": 3.9950062421972534, "eval_college_mathematics_loss": 2.0557780265808105, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.525, "eval_college_mathematics_steps_per_second": 3.762, "step": 9600 }, { "epoch": 3.9950062421972534, "eval_international_law_loss": 3.0656328201293945, "eval_international_law_runtime": 0.2656, "eval_international_law_samples_per_second": 7.53, "eval_international_law_steps_per_second": 3.765, "step": 9600 }, { "epoch": 4.003329171868498, "grad_norm": 0.27734375, "learning_rate": 4.2654543302541796e-06, "loss": 0.2352, "step": 9620 }, { "epoch": 4.011652101539742, "grad_norm": 0.296875, "learning_rate": 4.259659580529432e-06, "loss": 0.2334, "step": 9640 }, { "epoch": 4.019975031210986, "grad_norm": 0.2412109375, "learning_rate": 4.253846033287398e-06, "loss": 0.2308, "step": 9660 }, { "epoch": 4.028297960882231, "grad_norm": 0.314453125, "learning_rate": 4.248013750631143e-06, "loss": 0.2353, "step": 9680 }, { "epoch": 4.036620890553475, "grad_norm": 0.26953125, "learning_rate": 4.242162794863872e-06, "loss": 0.2312, "step": 9700 }, { "epoch": 4.036620890553475, "eval_main_loss": 0.2384222000837326, "eval_main_runtime": 6.3303, "eval_main_samples_per_second": 30.014, "eval_main_steps_per_second": 3.791, "step": 9700 }, { "epoch": 4.036620890553475, "eval_anatomy_loss": 2.832493305206299, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.525, "eval_anatomy_steps_per_second": 3.763, "step": 9700 }, { "epoch": 4.036620890553475, "eval_college_mathematics_loss": 2.0524394512176514, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.508, "eval_college_mathematics_steps_per_second": 3.754, "step": 9700 }, { "epoch": 4.036620890553475, "eval_international_law_loss": 3.0663318634033203, "eval_international_law_runtime": 0.2658, "eval_international_law_samples_per_second": 7.525, "eval_international_law_steps_per_second": 3.762, "step": 9700 }, { "epoch": 4.044943820224719, "grad_norm": 0.298828125, "learning_rate": 4.236293228488267e-06, "loss": 0.2306, "step": 9720 }, { "epoch": 4.053266749895964, "grad_norm": 0.298828125, "learning_rate": 4.23040511420582e-06, "loss": 0.2331, "step": 9740 }, { "epoch": 4.0615896795672075, "grad_norm": 0.30859375, "learning_rate": 4.224498514916152e-06, "loss": 0.2312, "step": 9760 }, { "epoch": 4.069912609238452, "grad_norm": 0.287109375, "learning_rate": 4.218573493716359e-06, "loss": 0.2321, "step": 9780 }, { "epoch": 4.078235538909696, "grad_norm": 0.30078125, "learning_rate": 4.212630113900322e-06, "loss": 0.2333, "step": 9800 }, { "epoch": 4.078235538909696, "eval_main_loss": 0.23829525709152222, "eval_main_runtime": 6.3313, "eval_main_samples_per_second": 30.01, "eval_main_steps_per_second": 3.791, "step": 9800 }, { "epoch": 4.078235538909696, "eval_anatomy_loss": 2.8339943885803223, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.515, "eval_anatomy_steps_per_second": 3.757, "step": 9800 }, { "epoch": 4.078235538909696, "eval_college_mathematics_loss": 2.0519936084747314, "eval_college_mathematics_runtime": 0.2656, "eval_college_mathematics_samples_per_second": 7.53, "eval_college_mathematics_steps_per_second": 3.765, "step": 9800 }, { "epoch": 4.078235538909696, "eval_international_law_loss": 3.06636905670166, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.494, "eval_international_law_steps_per_second": 3.747, "step": 9800 }, { "epoch": 4.08655846858094, "grad_norm": 0.287109375, "learning_rate": 4.206668438958042e-06, "loss": 0.2325, "step": 9820 }, { "epoch": 4.094881398252185, "grad_norm": 0.294921875, "learning_rate": 4.200688532574952e-06, "loss": 0.2294, "step": 9840 }, { "epoch": 4.103204327923429, "grad_norm": 0.275390625, "learning_rate": 4.1946904586312485e-06, "loss": 0.2326, "step": 9860 }, { "epoch": 4.111527257594673, "grad_norm": 0.2578125, "learning_rate": 4.188674281201198e-06, "loss": 0.2348, "step": 9880 }, { "epoch": 4.119850187265918, "grad_norm": 0.333984375, "learning_rate": 4.182640064552456e-06, "loss": 0.2324, "step": 9900 }, { "epoch": 4.119850187265918, "eval_main_loss": 0.2383279949426651, "eval_main_runtime": 6.3331, "eval_main_samples_per_second": 30.001, "eval_main_steps_per_second": 3.79, "step": 9900 }, { "epoch": 4.119850187265918, "eval_anatomy_loss": 2.8347108364105225, "eval_anatomy_runtime": 0.268, "eval_anatomy_samples_per_second": 7.464, "eval_anatomy_steps_per_second": 3.732, "step": 9900 }, { "epoch": 4.119850187265918, "eval_college_mathematics_loss": 2.053701877593994, "eval_college_mathematics_runtime": 0.266, "eval_college_mathematics_samples_per_second": 7.519, "eval_college_mathematics_steps_per_second": 3.759, "step": 9900 }, { "epoch": 4.119850187265918, "eval_international_law_loss": 3.0677242279052734, "eval_international_law_runtime": 0.2678, "eval_international_law_samples_per_second": 7.469, "eval_international_law_steps_per_second": 3.735, "step": 9900 }, { "epoch": 4.128173116937162, "grad_norm": 0.244140625, "learning_rate": 4.176587873145386e-06, "loss": 0.2329, "step": 9920 }, { "epoch": 4.136496046608406, "grad_norm": 0.3046875, "learning_rate": 4.170517771632362e-06, "loss": 0.2316, "step": 9940 }, { "epoch": 4.144818976279651, "grad_norm": 0.2451171875, "learning_rate": 4.164429824857086e-06, "loss": 0.2336, "step": 9960 }, { "epoch": 4.1531419059508945, "grad_norm": 0.255859375, "learning_rate": 4.158324097853887e-06, "loss": 0.2361, "step": 9980 }, { "epoch": 4.161464835622139, "grad_norm": 0.26953125, "learning_rate": 4.1522006558470365e-06, "loss": 0.2359, "step": 10000 }, { "epoch": 4.161464835622139, "eval_main_loss": 0.23838593065738678, "eval_main_runtime": 6.3323, "eval_main_samples_per_second": 30.005, "eval_main_steps_per_second": 3.79, "step": 10000 }, { "epoch": 4.161464835622139, "eval_anatomy_loss": 2.8330607414245605, "eval_anatomy_runtime": 0.2671, "eval_anatomy_samples_per_second": 7.488, "eval_anatomy_steps_per_second": 3.744, "step": 10000 }, { "epoch": 4.161464835622139, "eval_college_mathematics_loss": 2.04879093170166, "eval_college_mathematics_runtime": 0.2649, "eval_college_mathematics_samples_per_second": 7.551, "eval_college_mathematics_steps_per_second": 3.776, "step": 10000 }, { "epoch": 4.161464835622139, "eval_international_law_loss": 3.0646615028381348, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 10000 }, { "epoch": 4.169787765293384, "grad_norm": 0.322265625, "learning_rate": 4.146059564250041e-06, "loss": 0.2326, "step": 10020 }, { "epoch": 4.178110694964627, "grad_norm": 0.2890625, "learning_rate": 4.13990088866495e-06, "loss": 0.2336, "step": 10040 }, { "epoch": 4.186433624635872, "grad_norm": 0.310546875, "learning_rate": 4.133724694881655e-06, "loss": 0.2314, "step": 10060 }, { "epoch": 4.194756554307116, "grad_norm": 0.306640625, "learning_rate": 4.1275310488771855e-06, "loss": 0.2286, "step": 10080 }, { "epoch": 4.20307948397836, "grad_norm": 0.322265625, "learning_rate": 4.1213200168149994e-06, "loss": 0.2337, "step": 10100 }, { "epoch": 4.20307948397836, "eval_main_loss": 0.23833875358104706, "eval_main_runtime": 6.3313, "eval_main_samples_per_second": 30.01, "eval_main_steps_per_second": 3.791, "step": 10100 }, { "epoch": 4.20307948397836, "eval_anatomy_loss": 2.8343758583068848, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.511, "eval_anatomy_steps_per_second": 3.755, "step": 10100 }, { "epoch": 4.20307948397836, "eval_college_mathematics_loss": 2.0506842136383057, "eval_college_mathematics_runtime": 0.2649, "eval_college_mathematics_samples_per_second": 7.551, "eval_college_mathematics_steps_per_second": 3.775, "step": 10100 }, { "epoch": 4.20307948397836, "eval_international_law_loss": 3.0656676292419434, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.491, "eval_international_law_steps_per_second": 3.745, "step": 10100 }, { "epoch": 4.211402413649605, "grad_norm": 0.294921875, "learning_rate": 4.115091665044284e-06, "loss": 0.2332, "step": 10120 }, { "epoch": 4.219725343320849, "grad_norm": 0.265625, "learning_rate": 4.108846060099246e-06, "loss": 0.2285, "step": 10140 }, { "epoch": 4.228048272992093, "grad_norm": 0.3046875, "learning_rate": 4.102583268698393e-06, "loss": 0.2258, "step": 10160 }, { "epoch": 4.236371202663338, "grad_norm": 0.328125, "learning_rate": 4.096303357743834e-06, "loss": 0.2307, "step": 10180 }, { "epoch": 4.2446941323345815, "grad_norm": 0.2890625, "learning_rate": 4.0900063943205485e-06, "loss": 0.232, "step": 10200 }, { "epoch": 4.2446941323345815, "eval_main_loss": 0.23833294212818146, "eval_main_runtime": 6.3324, "eval_main_samples_per_second": 30.004, "eval_main_steps_per_second": 3.79, "step": 10200 }, { "epoch": 4.2446941323345815, "eval_anatomy_loss": 2.8333046436309814, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.511, "eval_anatomy_steps_per_second": 3.756, "step": 10200 }, { "epoch": 4.2446941323345815, "eval_college_mathematics_loss": 2.0533056259155273, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.487, "eval_college_mathematics_steps_per_second": 3.743, "step": 10200 }, { "epoch": 4.2446941323345815, "eval_international_law_loss": 3.0654220581054688, "eval_international_law_runtime": 0.2655, "eval_international_law_samples_per_second": 7.533, "eval_international_law_steps_per_second": 3.766, "step": 10200 }, { "epoch": 4.253017062005826, "grad_norm": 0.263671875, "learning_rate": 4.083692445695686e-06, "loss": 0.2346, "step": 10220 }, { "epoch": 4.261339991677071, "grad_norm": 0.2265625, "learning_rate": 4.077361579317835e-06, "loss": 0.2333, "step": 10240 }, { "epoch": 4.269662921348314, "grad_norm": 0.2734375, "learning_rate": 4.071013862816311e-06, "loss": 0.2304, "step": 10260 }, { "epoch": 4.277985851019559, "grad_norm": 0.2578125, "learning_rate": 4.064649364000429e-06, "loss": 0.235, "step": 10280 }, { "epoch": 4.286308780690804, "grad_norm": 0.251953125, "learning_rate": 4.058268150858779e-06, "loss": 0.2325, "step": 10300 }, { "epoch": 4.286308780690804, "eval_main_loss": 0.23827138543128967, "eval_main_runtime": 6.325, "eval_main_samples_per_second": 30.039, "eval_main_steps_per_second": 3.794, "step": 10300 }, { "epoch": 4.286308780690804, "eval_anatomy_loss": 2.832484006881714, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.509, "eval_anatomy_steps_per_second": 3.754, "step": 10300 }, { "epoch": 4.286308780690804, "eval_college_mathematics_loss": 2.050703287124634, "eval_college_mathematics_runtime": 0.2662, "eval_college_mathematics_samples_per_second": 7.512, "eval_college_mathematics_steps_per_second": 3.756, "step": 10300 }, { "epoch": 4.286308780690804, "eval_international_law_loss": 3.06673002243042, "eval_international_law_runtime": 0.2654, "eval_international_law_samples_per_second": 7.536, "eval_international_law_steps_per_second": 3.768, "step": 10300 }, { "epoch": 4.294631710362047, "grad_norm": 0.25390625, "learning_rate": 4.051870291558505e-06, "loss": 0.2288, "step": 10320 }, { "epoch": 4.302954640033292, "grad_norm": 0.28125, "learning_rate": 4.045455854444569e-06, "loss": 0.232, "step": 10340 }, { "epoch": 4.3112775697045365, "grad_norm": 0.255859375, "learning_rate": 4.039024908039029e-06, "loss": 0.2337, "step": 10360 }, { "epoch": 4.31960049937578, "grad_norm": 0.3125, "learning_rate": 4.0325775210402995e-06, "loss": 0.2343, "step": 10380 }, { "epoch": 4.327923429047025, "grad_norm": 0.267578125, "learning_rate": 4.026113762322423e-06, "loss": 0.2309, "step": 10400 }, { "epoch": 4.327923429047025, "eval_main_loss": 0.23835839331150055, "eval_main_runtime": 6.3532, "eval_main_samples_per_second": 29.906, "eval_main_steps_per_second": 3.778, "step": 10400 }, { "epoch": 4.327923429047025, "eval_anatomy_loss": 2.8327348232269287, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.508, "eval_anatomy_steps_per_second": 3.754, "step": 10400 }, { "epoch": 4.327923429047025, "eval_college_mathematics_loss": 2.053480625152588, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.487, "eval_college_mathematics_steps_per_second": 3.744, "step": 10400 }, { "epoch": 4.327923429047025, "eval_international_law_loss": 3.0650157928466797, "eval_international_law_runtime": 0.2677, "eval_international_law_samples_per_second": 7.472, "eval_international_law_steps_per_second": 3.736, "step": 10400 }, { "epoch": 4.3362463587182685, "grad_norm": 0.26953125, "learning_rate": 4.019633700934334e-06, "loss": 0.2329, "step": 10420 }, { "epoch": 4.344569288389513, "grad_norm": 0.302734375, "learning_rate": 4.013137406099117e-06, "loss": 0.2306, "step": 10440 }, { "epoch": 4.352892218060758, "grad_norm": 0.271484375, "learning_rate": 4.006624947213272e-06, "loss": 0.2314, "step": 10460 }, { "epoch": 4.361215147732001, "grad_norm": 0.259765625, "learning_rate": 4.000096393845968e-06, "loss": 0.2319, "step": 10480 }, { "epoch": 4.369538077403246, "grad_norm": 0.271484375, "learning_rate": 3.993551815738307e-06, "loss": 0.2279, "step": 10500 }, { "epoch": 4.369538077403246, "eval_main_loss": 0.23829132318496704, "eval_main_runtime": 6.3595, "eval_main_samples_per_second": 29.876, "eval_main_steps_per_second": 3.774, "step": 10500 }, { "epoch": 4.369538077403246, "eval_anatomy_loss": 2.8348493576049805, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.502, "eval_anatomy_steps_per_second": 3.751, "step": 10500 }, { "epoch": 4.369538077403246, "eval_college_mathematics_loss": 2.051111936569214, "eval_college_mathematics_runtime": 0.2673, "eval_college_mathematics_samples_per_second": 7.481, "eval_college_mathematics_steps_per_second": 3.741, "step": 10500 }, { "epoch": 4.369538077403246, "eval_international_law_loss": 3.0652830600738525, "eval_international_law_runtime": 0.2673, "eval_international_law_samples_per_second": 7.483, "eval_international_law_steps_per_second": 3.742, "step": 10500 }, { "epoch": 4.3778610070744906, "grad_norm": 0.267578125, "learning_rate": 3.9869912828025735e-06, "loss": 0.2286, "step": 10520 }, { "epoch": 4.386183936745734, "grad_norm": 0.3203125, "learning_rate": 3.980414865121486e-06, "loss": 0.2314, "step": 10540 }, { "epoch": 4.394506866416979, "grad_norm": 0.2578125, "learning_rate": 3.973822632947455e-06, "loss": 0.2295, "step": 10560 }, { "epoch": 4.4028297960882234, "grad_norm": 0.337890625, "learning_rate": 3.9672146567018275e-06, "loss": 0.2325, "step": 10580 }, { "epoch": 4.411152725759467, "grad_norm": 0.279296875, "learning_rate": 3.9605910069741375e-06, "loss": 0.2316, "step": 10600 }, { "epoch": 4.411152725759467, "eval_main_loss": 0.23840609192848206, "eval_main_runtime": 6.3547, "eval_main_samples_per_second": 29.899, "eval_main_steps_per_second": 3.777, "step": 10600 }, { "epoch": 4.411152725759467, "eval_anatomy_loss": 2.8325912952423096, "eval_anatomy_runtime": 0.2688, "eval_anatomy_samples_per_second": 7.442, "eval_anatomy_steps_per_second": 3.721, "step": 10600 }, { "epoch": 4.411152725759467, "eval_college_mathematics_loss": 2.0505118370056152, "eval_college_mathematics_runtime": 0.2654, "eval_college_mathematics_samples_per_second": 7.537, "eval_college_mathematics_steps_per_second": 3.769, "step": 10600 }, { "epoch": 4.411152725759467, "eval_international_law_loss": 3.0657410621643066, "eval_international_law_runtime": 0.2676, "eval_international_law_samples_per_second": 7.475, "eval_international_law_steps_per_second": 3.738, "step": 10600 }, { "epoch": 4.419475655430712, "grad_norm": 0.287109375, "learning_rate": 3.953951754521348e-06, "loss": 0.2281, "step": 10620 }, { "epoch": 4.4277985851019555, "grad_norm": 0.3046875, "learning_rate": 3.947296970267098e-06, "loss": 0.231, "step": 10640 }, { "epoch": 4.4361215147732, "grad_norm": 0.2197265625, "learning_rate": 3.940626725300949e-06, "loss": 0.2298, "step": 10660 }, { "epoch": 4.444444444444445, "grad_norm": 0.26953125, "learning_rate": 3.933941090877615e-06, "loss": 0.2314, "step": 10680 }, { "epoch": 4.452767374115688, "grad_norm": 0.287109375, "learning_rate": 3.927240138416212e-06, "loss": 0.234, "step": 10700 }, { "epoch": 4.452767374115688, "eval_main_loss": 0.2382432222366333, "eval_main_runtime": 6.321, "eval_main_samples_per_second": 30.059, "eval_main_steps_per_second": 3.797, "step": 10700 }, { "epoch": 4.452767374115688, "eval_anatomy_loss": 2.833085536956787, "eval_anatomy_runtime": 0.2641, "eval_anatomy_samples_per_second": 7.574, "eval_anatomy_steps_per_second": 3.787, "step": 10700 }, { "epoch": 4.452767374115688, "eval_college_mathematics_loss": 2.0491299629211426, "eval_college_mathematics_runtime": 0.2669, "eval_college_mathematics_samples_per_second": 7.494, "eval_college_mathematics_steps_per_second": 3.747, "step": 10700 }, { "epoch": 4.452767374115688, "eval_international_law_loss": 3.064594268798828, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.492, "eval_international_law_steps_per_second": 3.746, "step": 10700 }, { "epoch": 4.461090303786933, "grad_norm": 0.2734375, "learning_rate": 3.920523939499487e-06, "loss": 0.2293, "step": 10720 }, { "epoch": 4.4694132334581775, "grad_norm": 0.28125, "learning_rate": 3.913792565873061e-06, "loss": 0.2285, "step": 10740 }, { "epoch": 4.477736163129421, "grad_norm": 0.275390625, "learning_rate": 3.907046089444654e-06, "loss": 0.2337, "step": 10760 }, { "epoch": 4.486059092800666, "grad_norm": 0.34375, "learning_rate": 3.900284582283323e-06, "loss": 0.2325, "step": 10780 }, { "epoch": 4.49438202247191, "grad_norm": 0.369140625, "learning_rate": 3.8935081166186935e-06, "loss": 0.2343, "step": 10800 }, { "epoch": 4.49438202247191, "eval_main_loss": 0.23841159045696259, "eval_main_runtime": 6.3194, "eval_main_samples_per_second": 30.066, "eval_main_steps_per_second": 3.798, "step": 10800 }, { "epoch": 4.49438202247191, "eval_anatomy_loss": 2.8321523666381836, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.521, "eval_anatomy_steps_per_second": 3.76, "step": 10800 }, { "epoch": 4.49438202247191, "eval_college_mathematics_loss": 2.050726890563965, "eval_college_mathematics_runtime": 0.2654, "eval_college_mathematics_samples_per_second": 7.536, "eval_college_mathematics_steps_per_second": 3.768, "step": 10800 }, { "epoch": 4.49438202247191, "eval_international_law_loss": 3.0631279945373535, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.517, "eval_international_law_steps_per_second": 3.759, "step": 10800 }, { "epoch": 4.502704952143154, "grad_norm": 0.314453125, "learning_rate": 3.88671676484018e-06, "loss": 0.2325, "step": 10820 }, { "epoch": 4.511027881814399, "grad_norm": 0.322265625, "learning_rate": 3.87991059949622e-06, "loss": 0.2315, "step": 10840 }, { "epoch": 4.519350811485643, "grad_norm": 0.29296875, "learning_rate": 3.873089693293497e-06, "loss": 0.2325, "step": 10860 }, { "epoch": 4.527673741156887, "grad_norm": 0.25, "learning_rate": 3.866254119096161e-06, "loss": 0.2298, "step": 10880 }, { "epoch": 4.535996670828132, "grad_norm": 0.275390625, "learning_rate": 3.8594039499250545e-06, "loss": 0.232, "step": 10900 }, { "epoch": 4.535996670828132, "eval_main_loss": 0.2383212000131607, "eval_main_runtime": 6.3117, "eval_main_samples_per_second": 30.103, "eval_main_steps_per_second": 3.802, "step": 10900 }, { "epoch": 4.535996670828132, "eval_anatomy_loss": 2.833933115005493, "eval_anatomy_runtime": 0.2655, "eval_anatomy_samples_per_second": 7.534, "eval_anatomy_steps_per_second": 3.767, "step": 10900 }, { "epoch": 4.535996670828132, "eval_college_mathematics_loss": 2.0495407581329346, "eval_college_mathematics_runtime": 0.2654, "eval_college_mathematics_samples_per_second": 7.537, "eval_college_mathematics_steps_per_second": 3.768, "step": 10900 }, { "epoch": 4.535996670828132, "eval_international_law_loss": 3.0634400844573975, "eval_international_law_runtime": 0.2654, "eval_international_law_samples_per_second": 7.537, "eval_international_law_steps_per_second": 3.768, "step": 10900 }, { "epoch": 4.544319600499376, "grad_norm": 0.248046875, "learning_rate": 3.852539258956931e-06, "loss": 0.2345, "step": 10920 }, { "epoch": 4.55264253017062, "grad_norm": 0.302734375, "learning_rate": 3.845660119523671e-06, "loss": 0.2315, "step": 10940 }, { "epoch": 4.5609654598418645, "grad_norm": 0.27734375, "learning_rate": 3.8387666051114995e-06, "loss": 0.232, "step": 10960 }, { "epoch": 4.569288389513108, "grad_norm": 0.30078125, "learning_rate": 3.831858789360206e-06, "loss": 0.2332, "step": 10980 }, { "epoch": 4.577611319184353, "grad_norm": 0.26171875, "learning_rate": 3.824936746062349e-06, "loss": 0.2349, "step": 11000 }, { "epoch": 4.577611319184353, "eval_main_loss": 0.23827816545963287, "eval_main_runtime": 6.3591, "eval_main_samples_per_second": 29.878, "eval_main_steps_per_second": 3.774, "step": 11000 }, { "epoch": 4.577611319184353, "eval_anatomy_loss": 2.83370304107666, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.51, "eval_anatomy_steps_per_second": 3.755, "step": 11000 }, { "epoch": 4.577611319184353, "eval_college_mathematics_loss": 2.051502227783203, "eval_college_mathematics_runtime": 0.2674, "eval_college_mathematics_samples_per_second": 7.478, "eval_college_mathematics_steps_per_second": 3.739, "step": 11000 }, { "epoch": 4.577611319184353, "eval_international_law_loss": 3.0672590732574463, "eval_international_law_runtime": 0.268, "eval_international_law_samples_per_second": 7.463, "eval_international_law_steps_per_second": 3.731, "step": 11000 }, { "epoch": 4.585934248855597, "grad_norm": 0.291015625, "learning_rate": 3.818000549162474e-06, "loss": 0.2318, "step": 11020 }, { "epoch": 4.594257178526841, "grad_norm": 0.30078125, "learning_rate": 3.811050272756324e-06, "loss": 0.2348, "step": 11040 }, { "epoch": 4.602580108198086, "grad_norm": 0.28515625, "learning_rate": 3.804085991090044e-06, "loss": 0.2342, "step": 11060 }, { "epoch": 4.61090303786933, "grad_norm": 0.3125, "learning_rate": 3.797107778559389e-06, "loss": 0.2307, "step": 11080 }, { "epoch": 4.619225967540574, "grad_norm": 0.2451171875, "learning_rate": 3.7901157097089315e-06, "loss": 0.228, "step": 11100 }, { "epoch": 4.619225967540574, "eval_main_loss": 0.23824092745780945, "eval_main_runtime": 6.3522, "eval_main_samples_per_second": 29.911, "eval_main_steps_per_second": 3.778, "step": 11100 }, { "epoch": 4.619225967540574, "eval_anatomy_loss": 2.834642171859741, "eval_anatomy_runtime": 0.2679, "eval_anatomy_samples_per_second": 7.465, "eval_anatomy_steps_per_second": 3.732, "step": 11100 }, { "epoch": 4.619225967540574, "eval_college_mathematics_loss": 2.054388999938965, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.525, "eval_college_mathematics_steps_per_second": 3.763, "step": 11100 }, { "epoch": 4.619225967540574, "eval_international_law_loss": 3.0648648738861084, "eval_international_law_runtime": 0.2679, "eval_international_law_samples_per_second": 7.465, "eval_international_law_steps_per_second": 3.732, "step": 11100 }, { "epoch": 4.627548897211819, "grad_norm": 0.275390625, "learning_rate": 3.7831098592312643e-06, "loss": 0.2368, "step": 11120 }, { "epoch": 4.635871826883063, "grad_norm": 0.26171875, "learning_rate": 3.7760903019662008e-06, "loss": 0.2292, "step": 11140 }, { "epoch": 4.644194756554307, "grad_norm": 0.298828125, "learning_rate": 3.7690571128999775e-06, "loss": 0.2371, "step": 11160 }, { "epoch": 4.6525176862255515, "grad_norm": 0.2734375, "learning_rate": 3.7620103671644516e-06, "loss": 0.2305, "step": 11180 }, { "epoch": 4.660840615896795, "grad_norm": 0.26953125, "learning_rate": 3.7549501400362996e-06, "loss": 0.2265, "step": 11200 }, { "epoch": 4.660840615896795, "eval_main_loss": 0.2383652925491333, "eval_main_runtime": 6.3394, "eval_main_samples_per_second": 29.971, "eval_main_steps_per_second": 3.786, "step": 11200 }, { "epoch": 4.660840615896795, "eval_anatomy_loss": 2.8311378955841064, "eval_anatomy_runtime": 0.2675, "eval_anatomy_samples_per_second": 7.477, "eval_anatomy_steps_per_second": 3.738, "step": 11200 }, { "epoch": 4.660840615896795, "eval_college_mathematics_loss": 2.052487373352051, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.492, "eval_college_mathematics_steps_per_second": 3.746, "step": 11200 }, { "epoch": 4.660840615896795, "eval_international_law_loss": 3.064990758895874, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.501, "eval_international_law_steps_per_second": 3.751, "step": 11200 }, { "epoch": 4.66916354556804, "grad_norm": 0.27734375, "learning_rate": 3.7478765069362122e-06, "loss": 0.2371, "step": 11220 }, { "epoch": 4.677486475239284, "grad_norm": 0.32421875, "learning_rate": 3.7407895434280893e-06, "loss": 0.2293, "step": 11240 }, { "epoch": 4.685809404910529, "grad_norm": 0.333984375, "learning_rate": 3.7336893252182343e-06, "loss": 0.2329, "step": 11260 }, { "epoch": 4.694132334581773, "grad_norm": 0.28515625, "learning_rate": 3.72657592815454e-06, "loss": 0.2364, "step": 11280 }, { "epoch": 4.702455264253017, "grad_norm": 0.259765625, "learning_rate": 3.719449428225685e-06, "loss": 0.2309, "step": 11300 }, { "epoch": 4.702455264253017, "eval_main_loss": 0.23832421004772186, "eval_main_runtime": 6.3244, "eval_main_samples_per_second": 30.042, "eval_main_steps_per_second": 3.795, "step": 11300 }, { "epoch": 4.702455264253017, "eval_anatomy_loss": 2.8343684673309326, "eval_anatomy_runtime": 0.2667, "eval_anatomy_samples_per_second": 7.5, "eval_anatomy_steps_per_second": 3.75, "step": 11300 }, { "epoch": 4.702455264253017, "eval_college_mathematics_loss": 2.049694776535034, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.504, "eval_college_mathematics_steps_per_second": 3.752, "step": 11300 }, { "epoch": 4.702455264253017, "eval_international_law_loss": 3.0649161338806152, "eval_international_law_runtime": 0.2654, "eval_international_law_samples_per_second": 7.536, "eval_international_law_steps_per_second": 3.768, "step": 11300 }, { "epoch": 4.710778193924261, "grad_norm": 0.31640625, "learning_rate": 3.712309901560316e-06, "loss": 0.2332, "step": 11320 }, { "epoch": 4.719101123595506, "grad_norm": 0.279296875, "learning_rate": 3.7051574244262412e-06, "loss": 0.2321, "step": 11340 }, { "epoch": 4.72742405326675, "grad_norm": 0.22265625, "learning_rate": 3.6979920732296085e-06, "loss": 0.2368, "step": 11360 }, { "epoch": 4.735746982937994, "grad_norm": 0.25, "learning_rate": 3.690813924514095e-06, "loss": 0.2292, "step": 11380 }, { "epoch": 4.7440699126092385, "grad_norm": 0.283203125, "learning_rate": 3.6836230549600853e-06, "loss": 0.231, "step": 11400 }, { "epoch": 4.7440699126092385, "eval_main_loss": 0.23822040855884552, "eval_main_runtime": 6.3505, "eval_main_samples_per_second": 29.919, "eval_main_steps_per_second": 3.779, "step": 11400 }, { "epoch": 4.7440699126092385, "eval_anatomy_loss": 2.8312675952911377, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.48, "eval_anatomy_steps_per_second": 3.74, "step": 11400 }, { "epoch": 4.7440699126092385, "eval_college_mathematics_loss": 2.0531413555145264, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.498, "eval_college_mathematics_steps_per_second": 3.749, "step": 11400 }, { "epoch": 4.7440699126092385, "eval_international_law_loss": 3.0663352012634277, "eval_international_law_runtime": 0.2678, "eval_international_law_samples_per_second": 7.467, "eval_international_law_steps_per_second": 3.733, "step": 11400 }, { "epoch": 4.752392842280483, "grad_norm": 0.259765625, "learning_rate": 3.676419541383855e-06, "loss": 0.2289, "step": 11420 }, { "epoch": 4.760715771951727, "grad_norm": 0.2734375, "learning_rate": 3.6692034607367486e-06, "loss": 0.2305, "step": 11440 }, { "epoch": 4.769038701622971, "grad_norm": 0.263671875, "learning_rate": 3.6619748901043583e-06, "loss": 0.2279, "step": 11460 }, { "epoch": 4.777361631294216, "grad_norm": 0.30078125, "learning_rate": 3.6547339067057007e-06, "loss": 0.2382, "step": 11480 }, { "epoch": 4.78568456096546, "grad_norm": 0.328125, "learning_rate": 3.647480587892391e-06, "loss": 0.2302, "step": 11500 }, { "epoch": 4.78568456096546, "eval_main_loss": 0.23836463689804077, "eval_main_runtime": 6.3523, "eval_main_samples_per_second": 29.911, "eval_main_steps_per_second": 3.778, "step": 11500 }, { "epoch": 4.78568456096546, "eval_anatomy_loss": 2.8309946060180664, "eval_anatomy_runtime": 0.2675, "eval_anatomy_samples_per_second": 7.477, "eval_anatomy_steps_per_second": 3.738, "step": 11500 }, { "epoch": 4.78568456096546, "eval_college_mathematics_loss": 2.0509681701660156, "eval_college_mathematics_runtime": 0.2662, "eval_college_mathematics_samples_per_second": 7.513, "eval_college_mathematics_steps_per_second": 3.757, "step": 11500 }, { "epoch": 4.78568456096546, "eval_international_law_loss": 3.06599760055542, "eval_international_law_runtime": 0.2671, "eval_international_law_samples_per_second": 7.487, "eval_international_law_steps_per_second": 3.743, "step": 11500 }, { "epoch": 4.794007490636704, "grad_norm": 0.2470703125, "learning_rate": 3.640215011147815e-06, "loss": 0.2276, "step": 11520 }, { "epoch": 4.802330420307948, "grad_norm": 0.2734375, "learning_rate": 3.632937254086308e-06, "loss": 0.2298, "step": 11540 }, { "epoch": 4.810653349979193, "grad_norm": 0.28125, "learning_rate": 3.6256473944523175e-06, "loss": 0.2362, "step": 11560 }, { "epoch": 4.818976279650437, "grad_norm": 0.255859375, "learning_rate": 3.6183455101195785e-06, "loss": 0.2301, "step": 11580 }, { "epoch": 4.827299209321681, "grad_norm": 0.30078125, "learning_rate": 3.611031679090278e-06, "loss": 0.2313, "step": 11600 }, { "epoch": 4.827299209321681, "eval_main_loss": 0.23831431567668915, "eval_main_runtime": 6.3233, "eval_main_samples_per_second": 30.048, "eval_main_steps_per_second": 3.795, "step": 11600 }, { "epoch": 4.827299209321681, "eval_anatomy_loss": 2.8330864906311035, "eval_anatomy_runtime": 0.2671, "eval_anatomy_samples_per_second": 7.489, "eval_anatomy_steps_per_second": 3.744, "step": 11600 }, { "epoch": 4.827299209321681, "eval_college_mathematics_loss": 2.0517561435699463, "eval_college_mathematics_runtime": 0.2651, "eval_college_mathematics_samples_per_second": 7.544, "eval_college_mathematics_steps_per_second": 3.772, "step": 11600 }, { "epoch": 4.827299209321681, "eval_international_law_loss": 3.066469669342041, "eval_international_law_runtime": 0.2658, "eval_international_law_samples_per_second": 7.523, "eval_international_law_steps_per_second": 3.762, "step": 11600 }, { "epoch": 4.8356221389929255, "grad_norm": 0.2890625, "learning_rate": 3.603705979494225e-06, "loss": 0.2306, "step": 11620 }, { "epoch": 4.84394506866417, "grad_norm": 0.287109375, "learning_rate": 3.5963684895880123e-06, "loss": 0.229, "step": 11640 }, { "epoch": 4.852267998335414, "grad_norm": 0.341796875, "learning_rate": 3.589019287754183e-06, "loss": 0.2244, "step": 11660 }, { "epoch": 4.860590928006658, "grad_norm": 0.326171875, "learning_rate": 3.581658452500394e-06, "loss": 0.2329, "step": 11680 }, { "epoch": 4.868913857677903, "grad_norm": 0.2421875, "learning_rate": 3.574286062458574e-06, "loss": 0.2329, "step": 11700 }, { "epoch": 4.868913857677903, "eval_main_loss": 0.2383066862821579, "eval_main_runtime": 6.3157, "eval_main_samples_per_second": 30.084, "eval_main_steps_per_second": 3.8, "step": 11700 }, { "epoch": 4.868913857677903, "eval_anatomy_loss": 2.832721710205078, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.52, "eval_anatomy_steps_per_second": 3.76, "step": 11700 }, { "epoch": 4.868913857677903, "eval_college_mathematics_loss": 2.048736095428467, "eval_college_mathematics_runtime": 0.2649, "eval_college_mathematics_samples_per_second": 7.549, "eval_college_mathematics_steps_per_second": 3.774, "step": 11700 }, { "epoch": 4.868913857677903, "eval_international_law_loss": 3.0647518634796143, "eval_international_law_runtime": 0.2646, "eval_international_law_samples_per_second": 7.558, "eval_international_law_steps_per_second": 3.779, "step": 11700 }, { "epoch": 4.877236787349147, "grad_norm": 0.259765625, "learning_rate": 3.5669021963840863e-06, "loss": 0.2342, "step": 11720 }, { "epoch": 4.885559717020391, "grad_norm": 0.26171875, "learning_rate": 3.559506933154886e-06, "loss": 0.2326, "step": 11740 }, { "epoch": 4.893882646691635, "grad_norm": 0.28515625, "learning_rate": 3.552100351770679e-06, "loss": 0.2283, "step": 11760 }, { "epoch": 4.90220557636288, "grad_norm": 0.265625, "learning_rate": 3.544682531352076e-06, "loss": 0.2359, "step": 11780 }, { "epoch": 4.910528506034124, "grad_norm": 0.314453125, "learning_rate": 3.53725355113975e-06, "loss": 0.2338, "step": 11800 }, { "epoch": 4.910528506034124, "eval_main_loss": 0.23833368718624115, "eval_main_runtime": 6.3601, "eval_main_samples_per_second": 29.874, "eval_main_steps_per_second": 3.774, "step": 11800 }, { "epoch": 4.910528506034124, "eval_anatomy_loss": 2.8294174671173096, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.479, "eval_anatomy_steps_per_second": 3.74, "step": 11800 }, { "epoch": 4.910528506034124, "eval_college_mathematics_loss": 2.054882764816284, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.495, "eval_college_mathematics_steps_per_second": 3.747, "step": 11800 }, { "epoch": 4.910528506034124, "eval_international_law_loss": 3.0666239261627197, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.493, "eval_international_law_steps_per_second": 3.746, "step": 11800 }, { "epoch": 4.918851435705369, "grad_norm": 0.2451171875, "learning_rate": 3.529813490493586e-06, "loss": 0.2312, "step": 11820 }, { "epoch": 4.9271743653766125, "grad_norm": 0.298828125, "learning_rate": 3.5223624288918368e-06, "loss": 0.2272, "step": 11840 }, { "epoch": 4.935497295047857, "grad_norm": 0.390625, "learning_rate": 3.514900445930273e-06, "loss": 0.2308, "step": 11860 }, { "epoch": 4.943820224719101, "grad_norm": 0.306640625, "learning_rate": 3.507427621321331e-06, "loss": 0.2286, "step": 11880 }, { "epoch": 4.952143154390345, "grad_norm": 0.3046875, "learning_rate": 3.4999440348932644e-06, "loss": 0.2363, "step": 11900 }, { "epoch": 4.952143154390345, "eval_main_loss": 0.23830710351467133, "eval_main_runtime": 6.3539, "eval_main_samples_per_second": 29.903, "eval_main_steps_per_second": 3.777, "step": 11900 }, { "epoch": 4.952143154390345, "eval_anatomy_loss": 2.832731008529663, "eval_anatomy_runtime": 0.2683, "eval_anatomy_samples_per_second": 7.453, "eval_anatomy_steps_per_second": 3.727, "step": 11900 }, { "epoch": 4.952143154390345, "eval_college_mathematics_loss": 2.0541601181030273, "eval_college_mathematics_runtime": 0.2669, "eval_college_mathematics_samples_per_second": 7.492, "eval_college_mathematics_steps_per_second": 3.746, "step": 11900 }, { "epoch": 4.952143154390345, "eval_international_law_loss": 3.064253091812134, "eval_international_law_runtime": 0.2674, "eval_international_law_samples_per_second": 7.48, "eval_international_law_steps_per_second": 3.74, "step": 11900 }, { "epoch": 4.96046608406159, "grad_norm": 0.271484375, "learning_rate": 3.4924497665892886e-06, "loss": 0.2364, "step": 11920 }, { "epoch": 4.968789013732834, "grad_norm": 0.271484375, "learning_rate": 3.484944896466727e-06, "loss": 0.2331, "step": 11940 }, { "epoch": 4.977111943404078, "grad_norm": 0.2734375, "learning_rate": 3.4774295046961593e-06, "loss": 0.2344, "step": 11960 }, { "epoch": 4.985434873075323, "grad_norm": 0.28515625, "learning_rate": 3.4699036715605595e-06, "loss": 0.2307, "step": 11980 }, { "epoch": 4.9937578027465666, "grad_norm": 0.294921875, "learning_rate": 3.4623674774544435e-06, "loss": 0.2271, "step": 12000 }, { "epoch": 4.9937578027465666, "eval_main_loss": 0.23841014504432678, "eval_main_runtime": 6.3245, "eval_main_samples_per_second": 30.042, "eval_main_steps_per_second": 3.795, "step": 12000 }, { "epoch": 4.9937578027465666, "eval_anatomy_loss": 2.8334553241729736, "eval_anatomy_runtime": 0.2654, "eval_anatomy_samples_per_second": 7.535, "eval_anatomy_steps_per_second": 3.767, "step": 12000 }, { "epoch": 4.9937578027465666, "eval_college_mathematics_loss": 2.050748348236084, "eval_college_mathematics_runtime": 0.2648, "eval_college_mathematics_samples_per_second": 7.553, "eval_college_mathematics_steps_per_second": 3.776, "step": 12000 }, { "epoch": 4.9937578027465666, "eval_international_law_loss": 3.066818952560425, "eval_international_law_runtime": 0.2674, "eval_international_law_samples_per_second": 7.479, "eval_international_law_steps_per_second": 3.74, "step": 12000 }, { "epoch": 5.002080732417811, "grad_norm": 0.267578125, "learning_rate": 3.454821002883007e-06, "loss": 0.2312, "step": 12020 }, { "epoch": 5.010403662089056, "grad_norm": 0.29296875, "learning_rate": 3.4472643284612656e-06, "loss": 0.2312, "step": 12040 }, { "epoch": 5.0187265917602994, "grad_norm": 0.287109375, "learning_rate": 3.439697534913197e-06, "loss": 0.2339, "step": 12060 }, { "epoch": 5.027049521431544, "grad_norm": 0.23046875, "learning_rate": 3.4321207030708725e-06, "loss": 0.2329, "step": 12080 }, { "epoch": 5.035372451102788, "grad_norm": 0.2734375, "learning_rate": 3.4245339138736023e-06, "loss": 0.2298, "step": 12100 }, { "epoch": 5.035372451102788, "eval_main_loss": 0.23836223781108856, "eval_main_runtime": 6.3231, "eval_main_samples_per_second": 30.049, "eval_main_steps_per_second": 3.796, "step": 12100 }, { "epoch": 5.035372451102788, "eval_anatomy_loss": 2.8340742588043213, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.507, "eval_anatomy_steps_per_second": 3.754, "step": 12100 }, { "epoch": 5.035372451102788, "eval_college_mathematics_loss": 2.0523436069488525, "eval_college_mathematics_runtime": 0.2651, "eval_college_mathematics_samples_per_second": 7.545, "eval_college_mathematics_steps_per_second": 3.773, "step": 12100 }, { "epoch": 5.035372451102788, "eval_international_law_loss": 3.0669960975646973, "eval_international_law_runtime": 0.2656, "eval_international_law_samples_per_second": 7.529, "eval_international_law_steps_per_second": 3.765, "step": 12100 }, { "epoch": 5.043695380774032, "grad_norm": 0.255859375, "learning_rate": 3.416937248367061e-06, "loss": 0.2311, "step": 12120 }, { "epoch": 5.052018310445277, "grad_norm": 0.25390625, "learning_rate": 3.409330787702428e-06, "loss": 0.2297, "step": 12140 }, { "epoch": 5.060341240116521, "grad_norm": 0.263671875, "learning_rate": 3.4017146131355205e-06, "loss": 0.2329, "step": 12160 }, { "epoch": 5.068664169787765, "grad_norm": 0.267578125, "learning_rate": 3.394088806025925e-06, "loss": 0.2279, "step": 12180 }, { "epoch": 5.07698709945901, "grad_norm": 0.267578125, "learning_rate": 3.3864534478361235e-06, "loss": 0.2314, "step": 12200 }, { "epoch": 5.07698709945901, "eval_main_loss": 0.2382633239030838, "eval_main_runtime": 6.3506, "eval_main_samples_per_second": 29.919, "eval_main_steps_per_second": 3.779, "step": 12200 }, { "epoch": 5.07698709945901, "eval_anatomy_loss": 2.834725856781006, "eval_anatomy_runtime": 0.2681, "eval_anatomy_samples_per_second": 7.461, "eval_anatomy_steps_per_second": 3.73, "step": 12200 }, { "epoch": 5.07698709945901, "eval_college_mathematics_loss": 2.052021026611328, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.511, "eval_college_mathematics_steps_per_second": 3.756, "step": 12200 }, { "epoch": 5.07698709945901, "eval_international_law_loss": 3.0643866062164307, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.51, "eval_international_law_steps_per_second": 3.755, "step": 12200 }, { "epoch": 5.0853100291302535, "grad_norm": 0.2890625, "learning_rate": 3.3788086201306295e-06, "loss": 0.2321, "step": 12220 }, { "epoch": 5.093632958801498, "grad_norm": 0.263671875, "learning_rate": 3.371154404575116e-06, "loss": 0.2346, "step": 12240 }, { "epoch": 5.101955888472743, "grad_norm": 0.33203125, "learning_rate": 3.3634908829355384e-06, "loss": 0.2298, "step": 12260 }, { "epoch": 5.110278818143986, "grad_norm": 0.330078125, "learning_rate": 3.3558181370772657e-06, "loss": 0.2324, "step": 12280 }, { "epoch": 5.118601747815231, "grad_norm": 0.236328125, "learning_rate": 3.3481362489642055e-06, "loss": 0.2314, "step": 12300 }, { "epoch": 5.118601747815231, "eval_main_loss": 0.23837122321128845, "eval_main_runtime": 6.354, "eval_main_samples_per_second": 29.902, "eval_main_steps_per_second": 3.777, "step": 12300 }, { "epoch": 5.118601747815231, "eval_anatomy_loss": 2.833470582962036, "eval_anatomy_runtime": 0.2671, "eval_anatomy_samples_per_second": 7.488, "eval_anatomy_steps_per_second": 3.744, "step": 12300 }, { "epoch": 5.118601747815231, "eval_college_mathematics_loss": 2.0529050827026367, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.505, "eval_college_mathematics_steps_per_second": 3.752, "step": 12300 }, { "epoch": 5.118601747815231, "eval_international_law_loss": 3.0643563270568848, "eval_international_law_runtime": 0.2685, "eval_international_law_samples_per_second": 7.45, "eval_international_law_steps_per_second": 3.725, "step": 12300 }, { "epoch": 5.126924677486476, "grad_norm": 0.310546875, "learning_rate": 3.340445300657924e-06, "loss": 0.2306, "step": 12320 }, { "epoch": 5.135247607157719, "grad_norm": 0.267578125, "learning_rate": 3.3327453743167763e-06, "loss": 0.2299, "step": 12340 }, { "epoch": 5.143570536828964, "grad_norm": 0.27734375, "learning_rate": 3.3250365521950212e-06, "loss": 0.2334, "step": 12360 }, { "epoch": 5.1518934665002085, "grad_norm": 0.24609375, "learning_rate": 3.317318916641952e-06, "loss": 0.2327, "step": 12380 }, { "epoch": 5.160216396171452, "grad_norm": 0.267578125, "learning_rate": 3.309592550101005e-06, "loss": 0.2314, "step": 12400 }, { "epoch": 5.160216396171452, "eval_main_loss": 0.2383902668952942, "eval_main_runtime": 6.3193, "eval_main_samples_per_second": 30.067, "eval_main_steps_per_second": 3.798, "step": 12400 }, { "epoch": 5.160216396171452, "eval_anatomy_loss": 2.8342981338500977, "eval_anatomy_runtime": 0.2654, "eval_anatomy_samples_per_second": 7.536, "eval_anatomy_steps_per_second": 3.768, "step": 12400 }, { "epoch": 5.160216396171452, "eval_college_mathematics_loss": 2.054227113723755, "eval_college_mathematics_runtime": 0.2669, "eval_college_mathematics_samples_per_second": 7.493, "eval_college_mathematics_steps_per_second": 3.746, "step": 12400 }, { "epoch": 5.160216396171452, "eval_international_law_loss": 3.067544937133789, "eval_international_law_runtime": 0.2651, "eval_international_law_samples_per_second": 7.543, "eval_international_law_steps_per_second": 3.772, "step": 12400 }, { "epoch": 5.168539325842697, "grad_norm": 0.2734375, "learning_rate": 3.3018575351088894e-06, "loss": 0.2336, "step": 12420 }, { "epoch": 5.1768622555139405, "grad_norm": 0.310546875, "learning_rate": 3.2941139542946996e-06, "loss": 0.2368, "step": 12440 }, { "epoch": 5.185185185185185, "grad_norm": 0.279296875, "learning_rate": 3.2863618903790346e-06, "loss": 0.2295, "step": 12460 }, { "epoch": 5.19350811485643, "grad_norm": 0.267578125, "learning_rate": 3.2786014261731138e-06, "loss": 0.2322, "step": 12480 }, { "epoch": 5.201831044527673, "grad_norm": 0.30859375, "learning_rate": 3.270832644577891e-06, "loss": 0.2365, "step": 12500 }, { "epoch": 5.201831044527673, "eval_main_loss": 0.23834168910980225, "eval_main_runtime": 6.3151, "eval_main_samples_per_second": 30.087, "eval_main_steps_per_second": 3.8, "step": 12500 }, { "epoch": 5.201831044527673, "eval_anatomy_loss": 2.833904981613159, "eval_anatomy_runtime": 0.2651, "eval_anatomy_samples_per_second": 7.543, "eval_anatomy_steps_per_second": 3.772, "step": 12500 }, { "epoch": 5.201831044527673, "eval_college_mathematics_loss": 2.05311918258667, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.489, "eval_college_mathematics_steps_per_second": 3.744, "step": 12500 }, { "epoch": 5.201831044527673, "eval_international_law_loss": 3.065603494644165, "eval_international_law_runtime": 0.2657, "eval_international_law_samples_per_second": 7.527, "eval_international_law_steps_per_second": 3.764, "step": 12500 }, { "epoch": 5.210153974198918, "grad_norm": 0.26171875, "learning_rate": 3.263055628583174e-06, "loss": 0.2315, "step": 12520 }, { "epoch": 5.218476903870163, "grad_norm": 0.33203125, "learning_rate": 3.25527046126673e-06, "loss": 0.2295, "step": 12540 }, { "epoch": 5.226799833541406, "grad_norm": 0.271484375, "learning_rate": 3.247477225793406e-06, "loss": 0.2349, "step": 12560 }, { "epoch": 5.235122763212651, "grad_norm": 0.26953125, "learning_rate": 3.239676005414234e-06, "loss": 0.231, "step": 12580 }, { "epoch": 5.2434456928838955, "grad_norm": 0.251953125, "learning_rate": 3.231866883465548e-06, "loss": 0.2316, "step": 12600 }, { "epoch": 5.2434456928838955, "eval_main_loss": 0.2384309619665146, "eval_main_runtime": 6.3556, "eval_main_samples_per_second": 29.895, "eval_main_steps_per_second": 3.776, "step": 12600 }, { "epoch": 5.2434456928838955, "eval_anatomy_loss": 2.8339719772338867, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.518, "eval_anatomy_steps_per_second": 3.759, "step": 12600 }, { "epoch": 5.2434456928838955, "eval_college_mathematics_loss": 2.048271417617798, "eval_college_mathematics_runtime": 0.2672, "eval_college_mathematics_samples_per_second": 7.486, "eval_college_mathematics_steps_per_second": 3.743, "step": 12600 }, { "epoch": 5.2434456928838955, "eval_international_law_loss": 3.0657992362976074, "eval_international_law_runtime": 0.2694, "eval_international_law_samples_per_second": 7.425, "eval_international_law_steps_per_second": 3.712, "step": 12600 }, { "epoch": 5.251768622555139, "grad_norm": 0.28125, "learning_rate": 3.2240499433680866e-06, "loss": 0.2337, "step": 12620 }, { "epoch": 5.260091552226384, "grad_norm": 0.275390625, "learning_rate": 3.2162252686261077e-06, "loss": 0.2284, "step": 12640 }, { "epoch": 5.2684144818976275, "grad_norm": 0.302734375, "learning_rate": 3.2083929428264938e-06, "loss": 0.2318, "step": 12660 }, { "epoch": 5.276737411568872, "grad_norm": 0.30078125, "learning_rate": 3.2005530496378596e-06, "loss": 0.2267, "step": 12680 }, { "epoch": 5.285060341240117, "grad_norm": 0.271484375, "learning_rate": 3.1927056728096582e-06, "loss": 0.235, "step": 12700 }, { "epoch": 5.285060341240117, "eval_main_loss": 0.23832783102989197, "eval_main_runtime": 6.3472, "eval_main_samples_per_second": 29.934, "eval_main_steps_per_second": 3.781, "step": 12700 }, { "epoch": 5.285060341240117, "eval_anatomy_loss": 2.834886312484741, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.503, "eval_anatomy_steps_per_second": 3.751, "step": 12700 }, { "epoch": 5.285060341240117, "eval_college_mathematics_loss": 2.0527701377868652, "eval_college_mathematics_runtime": 0.2661, "eval_college_mathematics_samples_per_second": 7.515, "eval_college_mathematics_steps_per_second": 3.758, "step": 12700 }, { "epoch": 5.285060341240117, "eval_international_law_loss": 3.0642306804656982, "eval_international_law_runtime": 0.2678, "eval_international_law_samples_per_second": 7.468, "eval_international_law_steps_per_second": 3.734, "step": 12700 }, { "epoch": 5.29338327091136, "grad_norm": 0.2734375, "learning_rate": 3.184850896171288e-06, "loss": 0.2332, "step": 12720 }, { "epoch": 5.301706200582605, "grad_norm": 0.26171875, "learning_rate": 3.1769888036311924e-06, "loss": 0.2293, "step": 12740 }, { "epoch": 5.31002913025385, "grad_norm": 0.26953125, "learning_rate": 3.1691194791759693e-06, "loss": 0.2336, "step": 12760 }, { "epoch": 5.318352059925093, "grad_norm": 0.296875, "learning_rate": 3.161243006869471e-06, "loss": 0.2298, "step": 12780 }, { "epoch": 5.326674989596338, "grad_norm": 0.3125, "learning_rate": 3.1533594708519067e-06, "loss": 0.2353, "step": 12800 }, { "epoch": 5.326674989596338, "eval_main_loss": 0.23834091424942017, "eval_main_runtime": 6.345, "eval_main_samples_per_second": 29.945, "eval_main_steps_per_second": 3.783, "step": 12800 }, { "epoch": 5.326674989596338, "eval_anatomy_loss": 2.8317482471466064, "eval_anatomy_runtime": 0.2676, "eval_anatomy_samples_per_second": 7.474, "eval_anatomy_steps_per_second": 3.737, "step": 12800 }, { "epoch": 5.326674989596338, "eval_college_mathematics_loss": 2.0519726276397705, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.506, "eval_college_mathematics_steps_per_second": 3.753, "step": 12800 }, { "epoch": 5.326674989596338, "eval_international_law_loss": 3.0644750595092773, "eval_international_law_runtime": 0.2671, "eval_international_law_samples_per_second": 7.487, "eval_international_law_steps_per_second": 3.744, "step": 12800 }, { "epoch": 5.3349979192675825, "grad_norm": 0.279296875, "learning_rate": 3.145468955338942e-06, "loss": 0.2312, "step": 12820 }, { "epoch": 5.343320848938826, "grad_norm": 0.279296875, "learning_rate": 3.1375715446208014e-06, "loss": 0.2297, "step": 12840 }, { "epoch": 5.351643778610071, "grad_norm": 0.275390625, "learning_rate": 3.129667323061369e-06, "loss": 0.2341, "step": 12860 }, { "epoch": 5.359966708281315, "grad_norm": 0.30859375, "learning_rate": 3.1217563750972827e-06, "loss": 0.232, "step": 12880 }, { "epoch": 5.368289637952559, "grad_norm": 0.265625, "learning_rate": 3.1138387852370385e-06, "loss": 0.2255, "step": 12900 }, { "epoch": 5.368289637952559, "eval_main_loss": 0.23833264410495758, "eval_main_runtime": 6.3508, "eval_main_samples_per_second": 29.917, "eval_main_steps_per_second": 3.779, "step": 12900 }, { "epoch": 5.368289637952559, "eval_anatomy_loss": 2.8347291946411133, "eval_anatomy_runtime": 0.2672, "eval_anatomy_samples_per_second": 7.484, "eval_anatomy_steps_per_second": 3.742, "step": 12900 }, { "epoch": 5.368289637952559, "eval_college_mathematics_loss": 2.050865411758423, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.49, "eval_college_mathematics_steps_per_second": 3.745, "step": 12900 }, { "epoch": 5.368289637952559, "eval_international_law_loss": 3.068070411682129, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.494, "eval_international_law_steps_per_second": 3.747, "step": 12900 }, { "epoch": 5.376612567623804, "grad_norm": 0.27734375, "learning_rate": 3.10591463806008e-06, "loss": 0.2328, "step": 12920 }, { "epoch": 5.384935497295048, "grad_norm": 0.29296875, "learning_rate": 3.0979840182159033e-06, "loss": 0.2346, "step": 12940 }, { "epoch": 5.393258426966292, "grad_norm": 0.275390625, "learning_rate": 3.0900470104231456e-06, "loss": 0.2319, "step": 12960 }, { "epoch": 5.401581356637537, "grad_norm": 0.2890625, "learning_rate": 3.0821036994686837e-06, "loss": 0.2343, "step": 12980 }, { "epoch": 5.40990428630878, "grad_norm": 0.259765625, "learning_rate": 3.074154170206731e-06, "loss": 0.2299, "step": 13000 }, { "epoch": 5.40990428630878, "eval_main_loss": 0.23825454711914062, "eval_main_runtime": 6.3483, "eval_main_samples_per_second": 29.929, "eval_main_steps_per_second": 3.781, "step": 13000 }, { "epoch": 5.40990428630878, "eval_anatomy_loss": 2.83435320854187, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.505, "eval_anatomy_steps_per_second": 3.752, "step": 13000 }, { "epoch": 5.40990428630878, "eval_college_mathematics_loss": 2.0521814823150635, "eval_college_mathematics_runtime": 0.2672, "eval_college_mathematics_samples_per_second": 7.485, "eval_college_mathematics_steps_per_second": 3.743, "step": 13000 }, { "epoch": 5.40990428630878, "eval_international_law_loss": 3.065732479095459, "eval_international_law_runtime": 0.2669, "eval_international_law_samples_per_second": 7.494, "eval_international_law_steps_per_second": 3.747, "step": 13000 }, { "epoch": 5.418227215980025, "grad_norm": 0.28125, "learning_rate": 3.066198507557923e-06, "loss": 0.2285, "step": 13020 }, { "epoch": 5.4265501456512695, "grad_norm": 0.259765625, "learning_rate": 3.0582367965084183e-06, "loss": 0.2353, "step": 13040 }, { "epoch": 5.434873075322513, "grad_norm": 0.328125, "learning_rate": 3.0502691221089846e-06, "loss": 0.2346, "step": 13060 }, { "epoch": 5.443196004993758, "grad_norm": 0.27734375, "learning_rate": 3.042295569474096e-06, "loss": 0.2344, "step": 13080 }, { "epoch": 5.451518934665002, "grad_norm": 0.287109375, "learning_rate": 3.03431622378102e-06, "loss": 0.2324, "step": 13100 }, { "epoch": 5.451518934665002, "eval_main_loss": 0.23835337162017822, "eval_main_runtime": 6.3367, "eval_main_samples_per_second": 29.984, "eval_main_steps_per_second": 3.787, "step": 13100 }, { "epoch": 5.451518934665002, "eval_anatomy_loss": 2.835411787033081, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.479, "eval_anatomy_steps_per_second": 3.74, "step": 13100 }, { "epoch": 5.451518934665002, "eval_college_mathematics_loss": 2.0492405891418457, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.527, "eval_college_mathematics_steps_per_second": 3.764, "step": 13100 }, { "epoch": 5.451518934665002, "eval_international_law_loss": 3.0660183429718018, "eval_international_law_runtime": 0.2667, "eval_international_law_samples_per_second": 7.499, "eval_international_law_steps_per_second": 3.75, "step": 13100 }, { "epoch": 5.459841864336246, "grad_norm": 0.3125, "learning_rate": 3.0263311702689045e-06, "loss": 0.2356, "step": 13120 }, { "epoch": 5.468164794007491, "grad_norm": 0.298828125, "learning_rate": 3.018340494237878e-06, "loss": 0.2302, "step": 13140 }, { "epoch": 5.476487723678735, "grad_norm": 0.287109375, "learning_rate": 3.010344281048125e-06, "loss": 0.2322, "step": 13160 }, { "epoch": 5.484810653349979, "grad_norm": 0.279296875, "learning_rate": 3.0023426161189828e-06, "loss": 0.2329, "step": 13180 }, { "epoch": 5.493133583021224, "grad_norm": 0.31640625, "learning_rate": 2.994335584928028e-06, "loss": 0.233, "step": 13200 }, { "epoch": 5.493133583021224, "eval_main_loss": 0.23826710879802704, "eval_main_runtime": 6.3331, "eval_main_samples_per_second": 30.001, "eval_main_steps_per_second": 3.79, "step": 13200 }, { "epoch": 5.493133583021224, "eval_anatomy_loss": 2.8339459896087646, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.501, "eval_anatomy_steps_per_second": 3.751, "step": 13200 }, { "epoch": 5.493133583021224, "eval_college_mathematics_loss": 2.051870107650757, "eval_college_mathematics_runtime": 0.2669, "eval_college_mathematics_samples_per_second": 7.494, "eval_college_mathematics_steps_per_second": 3.747, "step": 13200 }, { "epoch": 5.493133583021224, "eval_international_law_loss": 3.066417694091797, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.517, "eval_international_law_steps_per_second": 3.758, "step": 13200 }, { "epoch": 5.501456512692467, "grad_norm": 0.283203125, "learning_rate": 2.9863232730101616e-06, "loss": 0.231, "step": 13220 }, { "epoch": 5.509779442363712, "grad_norm": 0.314453125, "learning_rate": 2.9783057659566945e-06, "loss": 0.2367, "step": 13240 }, { "epoch": 5.5181023720349565, "grad_norm": 0.267578125, "learning_rate": 2.9702831494144354e-06, "loss": 0.2299, "step": 13260 }, { "epoch": 5.526425301706201, "grad_norm": 0.3203125, "learning_rate": 2.9622555090847756e-06, "loss": 0.2303, "step": 13280 }, { "epoch": 5.534748231377445, "grad_norm": 0.232421875, "learning_rate": 2.954222930722771e-06, "loss": 0.2281, "step": 13300 }, { "epoch": 5.534748231377445, "eval_main_loss": 0.23840035498142242, "eval_main_runtime": 6.3399, "eval_main_samples_per_second": 29.969, "eval_main_steps_per_second": 3.786, "step": 13300 }, { "epoch": 5.534748231377445, "eval_anatomy_loss": 2.832589864730835, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.501, "eval_anatomy_steps_per_second": 3.751, "step": 13300 }, { "epoch": 5.534748231377445, "eval_college_mathematics_loss": 2.051084518432617, "eval_college_mathematics_runtime": 0.2678, "eval_college_mathematics_samples_per_second": 7.468, "eval_college_mathematics_steps_per_second": 3.734, "step": 13300 }, { "epoch": 5.534748231377445, "eval_international_law_loss": 3.0671823024749756, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.509, "eval_international_law_steps_per_second": 3.754, "step": 13300 }, { "epoch": 5.543071161048689, "grad_norm": 0.2734375, "learning_rate": 2.9461855001362298e-06, "loss": 0.2315, "step": 13320 }, { "epoch": 5.551394090719933, "grad_norm": 0.2060546875, "learning_rate": 2.9381433031847946e-06, "loss": 0.2309, "step": 13340 }, { "epoch": 5.559717020391178, "grad_norm": 0.294921875, "learning_rate": 2.9300964257790215e-06, "loss": 0.2326, "step": 13360 }, { "epoch": 5.568039950062422, "grad_norm": 0.25, "learning_rate": 2.9220449538794676e-06, "loss": 0.2342, "step": 13380 }, { "epoch": 5.576362879733666, "grad_norm": 0.3125, "learning_rate": 2.9139889734957698e-06, "loss": 0.23, "step": 13400 }, { "epoch": 5.576362879733666, "eval_main_loss": 0.23834313452243805, "eval_main_runtime": 6.3321, "eval_main_samples_per_second": 30.006, "eval_main_steps_per_second": 3.79, "step": 13400 }, { "epoch": 5.576362879733666, "eval_anatomy_loss": 2.8331494331359863, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.512, "eval_anatomy_steps_per_second": 3.756, "step": 13400 }, { "epoch": 5.576362879733666, "eval_college_mathematics_loss": 2.0503456592559814, "eval_college_mathematics_runtime": 0.2656, "eval_college_mathematics_samples_per_second": 7.529, "eval_college_mathematics_steps_per_second": 3.764, "step": 13400 }, { "epoch": 5.576362879733666, "eval_international_law_loss": 3.065586805343628, "eval_international_law_runtime": 0.2652, "eval_international_law_samples_per_second": 7.542, "eval_international_law_steps_per_second": 3.771, "step": 13400 }, { "epoch": 5.5846858094049106, "grad_norm": 0.296875, "learning_rate": 2.9059285706857287e-06, "loss": 0.2328, "step": 13420 }, { "epoch": 5.593008739076155, "grad_norm": 0.322265625, "learning_rate": 2.897863831554385e-06, "loss": 0.2294, "step": 13440 }, { "epoch": 5.601331668747399, "grad_norm": 0.29296875, "learning_rate": 2.889794842253102e-06, "loss": 0.2362, "step": 13460 }, { "epoch": 5.6096545984186434, "grad_norm": 0.337890625, "learning_rate": 2.881721688978647e-06, "loss": 0.2309, "step": 13480 }, { "epoch": 5.617977528089888, "grad_norm": 0.279296875, "learning_rate": 2.8736444579722665e-06, "loss": 0.2347, "step": 13500 }, { "epoch": 5.617977528089888, "eval_main_loss": 0.238357812166214, "eval_main_runtime": 6.3227, "eval_main_samples_per_second": 30.051, "eval_main_steps_per_second": 3.796, "step": 13500 }, { "epoch": 5.617977528089888, "eval_anatomy_loss": 2.834318161010742, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.511, "eval_anatomy_steps_per_second": 3.756, "step": 13500 }, { "epoch": 5.617977528089888, "eval_college_mathematics_loss": 2.0517024993896484, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.532, "eval_college_mathematics_steps_per_second": 3.766, "step": 13500 }, { "epoch": 5.617977528089888, "eval_international_law_loss": 3.0657360553741455, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 13500 }, { "epoch": 5.626300457761132, "grad_norm": 0.2890625, "learning_rate": 2.865563235518772e-06, "loss": 0.2299, "step": 13520 }, { "epoch": 5.634623387432376, "grad_norm": 0.259765625, "learning_rate": 2.8574781079456065e-06, "loss": 0.2336, "step": 13540 }, { "epoch": 5.64294631710362, "grad_norm": 0.251953125, "learning_rate": 2.8493891616219354e-06, "loss": 0.2347, "step": 13560 }, { "epoch": 5.651269246774865, "grad_norm": 0.23046875, "learning_rate": 2.841296482957715e-06, "loss": 0.2323, "step": 13580 }, { "epoch": 5.659592176446109, "grad_norm": 0.23046875, "learning_rate": 2.8332001584027724e-06, "loss": 0.233, "step": 13600 }, { "epoch": 5.659592176446109, "eval_main_loss": 0.23819133639335632, "eval_main_runtime": 6.333, "eval_main_samples_per_second": 30.002, "eval_main_steps_per_second": 3.79, "step": 13600 }, { "epoch": 5.659592176446109, "eval_anatomy_loss": 2.8354241847991943, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.522, "eval_anatomy_steps_per_second": 3.761, "step": 13600 }, { "epoch": 5.659592176446109, "eval_college_mathematics_loss": 2.0492091178894043, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.508, "eval_college_mathematics_steps_per_second": 3.754, "step": 13600 }, { "epoch": 5.659592176446109, "eval_international_law_loss": 3.063929557800293, "eval_international_law_runtime": 0.266, "eval_international_law_samples_per_second": 7.52, "eval_international_law_steps_per_second": 3.76, "step": 13600 }, { "epoch": 5.667915106117353, "grad_norm": 0.2392578125, "learning_rate": 2.825100274445882e-06, "loss": 0.2308, "step": 13620 }, { "epoch": 5.6762380357885975, "grad_norm": 0.2890625, "learning_rate": 2.8169969176138435e-06, "loss": 0.2305, "step": 13640 }, { "epoch": 5.684560965459842, "grad_norm": 0.271484375, "learning_rate": 2.808890174470551e-06, "loss": 0.2316, "step": 13660 }, { "epoch": 5.692883895131086, "grad_norm": 0.302734375, "learning_rate": 2.8007801316160767e-06, "loss": 0.2328, "step": 13680 }, { "epoch": 5.70120682480233, "grad_norm": 0.2421875, "learning_rate": 2.79266687568574e-06, "loss": 0.2303, "step": 13700 }, { "epoch": 5.70120682480233, "eval_main_loss": 0.23830586671829224, "eval_main_runtime": 6.3319, "eval_main_samples_per_second": 30.007, "eval_main_steps_per_second": 3.79, "step": 13700 }, { "epoch": 5.70120682480233, "eval_anatomy_loss": 2.833200216293335, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.48, "eval_anatomy_steps_per_second": 3.74, "step": 13700 }, { "epoch": 5.70120682480233, "eval_college_mathematics_loss": 2.051815986633301, "eval_college_mathematics_runtime": 0.2651, "eval_college_mathematics_samples_per_second": 7.543, "eval_college_mathematics_steps_per_second": 3.772, "step": 13700 }, { "epoch": 5.70120682480233, "eval_international_law_loss": 3.0680174827575684, "eval_international_law_runtime": 0.2674, "eval_international_law_samples_per_second": 7.478, "eval_international_law_steps_per_second": 3.739, "step": 13700 }, { "epoch": 5.709529754473575, "grad_norm": 0.25, "learning_rate": 2.784550493349185e-06, "loss": 0.2286, "step": 13720 }, { "epoch": 5.717852684144819, "grad_norm": 0.271484375, "learning_rate": 2.776431071309453e-06, "loss": 0.227, "step": 13740 }, { "epoch": 5.726175613816063, "grad_norm": 0.3046875, "learning_rate": 2.7683086963020566e-06, "loss": 0.2313, "step": 13760 }, { "epoch": 5.734498543487308, "grad_norm": 0.25390625, "learning_rate": 2.7601834550940538e-06, "loss": 0.2275, "step": 13780 }, { "epoch": 5.742821473158552, "grad_norm": 0.2490234375, "learning_rate": 2.7520554344831194e-06, "loss": 0.2313, "step": 13800 }, { "epoch": 5.742821473158552, "eval_main_loss": 0.23832766711711884, "eval_main_runtime": 6.3298, "eval_main_samples_per_second": 30.017, "eval_main_steps_per_second": 3.792, "step": 13800 }, { "epoch": 5.742821473158552, "eval_anatomy_loss": 2.833317995071411, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.527, "eval_anatomy_steps_per_second": 3.763, "step": 13800 }, { "epoch": 5.742821473158552, "eval_college_mathematics_loss": 2.052196502685547, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.487, "eval_college_mathematics_steps_per_second": 3.743, "step": 13800 }, { "epoch": 5.742821473158552, "eval_international_law_loss": 3.066879987716675, "eval_international_law_runtime": 0.2654, "eval_international_law_samples_per_second": 7.536, "eval_international_law_steps_per_second": 3.768, "step": 13800 }, { "epoch": 5.751144402829796, "grad_norm": 0.2451171875, "learning_rate": 2.743924721296622e-06, "loss": 0.2323, "step": 13820 }, { "epoch": 5.759467332501041, "grad_norm": 0.345703125, "learning_rate": 2.735791402390691e-06, "loss": 0.2325, "step": 13840 }, { "epoch": 5.7677902621722845, "grad_norm": 0.275390625, "learning_rate": 2.727655564649293e-06, "loss": 0.2308, "step": 13860 }, { "epoch": 5.776113191843529, "grad_norm": 0.30078125, "learning_rate": 2.719517294983299e-06, "loss": 0.2337, "step": 13880 }, { "epoch": 5.784436121514773, "grad_norm": 0.2109375, "learning_rate": 2.7113766803295637e-06, "loss": 0.2319, "step": 13900 }, { "epoch": 5.784436121514773, "eval_main_loss": 0.23832297325134277, "eval_main_runtime": 6.3179, "eval_main_samples_per_second": 30.073, "eval_main_steps_per_second": 3.799, "step": 13900 }, { "epoch": 5.784436121514773, "eval_anatomy_loss": 2.8318614959716797, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.526, "eval_anatomy_steps_per_second": 3.763, "step": 13900 }, { "epoch": 5.784436121514773, "eval_college_mathematics_loss": 2.0509073734283447, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.508, "eval_college_mathematics_steps_per_second": 3.754, "step": 13900 }, { "epoch": 5.784436121514773, "eval_international_law_loss": 3.0668725967407227, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.513, "eval_international_law_steps_per_second": 3.756, "step": 13900 }, { "epoch": 5.792759051186017, "grad_norm": 0.298828125, "learning_rate": 2.7032338076499882e-06, "loss": 0.2317, "step": 13920 }, { "epoch": 5.801081980857262, "grad_norm": 0.2890625, "learning_rate": 2.695088763930596e-06, "loss": 0.2333, "step": 13940 }, { "epoch": 5.809404910528506, "grad_norm": 0.2890625, "learning_rate": 2.6869416361806026e-06, "loss": 0.2296, "step": 13960 }, { "epoch": 5.81772784019975, "grad_norm": 0.287109375, "learning_rate": 2.6787925114314885e-06, "loss": 0.2286, "step": 13980 }, { "epoch": 5.826050769870995, "grad_norm": 0.283203125, "learning_rate": 2.6706414767360615e-06, "loss": 0.233, "step": 14000 }, { "epoch": 5.826050769870995, "eval_main_loss": 0.23830385506153107, "eval_main_runtime": 6.3332, "eval_main_samples_per_second": 30.001, "eval_main_steps_per_second": 3.79, "step": 14000 }, { "epoch": 5.826050769870995, "eval_anatomy_loss": 2.8321759700775146, "eval_anatomy_runtime": 0.2656, "eval_anatomy_samples_per_second": 7.53, "eval_anatomy_steps_per_second": 3.765, "step": 14000 }, { "epoch": 5.826050769870995, "eval_college_mathematics_loss": 2.050740957260132, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.509, "eval_college_mathematics_steps_per_second": 3.754, "step": 14000 }, { "epoch": 5.826050769870995, "eval_international_law_loss": 3.068516492843628, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 14000 }, { "epoch": 5.834373699542239, "grad_norm": 0.2734375, "learning_rate": 2.6624886191675387e-06, "loss": 0.2309, "step": 14020 }, { "epoch": 5.842696629213483, "grad_norm": 0.2392578125, "learning_rate": 2.6543340258186063e-06, "loss": 0.229, "step": 14040 }, { "epoch": 5.851019558884728, "grad_norm": 0.2890625, "learning_rate": 2.6461777838004933e-06, "loss": 0.2307, "step": 14060 }, { "epoch": 5.8593424885559715, "grad_norm": 0.2353515625, "learning_rate": 2.6380199802420414e-06, "loss": 0.2294, "step": 14080 }, { "epoch": 5.867665418227216, "grad_norm": 0.298828125, "learning_rate": 2.629860702288773e-06, "loss": 0.234, "step": 14100 }, { "epoch": 5.867665418227216, "eval_main_loss": 0.23834149539470673, "eval_main_runtime": 6.3326, "eval_main_samples_per_second": 30.003, "eval_main_steps_per_second": 3.79, "step": 14100 }, { "epoch": 5.867665418227216, "eval_anatomy_loss": 2.8320865631103516, "eval_anatomy_runtime": 0.2669, "eval_anatomy_samples_per_second": 7.493, "eval_anatomy_steps_per_second": 3.747, "step": 14100 }, { "epoch": 5.867665418227216, "eval_college_mathematics_loss": 2.049973249435425, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.509, "eval_college_mathematics_steps_per_second": 3.755, "step": 14100 }, { "epoch": 5.867665418227216, "eval_international_law_loss": 3.065117120742798, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.515, "eval_international_law_steps_per_second": 3.758, "step": 14100 }, { "epoch": 5.87598834789846, "grad_norm": 0.265625, "learning_rate": 2.6217000371019597e-06, "loss": 0.2286, "step": 14120 }, { "epoch": 5.884311277569704, "grad_norm": 0.2060546875, "learning_rate": 2.6135380718576947e-06, "loss": 0.2321, "step": 14140 }, { "epoch": 5.892634207240949, "grad_norm": 0.30859375, "learning_rate": 2.6053748937459565e-06, "loss": 0.2322, "step": 14160 }, { "epoch": 5.900957136912194, "grad_norm": 0.25, "learning_rate": 2.597210589969682e-06, "loss": 0.2275, "step": 14180 }, { "epoch": 5.909280066583437, "grad_norm": 0.318359375, "learning_rate": 2.5890452477438318e-06, "loss": 0.2323, "step": 14200 }, { "epoch": 5.909280066583437, "eval_main_loss": 0.2382332980632782, "eval_main_runtime": 6.3329, "eval_main_samples_per_second": 30.002, "eval_main_steps_per_second": 3.79, "step": 14200 }, { "epoch": 5.909280066583437, "eval_anatomy_loss": 2.8326704502105713, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.503, "eval_anatomy_steps_per_second": 3.751, "step": 14200 }, { "epoch": 5.909280066583437, "eval_college_mathematics_loss": 2.0510854721069336, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.524, "eval_college_mathematics_steps_per_second": 3.762, "step": 14200 }, { "epoch": 5.909280066583437, "eval_international_law_loss": 3.0666379928588867, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 14200 }, { "epoch": 5.917602996254682, "grad_norm": 0.275390625, "learning_rate": 2.5808789542944585e-06, "loss": 0.2338, "step": 14220 }, { "epoch": 5.925925925925926, "grad_norm": 0.29296875, "learning_rate": 2.572711796857779e-06, "loss": 0.2326, "step": 14240 }, { "epoch": 5.93424885559717, "grad_norm": 0.25390625, "learning_rate": 2.564543862679238e-06, "loss": 0.2278, "step": 14260 }, { "epoch": 5.942571785268415, "grad_norm": 0.279296875, "learning_rate": 2.556375239012578e-06, "loss": 0.2372, "step": 14280 }, { "epoch": 5.9508947149396585, "grad_norm": 0.287109375, "learning_rate": 2.5482060131189058e-06, "loss": 0.232, "step": 14300 }, { "epoch": 5.9508947149396585, "eval_main_loss": 0.2383614033460617, "eval_main_runtime": 6.3311, "eval_main_samples_per_second": 30.01, "eval_main_steps_per_second": 3.791, "step": 14300 }, { "epoch": 5.9508947149396585, "eval_anatomy_loss": 2.8360040187835693, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.524, "eval_anatomy_steps_per_second": 3.762, "step": 14300 }, { "epoch": 5.9508947149396585, "eval_college_mathematics_loss": 2.0509347915649414, "eval_college_mathematics_runtime": 0.2653, "eval_college_mathematics_samples_per_second": 7.538, "eval_college_mathematics_steps_per_second": 3.769, "step": 14300 }, { "epoch": 5.9508947149396585, "eval_international_law_loss": 3.064924478530884, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.497, "eval_international_law_steps_per_second": 3.749, "step": 14300 }, { "epoch": 5.959217644610903, "grad_norm": 0.27734375, "learning_rate": 2.540036272265764e-06, "loss": 0.2343, "step": 14320 }, { "epoch": 5.967540574282148, "grad_norm": 0.283203125, "learning_rate": 2.5318661037261955e-06, "loss": 0.233, "step": 14340 }, { "epoch": 5.975863503953391, "grad_norm": 0.2734375, "learning_rate": 2.5236955947778096e-06, "loss": 0.2315, "step": 14360 }, { "epoch": 5.984186433624636, "grad_norm": 0.2890625, "learning_rate": 2.515524832701854e-06, "loss": 0.2357, "step": 14380 }, { "epoch": 5.992509363295881, "grad_norm": 0.310546875, "learning_rate": 2.507353904782281e-06, "loss": 0.2323, "step": 14400 }, { "epoch": 5.992509363295881, "eval_main_loss": 0.23834244906902313, "eval_main_runtime": 6.3321, "eval_main_samples_per_second": 30.006, "eval_main_steps_per_second": 3.79, "step": 14400 }, { "epoch": 5.992509363295881, "eval_anatomy_loss": 2.830299139022827, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.52, "eval_anatomy_steps_per_second": 3.76, "step": 14400 }, { "epoch": 5.992509363295881, "eval_college_mathematics_loss": 2.0514543056488037, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.504, "eval_college_mathematics_steps_per_second": 3.752, "step": 14400 }, { "epoch": 5.992509363295881, "eval_international_law_loss": 3.0648834705352783, "eval_international_law_runtime": 0.2653, "eval_international_law_samples_per_second": 7.539, "eval_international_law_steps_per_second": 3.769, "step": 14400 }, { "epoch": 6.000832292967124, "grad_norm": 0.2890625, "learning_rate": 2.4991828983048126e-06, "loss": 0.2355, "step": 14420 }, { "epoch": 6.009155222638369, "grad_norm": 0.296875, "learning_rate": 2.4910119005560123e-06, "loss": 0.2307, "step": 14440 }, { "epoch": 6.017478152309613, "grad_norm": 0.306640625, "learning_rate": 2.4828409988223487e-06, "loss": 0.2313, "step": 14460 }, { "epoch": 6.025801081980857, "grad_norm": 0.28125, "learning_rate": 2.4746702803892637e-06, "loss": 0.2315, "step": 14480 }, { "epoch": 6.034124011652102, "grad_norm": 0.25390625, "learning_rate": 2.4664998325402442e-06, "loss": 0.2318, "step": 14500 }, { "epoch": 6.034124011652102, "eval_main_loss": 0.23824182152748108, "eval_main_runtime": 6.3272, "eval_main_samples_per_second": 30.029, "eval_main_steps_per_second": 3.793, "step": 14500 }, { "epoch": 6.034124011652102, "eval_anatomy_loss": 2.8344292640686035, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.517, "eval_anatomy_steps_per_second": 3.758, "step": 14500 }, { "epoch": 6.034124011652102, "eval_college_mathematics_loss": 2.0524632930755615, "eval_college_mathematics_runtime": 0.2661, "eval_college_mathematics_samples_per_second": 7.517, "eval_college_mathematics_steps_per_second": 3.759, "step": 14500 }, { "epoch": 6.034124011652102, "eval_international_law_loss": 3.0627036094665527, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.514, "eval_international_law_steps_per_second": 3.757, "step": 14500 }, { "epoch": 6.0424469413233455, "grad_norm": 0.30078125, "learning_rate": 2.4583297425558848e-06, "loss": 0.2297, "step": 14520 }, { "epoch": 6.05076987099459, "grad_norm": 0.29296875, "learning_rate": 2.4501600977129564e-06, "loss": 0.2293, "step": 14540 }, { "epoch": 6.059092800665835, "grad_norm": 0.291015625, "learning_rate": 2.441990985283476e-06, "loss": 0.2329, "step": 14560 }, { "epoch": 6.067415730337078, "grad_norm": 0.287109375, "learning_rate": 2.433822492533774e-06, "loss": 0.2308, "step": 14580 }, { "epoch": 6.075738660008323, "grad_norm": 0.306640625, "learning_rate": 2.4256547067235577e-06, "loss": 0.2313, "step": 14600 }, { "epoch": 6.075738660008323, "eval_main_loss": 0.2382703721523285, "eval_main_runtime": 6.3262, "eval_main_samples_per_second": 30.034, "eval_main_steps_per_second": 3.794, "step": 14600 }, { "epoch": 6.075738660008323, "eval_anatomy_loss": 2.832831859588623, "eval_anatomy_runtime": 0.2652, "eval_anatomy_samples_per_second": 7.543, "eval_anatomy_steps_per_second": 3.771, "step": 14600 }, { "epoch": 6.075738660008323, "eval_college_mathematics_loss": 2.0489039421081543, "eval_college_mathematics_runtime": 0.2646, "eval_college_mathematics_samples_per_second": 7.56, "eval_college_mathematics_steps_per_second": 3.78, "step": 14600 }, { "epoch": 6.075738660008323, "eval_international_law_loss": 3.067589044570923, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.514, "eval_international_law_steps_per_second": 3.757, "step": 14600 }, { "epoch": 6.084061589679568, "grad_norm": 0.25, "learning_rate": 2.4174877151049852e-06, "loss": 0.2373, "step": 14620 }, { "epoch": 6.092384519350811, "grad_norm": 0.275390625, "learning_rate": 2.4093216049217315e-06, "loss": 0.2279, "step": 14640 }, { "epoch": 6.100707449022056, "grad_norm": 0.291015625, "learning_rate": 2.4011564634080527e-06, "loss": 0.2341, "step": 14660 }, { "epoch": 6.1090303786933005, "grad_norm": 0.345703125, "learning_rate": 2.3929923777878596e-06, "loss": 0.2316, "step": 14680 }, { "epoch": 6.117353308364544, "grad_norm": 0.25390625, "learning_rate": 2.3848294352737837e-06, "loss": 0.2363, "step": 14700 }, { "epoch": 6.117353308364544, "eval_main_loss": 0.23824332654476166, "eval_main_runtime": 6.3273, "eval_main_samples_per_second": 30.028, "eval_main_steps_per_second": 3.793, "step": 14700 }, { "epoch": 6.117353308364544, "eval_anatomy_loss": 2.832658290863037, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.497, "eval_anatomy_steps_per_second": 3.748, "step": 14700 }, { "epoch": 6.117353308364544, "eval_college_mathematics_loss": 2.05214524269104, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.532, "eval_college_mathematics_steps_per_second": 3.766, "step": 14700 }, { "epoch": 6.117353308364544, "eval_international_law_loss": 3.068110942840576, "eval_international_law_runtime": 0.2657, "eval_international_law_samples_per_second": 7.528, "eval_international_law_steps_per_second": 3.764, "step": 14700 }, { "epoch": 6.125676238035789, "grad_norm": 0.31640625, "learning_rate": 2.3766677230662413e-06, "loss": 0.2342, "step": 14720 }, { "epoch": 6.1339991677070325, "grad_norm": 0.267578125, "learning_rate": 2.368507328352511e-06, "loss": 0.2281, "step": 14740 }, { "epoch": 6.142322097378277, "grad_norm": 0.263671875, "learning_rate": 2.3603483383057944e-06, "loss": 0.2332, "step": 14760 }, { "epoch": 6.150645027049522, "grad_norm": 0.27734375, "learning_rate": 2.352190840084288e-06, "loss": 0.2308, "step": 14780 }, { "epoch": 6.158967956720765, "grad_norm": 0.30859375, "learning_rate": 2.3440349208302553e-06, "loss": 0.2364, "step": 14800 }, { "epoch": 6.158967956720765, "eval_main_loss": 0.2383221834897995, "eval_main_runtime": 6.332, "eval_main_samples_per_second": 30.006, "eval_main_steps_per_second": 3.79, "step": 14800 }, { "epoch": 6.158967956720765, "eval_anatomy_loss": 2.832653045654297, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.506, "eval_anatomy_steps_per_second": 3.753, "step": 14800 }, { "epoch": 6.158967956720765, "eval_college_mathematics_loss": 2.0538275241851807, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.509, "eval_college_mathematics_steps_per_second": 3.755, "step": 14800 }, { "epoch": 6.158967956720765, "eval_international_law_loss": 3.0637497901916504, "eval_international_law_runtime": 0.2654, "eval_international_law_samples_per_second": 7.536, "eval_international_law_steps_per_second": 3.768, "step": 14800 }, { "epoch": 6.16729088639201, "grad_norm": 0.26171875, "learning_rate": 2.3358806676690855e-06, "loss": 0.2316, "step": 14820 }, { "epoch": 6.1756138160632545, "grad_norm": 0.265625, "learning_rate": 2.327728167708377e-06, "loss": 0.2321, "step": 14840 }, { "epoch": 6.183936745734498, "grad_norm": 0.2109375, "learning_rate": 2.3195775080369954e-06, "loss": 0.2358, "step": 14860 }, { "epoch": 6.192259675405743, "grad_norm": 0.296875, "learning_rate": 2.3114287757241487e-06, "loss": 0.2296, "step": 14880 }, { "epoch": 6.200582605076987, "grad_norm": 0.29296875, "learning_rate": 2.3032820578184567e-06, "loss": 0.2284, "step": 14900 }, { "epoch": 6.200582605076987, "eval_main_loss": 0.23826871812343597, "eval_main_runtime": 6.3212, "eval_main_samples_per_second": 30.058, "eval_main_steps_per_second": 3.797, "step": 14900 }, { "epoch": 6.200582605076987, "eval_anatomy_loss": 2.8343729972839355, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.526, "eval_anatomy_steps_per_second": 3.763, "step": 14900 }, { "epoch": 6.200582605076987, "eval_college_mathematics_loss": 2.051208257675171, "eval_college_mathematics_runtime": 0.266, "eval_college_mathematics_samples_per_second": 7.52, "eval_college_mathematics_steps_per_second": 3.76, "step": 14900 }, { "epoch": 6.200582605076987, "eval_international_law_loss": 3.064528465270996, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.505, "eval_international_law_steps_per_second": 3.753, "step": 14900 }, { "epoch": 6.208905534748231, "grad_norm": 0.2490234375, "learning_rate": 2.29513744134702e-06, "loss": 0.2272, "step": 14920 }, { "epoch": 6.217228464419476, "grad_norm": 0.3046875, "learning_rate": 2.286995013314488e-06, "loss": 0.2331, "step": 14940 }, { "epoch": 6.22555139409072, "grad_norm": 0.318359375, "learning_rate": 2.2788548607021366e-06, "loss": 0.2334, "step": 14960 }, { "epoch": 6.233874323761964, "grad_norm": 0.2734375, "learning_rate": 2.270717070466933e-06, "loss": 0.2327, "step": 14980 }, { "epoch": 6.242197253433209, "grad_norm": 0.306640625, "learning_rate": 2.262581729540605e-06, "loss": 0.2337, "step": 15000 }, { "epoch": 6.242197253433209, "eval_main_loss": 0.238285630941391, "eval_main_runtime": 6.3306, "eval_main_samples_per_second": 30.013, "eval_main_steps_per_second": 3.791, "step": 15000 }, { "epoch": 6.242197253433209, "eval_anatomy_loss": 2.83296537399292, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.521, "eval_anatomy_steps_per_second": 3.761, "step": 15000 }, { "epoch": 6.242197253433209, "eval_college_mathematics_loss": 2.051861047744751, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.532, "eval_college_mathematics_steps_per_second": 3.766, "step": 15000 }, { "epoch": 6.242197253433209, "eval_international_law_loss": 3.066511631011963, "eval_international_law_runtime": 0.2683, "eval_international_law_samples_per_second": 7.455, "eval_international_law_steps_per_second": 3.728, "step": 15000 }, { "epoch": 6.250520183104452, "grad_norm": 0.291015625, "learning_rate": 2.2544489248287218e-06, "loss": 0.2295, "step": 15020 }, { "epoch": 6.258843112775697, "grad_norm": 0.27734375, "learning_rate": 2.246318743209753e-06, "loss": 0.2312, "step": 15040 }, { "epoch": 6.2671660424469415, "grad_norm": 0.2421875, "learning_rate": 2.2381912715341528e-06, "loss": 0.2327, "step": 15060 }, { "epoch": 6.275488972118185, "grad_norm": 0.2890625, "learning_rate": 2.2300665966234243e-06, "loss": 0.2317, "step": 15080 }, { "epoch": 6.28381190178943, "grad_norm": 0.30078125, "learning_rate": 2.221944805269192e-06, "loss": 0.2274, "step": 15100 }, { "epoch": 6.28381190178943, "eval_main_loss": 0.23841296136379242, "eval_main_runtime": 6.3599, "eval_main_samples_per_second": 29.875, "eval_main_steps_per_second": 3.774, "step": 15100 }, { "epoch": 6.28381190178943, "eval_anatomy_loss": 2.8354194164276123, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.505, "eval_anatomy_steps_per_second": 3.753, "step": 15100 }, { "epoch": 6.28381190178943, "eval_college_mathematics_loss": 2.0525858402252197, "eval_college_mathematics_runtime": 0.2681, "eval_college_mathematics_samples_per_second": 7.461, "eval_college_mathematics_steps_per_second": 3.73, "step": 15100 }, { "epoch": 6.28381190178943, "eval_international_law_loss": 3.0661587715148926, "eval_international_law_runtime": 0.2672, "eval_international_law_samples_per_second": 7.484, "eval_international_law_steps_per_second": 3.742, "step": 15100 }, { "epoch": 6.292134831460674, "grad_norm": 0.267578125, "learning_rate": 2.2138259842322794e-06, "loss": 0.2311, "step": 15120 }, { "epoch": 6.300457761131918, "grad_norm": 0.3203125, "learning_rate": 2.2057102202417806e-06, "loss": 0.2308, "step": 15140 }, { "epoch": 6.308780690803163, "grad_norm": 0.28125, "learning_rate": 2.1975975999941298e-06, "loss": 0.232, "step": 15160 }, { "epoch": 6.317103620474407, "grad_norm": 0.259765625, "learning_rate": 2.1894882101521807e-06, "loss": 0.2317, "step": 15180 }, { "epoch": 6.325426550145651, "grad_norm": 0.275390625, "learning_rate": 2.181382137344278e-06, "loss": 0.2302, "step": 15200 }, { "epoch": 6.325426550145651, "eval_main_loss": 0.23836155235767365, "eval_main_runtime": 6.3527, "eval_main_samples_per_second": 29.909, "eval_main_steps_per_second": 3.778, "step": 15200 }, { "epoch": 6.325426550145651, "eval_anatomy_loss": 2.8317906856536865, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.506, "eval_anatomy_steps_per_second": 3.753, "step": 15200 }, { "epoch": 6.325426550145651, "eval_college_mathematics_loss": 2.054029703140259, "eval_college_mathematics_runtime": 0.2679, "eval_college_mathematics_samples_per_second": 7.467, "eval_college_mathematics_steps_per_second": 3.733, "step": 15200 }, { "epoch": 6.325426550145651, "eval_international_law_loss": 3.0642471313476562, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.511, "eval_international_law_steps_per_second": 3.755, "step": 15200 }, { "epoch": 6.333749479816896, "grad_norm": 0.26171875, "learning_rate": 2.173279468163331e-06, "loss": 0.2322, "step": 15220 }, { "epoch": 6.34207240948814, "grad_norm": 0.291015625, "learning_rate": 2.1651802891658897e-06, "loss": 0.2321, "step": 15240 }, { "epoch": 6.350395339159384, "grad_norm": 0.345703125, "learning_rate": 2.1570846868712227e-06, "loss": 0.2378, "step": 15260 }, { "epoch": 6.3587182688306285, "grad_norm": 0.25, "learning_rate": 2.1489927477603884e-06, "loss": 0.2301, "step": 15280 }, { "epoch": 6.367041198501873, "grad_norm": 0.26953125, "learning_rate": 2.1409045582753144e-06, "loss": 0.234, "step": 15300 }, { "epoch": 6.367041198501873, "eval_main_loss": 0.23826418817043304, "eval_main_runtime": 6.3363, "eval_main_samples_per_second": 29.986, "eval_main_steps_per_second": 3.788, "step": 15300 }, { "epoch": 6.367041198501873, "eval_anatomy_loss": 2.8337223529815674, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.483, "eval_anatomy_steps_per_second": 3.741, "step": 15300 }, { "epoch": 6.367041198501873, "eval_college_mathematics_loss": 2.0507359504699707, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.503, "eval_college_mathematics_steps_per_second": 3.751, "step": 15300 }, { "epoch": 6.367041198501873, "eval_international_law_loss": 3.065744161605835, "eval_international_law_runtime": 0.2653, "eval_international_law_samples_per_second": 7.538, "eval_international_law_steps_per_second": 3.769, "step": 15300 }, { "epoch": 6.375364128173117, "grad_norm": 0.318359375, "learning_rate": 2.132820204817872e-06, "loss": 0.2279, "step": 15320 }, { "epoch": 6.383687057844361, "grad_norm": 0.251953125, "learning_rate": 2.124739773748955e-06, "loss": 0.2297, "step": 15340 }, { "epoch": 6.392009987515605, "grad_norm": 0.275390625, "learning_rate": 2.1166633513875563e-06, "loss": 0.2296, "step": 15360 }, { "epoch": 6.40033291718685, "grad_norm": 0.27734375, "learning_rate": 2.1085910240098456e-06, "loss": 0.2306, "step": 15380 }, { "epoch": 6.408655846858094, "grad_norm": 0.2890625, "learning_rate": 2.1005228778482484e-06, "loss": 0.2319, "step": 15400 }, { "epoch": 6.408655846858094, "eval_main_loss": 0.238305002450943, "eval_main_runtime": 6.3303, "eval_main_samples_per_second": 30.014, "eval_main_steps_per_second": 3.791, "step": 15400 }, { "epoch": 6.408655846858094, "eval_anatomy_loss": 2.8330578804016113, "eval_anatomy_runtime": 0.2654, "eval_anatomy_samples_per_second": 7.535, "eval_anatomy_steps_per_second": 3.768, "step": 15400 }, { "epoch": 6.408655846858094, "eval_college_mathematics_loss": 2.0521047115325928, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.498, "eval_college_mathematics_steps_per_second": 3.749, "step": 15400 }, { "epoch": 6.408655846858094, "eval_international_law_loss": 3.0654828548431396, "eval_international_law_runtime": 0.4213, "eval_international_law_samples_per_second": 4.748, "eval_international_law_steps_per_second": 2.374, "step": 15400 }, { "epoch": 6.416978776529338, "grad_norm": 0.265625, "learning_rate": 2.0924589990905253e-06, "loss": 0.2338, "step": 15420 }, { "epoch": 6.425301706200583, "grad_norm": 0.2734375, "learning_rate": 2.084399473878848e-06, "loss": 0.2353, "step": 15440 }, { "epoch": 6.433624635871827, "grad_norm": 0.28515625, "learning_rate": 2.0763443883088833e-06, "loss": 0.2326, "step": 15460 }, { "epoch": 6.441947565543071, "grad_norm": 0.2255859375, "learning_rate": 2.068293828428872e-06, "loss": 0.2361, "step": 15480 }, { "epoch": 6.4502704952143155, "grad_norm": 0.33203125, "learning_rate": 2.0602478802387085e-06, "loss": 0.2288, "step": 15500 }, { "epoch": 6.4502704952143155, "eval_main_loss": 0.23823831975460052, "eval_main_runtime": 6.3318, "eval_main_samples_per_second": 30.007, "eval_main_steps_per_second": 3.79, "step": 15500 }, { "epoch": 6.4502704952143155, "eval_anatomy_loss": 2.834176540374756, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.51, "eval_anatomy_steps_per_second": 3.755, "step": 15500 }, { "epoch": 6.4502704952143155, "eval_college_mathematics_loss": 2.052133560180664, "eval_college_mathematics_runtime": 0.2676, "eval_college_mathematics_samples_per_second": 7.473, "eval_college_mathematics_steps_per_second": 3.736, "step": 15500 }, { "epoch": 6.4502704952143155, "eval_international_law_loss": 3.066138982772827, "eval_international_law_runtime": 0.4176, "eval_international_law_samples_per_second": 4.789, "eval_international_law_steps_per_second": 2.394, "step": 15500 }, { "epoch": 6.45859342488556, "grad_norm": 0.29296875, "learning_rate": 2.0522066296890226e-06, "loss": 0.2292, "step": 15520 }, { "epoch": 6.466916354556804, "grad_norm": 0.314453125, "learning_rate": 2.0441701626802647e-06, "loss": 0.2313, "step": 15540 }, { "epoch": 6.475239284228048, "grad_norm": 0.29296875, "learning_rate": 2.036138565061779e-06, "loss": 0.2322, "step": 15560 }, { "epoch": 6.483562213899292, "grad_norm": 0.287109375, "learning_rate": 2.0281119226308976e-06, "loss": 0.2331, "step": 15580 }, { "epoch": 6.491885143570537, "grad_norm": 0.26953125, "learning_rate": 2.02009032113202e-06, "loss": 0.232, "step": 15600 }, { "epoch": 6.491885143570537, "eval_main_loss": 0.2383803129196167, "eval_main_runtime": 6.3247, "eval_main_samples_per_second": 30.041, "eval_main_steps_per_second": 3.795, "step": 15600 }, { "epoch": 6.491885143570537, "eval_anatomy_loss": 2.8346643447875977, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.524, "eval_anatomy_steps_per_second": 3.762, "step": 15600 }, { "epoch": 6.491885143570537, "eval_college_mathematics_loss": 2.0503625869750977, "eval_college_mathematics_runtime": 0.2647, "eval_college_mathematics_samples_per_second": 7.554, "eval_college_mathematics_steps_per_second": 3.777, "step": 15600 }, { "epoch": 6.491885143570537, "eval_international_law_loss": 3.066617012023926, "eval_international_law_runtime": 0.2652, "eval_international_law_samples_per_second": 7.542, "eval_international_law_steps_per_second": 3.771, "step": 15600 }, { "epoch": 6.500208073241781, "grad_norm": 0.271484375, "learning_rate": 2.012073846255691e-06, "loss": 0.2364, "step": 15620 }, { "epoch": 6.508531002913025, "grad_norm": 0.291015625, "learning_rate": 2.0040625836376937e-06, "loss": 0.2308, "step": 15640 }, { "epoch": 6.51685393258427, "grad_norm": 0.28515625, "learning_rate": 1.9960566188581306e-06, "loss": 0.2287, "step": 15660 }, { "epoch": 6.525176862255514, "grad_norm": 0.3203125, "learning_rate": 1.9880560374405107e-06, "loss": 0.2299, "step": 15680 }, { "epoch": 6.533499791926758, "grad_norm": 0.29296875, "learning_rate": 1.980060924850836e-06, "loss": 0.2325, "step": 15700 }, { "epoch": 6.533499791926758, "eval_main_loss": 0.2383701652288437, "eval_main_runtime": 6.3198, "eval_main_samples_per_second": 30.064, "eval_main_steps_per_second": 3.798, "step": 15700 }, { "epoch": 6.533499791926758, "eval_anatomy_loss": 2.833786725997925, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.528, "eval_anatomy_steps_per_second": 3.764, "step": 15700 }, { "epoch": 6.533499791926758, "eval_college_mathematics_loss": 2.050478219985962, "eval_college_mathematics_runtime": 0.2678, "eval_college_mathematics_samples_per_second": 7.467, "eval_college_mathematics_steps_per_second": 3.734, "step": 15700 }, { "epoch": 6.533499791926758, "eval_international_law_loss": 3.06551194190979, "eval_international_law_runtime": 0.2654, "eval_international_law_samples_per_second": 7.535, "eval_international_law_steps_per_second": 3.767, "step": 15700 }, { "epoch": 6.5418227215980025, "grad_norm": 0.283203125, "learning_rate": 1.972071366496685e-06, "loss": 0.2301, "step": 15720 }, { "epoch": 6.550145651269247, "grad_norm": 0.287109375, "learning_rate": 1.964087447726306e-06, "loss": 0.2336, "step": 15740 }, { "epoch": 6.558468580940491, "grad_norm": 0.275390625, "learning_rate": 1.956109253827702e-06, "loss": 0.2355, "step": 15760 }, { "epoch": 6.566791510611735, "grad_norm": 0.302734375, "learning_rate": 1.9481368700277197e-06, "loss": 0.2309, "step": 15780 }, { "epoch": 6.57511444028298, "grad_norm": 0.267578125, "learning_rate": 1.9401703814911394e-06, "loss": 0.2334, "step": 15800 }, { "epoch": 6.57511444028298, "eval_main_loss": 0.2383488267660141, "eval_main_runtime": 6.3287, "eval_main_samples_per_second": 30.022, "eval_main_steps_per_second": 3.792, "step": 15800 }, { "epoch": 6.57511444028298, "eval_anatomy_loss": 2.8328428268432617, "eval_anatomy_runtime": 0.2649, "eval_anatomy_samples_per_second": 7.551, "eval_anatomy_steps_per_second": 3.776, "step": 15800 }, { "epoch": 6.57511444028298, "eval_college_mathematics_loss": 2.052703619003296, "eval_college_mathematics_runtime": 0.2652, "eval_college_mathematics_samples_per_second": 7.54, "eval_college_mathematics_steps_per_second": 3.77, "step": 15800 }, { "epoch": 6.57511444028298, "eval_international_law_loss": 3.065504550933838, "eval_international_law_runtime": 0.2679, "eval_international_law_samples_per_second": 7.466, "eval_international_law_steps_per_second": 3.733, "step": 15800 }, { "epoch": 6.583437369954224, "grad_norm": 0.2578125, "learning_rate": 1.9322098733197677e-06, "loss": 0.2323, "step": 15820 }, { "epoch": 6.591760299625468, "grad_norm": 0.283203125, "learning_rate": 1.9242554305515208e-06, "loss": 0.2327, "step": 15840 }, { "epoch": 6.600083229296713, "grad_norm": 0.322265625, "learning_rate": 1.9163071381595273e-06, "loss": 0.2335, "step": 15860 }, { "epoch": 6.608406158967957, "grad_norm": 0.2373046875, "learning_rate": 1.908365081051212e-06, "loss": 0.2286, "step": 15880 }, { "epoch": 6.616729088639201, "grad_norm": 0.35546875, "learning_rate": 1.9004293440673908e-06, "loss": 0.2317, "step": 15900 }, { "epoch": 6.616729088639201, "eval_main_loss": 0.23834270238876343, "eval_main_runtime": 6.3247, "eval_main_samples_per_second": 30.041, "eval_main_steps_per_second": 3.795, "step": 15900 }, { "epoch": 6.616729088639201, "eval_anatomy_loss": 2.832962989807129, "eval_anatomy_runtime": 0.2669, "eval_anatomy_samples_per_second": 7.492, "eval_anatomy_steps_per_second": 3.746, "step": 15900 }, { "epoch": 6.616729088639201, "eval_college_mathematics_loss": 2.0485119819641113, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.495, "eval_college_mathematics_steps_per_second": 3.747, "step": 15900 }, { "epoch": 6.616729088639201, "eval_international_law_loss": 3.066624164581299, "eval_international_law_runtime": 0.2657, "eval_international_law_samples_per_second": 7.527, "eval_international_law_steps_per_second": 3.763, "step": 15900 }, { "epoch": 6.625052018310445, "grad_norm": 0.31640625, "learning_rate": 1.8925000119813669e-06, "loss": 0.2292, "step": 15920 }, { "epoch": 6.6333749479816895, "grad_norm": 0.251953125, "learning_rate": 1.88457716949802e-06, "loss": 0.231, "step": 15940 }, { "epoch": 6.641697877652934, "grad_norm": 0.3828125, "learning_rate": 1.8766609012529085e-06, "loss": 0.2292, "step": 15960 }, { "epoch": 6.650020807324178, "grad_norm": 0.2236328125, "learning_rate": 1.8687512918113604e-06, "loss": 0.2347, "step": 15980 }, { "epoch": 6.658343736995422, "grad_norm": 0.28515625, "learning_rate": 1.8608484256675702e-06, "loss": 0.2292, "step": 16000 }, { "epoch": 6.658343736995422, "eval_main_loss": 0.23837390542030334, "eval_main_runtime": 6.3517, "eval_main_samples_per_second": 29.913, "eval_main_steps_per_second": 3.779, "step": 16000 }, { "epoch": 6.658343736995422, "eval_anatomy_loss": 2.8328123092651367, "eval_anatomy_runtime": 0.2671, "eval_anatomy_samples_per_second": 7.488, "eval_anatomy_steps_per_second": 3.744, "step": 16000 }, { "epoch": 6.658343736995422, "eval_college_mathematics_loss": 2.0518500804901123, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.489, "eval_college_mathematics_steps_per_second": 3.745, "step": 16000 }, { "epoch": 6.658343736995422, "eval_international_law_loss": 3.0677542686462402, "eval_international_law_runtime": 0.2658, "eval_international_law_samples_per_second": 7.523, "eval_international_law_steps_per_second": 3.762, "step": 16000 }, { "epoch": 6.666666666666667, "grad_norm": 0.244140625, "learning_rate": 1.852952387243698e-06, "loss": 0.2263, "step": 16020 }, { "epoch": 6.674989596337911, "grad_norm": 0.21484375, "learning_rate": 1.845063260888969e-06, "loss": 0.2312, "step": 16040 }, { "epoch": 6.683312526009155, "grad_norm": 0.298828125, "learning_rate": 1.8371811308787655e-06, "loss": 0.2325, "step": 16060 }, { "epoch": 6.6916354556804, "grad_norm": 0.24609375, "learning_rate": 1.829306081413736e-06, "loss": 0.2292, "step": 16080 }, { "epoch": 6.699958385351644, "grad_norm": 0.328125, "learning_rate": 1.8214381966188898e-06, "loss": 0.2314, "step": 16100 }, { "epoch": 6.699958385351644, "eval_main_loss": 0.2383338063955307, "eval_main_runtime": 6.3513, "eval_main_samples_per_second": 29.915, "eval_main_steps_per_second": 3.779, "step": 16100 }, { "epoch": 6.699958385351644, "eval_anatomy_loss": 2.8324191570281982, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.479, "eval_anatomy_steps_per_second": 3.74, "step": 16100 }, { "epoch": 6.699958385351644, "eval_college_mathematics_loss": 2.0509135723114014, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.507, "eval_college_mathematics_steps_per_second": 3.754, "step": 16100 }, { "epoch": 6.699958385351644, "eval_international_law_loss": 3.066457748413086, "eval_international_law_runtime": 0.2667, "eval_international_law_samples_per_second": 7.499, "eval_international_law_steps_per_second": 3.749, "step": 16100 }, { "epoch": 6.708281315022888, "grad_norm": 0.2890625, "learning_rate": 1.813577560542699e-06, "loss": 0.2288, "step": 16120 }, { "epoch": 6.716604244694132, "grad_norm": 0.28515625, "learning_rate": 1.8057242571562034e-06, "loss": 0.2331, "step": 16140 }, { "epoch": 6.7249271743653765, "grad_norm": 0.302734375, "learning_rate": 1.7978783703521102e-06, "loss": 0.2371, "step": 16160 }, { "epoch": 6.733250104036621, "grad_norm": 0.283203125, "learning_rate": 1.7900399839438976e-06, "loss": 0.236, "step": 16180 }, { "epoch": 6.741573033707866, "grad_norm": 0.298828125, "learning_rate": 1.782209181664924e-06, "loss": 0.2352, "step": 16200 }, { "epoch": 6.741573033707866, "eval_main_loss": 0.23834922909736633, "eval_main_runtime": 6.3393, "eval_main_samples_per_second": 29.972, "eval_main_steps_per_second": 3.786, "step": 16200 }, { "epoch": 6.741573033707866, "eval_anatomy_loss": 2.832639217376709, "eval_anatomy_runtime": 0.2655, "eval_anatomy_samples_per_second": 7.533, "eval_anatomy_steps_per_second": 3.766, "step": 16200 }, { "epoch": 6.741573033707866, "eval_college_mathematics_loss": 2.050755023956299, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.525, "eval_college_mathematics_steps_per_second": 3.762, "step": 16200 }, { "epoch": 6.741573033707866, "eval_international_law_loss": 3.0636000633239746, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.502, "eval_international_law_steps_per_second": 3.751, "step": 16200 }, { "epoch": 6.749895963379109, "grad_norm": 0.291015625, "learning_rate": 1.774386047167529e-06, "loss": 0.2301, "step": 16220 }, { "epoch": 6.758218893050354, "grad_norm": 0.275390625, "learning_rate": 1.7665706640221415e-06, "loss": 0.236, "step": 16240 }, { "epoch": 6.766541822721598, "grad_norm": 0.287109375, "learning_rate": 1.7587631157163876e-06, "loss": 0.2328, "step": 16260 }, { "epoch": 6.774864752392842, "grad_norm": 0.296875, "learning_rate": 1.7509634856541951e-06, "loss": 0.2337, "step": 16280 }, { "epoch": 6.783187682064087, "grad_norm": 0.283203125, "learning_rate": 1.7431718571549092e-06, "loss": 0.231, "step": 16300 }, { "epoch": 6.783187682064087, "eval_main_loss": 0.23830153048038483, "eval_main_runtime": 6.3187, "eval_main_samples_per_second": 30.069, "eval_main_steps_per_second": 3.798, "step": 16300 }, { "epoch": 6.783187682064087, "eval_anatomy_loss": 2.8300514221191406, "eval_anatomy_runtime": 0.267, "eval_anatomy_samples_per_second": 7.49, "eval_anatomy_steps_per_second": 3.745, "step": 16300 }, { "epoch": 6.783187682064087, "eval_college_mathematics_loss": 2.052163600921631, "eval_college_mathematics_runtime": 0.2643, "eval_college_mathematics_samples_per_second": 7.568, "eval_college_mathematics_steps_per_second": 3.784, "step": 16300 }, { "epoch": 6.783187682064087, "eval_international_law_loss": 3.064903736114502, "eval_international_law_runtime": 0.2653, "eval_international_law_samples_per_second": 7.539, "eval_international_law_steps_per_second": 3.77, "step": 16300 }, { "epoch": 6.7915106117353305, "grad_norm": 0.26171875, "learning_rate": 1.7353883134523975e-06, "loss": 0.231, "step": 16320 }, { "epoch": 6.799833541406575, "grad_norm": 0.259765625, "learning_rate": 1.7276129376941594e-06, "loss": 0.2344, "step": 16340 }, { "epoch": 6.80815647107782, "grad_norm": 0.265625, "learning_rate": 1.7198458129404433e-06, "loss": 0.2312, "step": 16360 }, { "epoch": 6.8164794007490634, "grad_norm": 0.2470703125, "learning_rate": 1.7120870221633556e-06, "loss": 0.2315, "step": 16380 }, { "epoch": 6.824802330420308, "grad_norm": 0.251953125, "learning_rate": 1.704336648245975e-06, "loss": 0.2272, "step": 16400 }, { "epoch": 6.824802330420308, "eval_main_loss": 0.23840200901031494, "eval_main_runtime": 6.3137, "eval_main_samples_per_second": 30.093, "eval_main_steps_per_second": 3.801, "step": 16400 }, { "epoch": 6.824802330420308, "eval_anatomy_loss": 2.8353002071380615, "eval_anatomy_runtime": 0.2656, "eval_anatomy_samples_per_second": 7.53, "eval_anatomy_steps_per_second": 3.765, "step": 16400 }, { "epoch": 6.824802330420308, "eval_college_mathematics_loss": 2.0492591857910156, "eval_college_mathematics_runtime": 0.2643, "eval_college_mathematics_samples_per_second": 7.568, "eval_college_mathematics_steps_per_second": 3.784, "step": 16400 }, { "epoch": 6.824802330420308, "eval_international_law_loss": 3.065768241882324, "eval_international_law_runtime": 0.2656, "eval_international_law_samples_per_second": 7.531, "eval_international_law_steps_per_second": 3.765, "step": 16400 }, { "epoch": 6.833125260091553, "grad_norm": 0.271484375, "learning_rate": 1.6965947739814672e-06, "loss": 0.2333, "step": 16420 }, { "epoch": 6.841448189762796, "grad_norm": 0.298828125, "learning_rate": 1.6888614820722024e-06, "loss": 0.2353, "step": 16440 }, { "epoch": 6.849771119434041, "grad_norm": 0.279296875, "learning_rate": 1.681136855128866e-06, "loss": 0.2247, "step": 16460 }, { "epoch": 6.858094049105285, "grad_norm": 0.279296875, "learning_rate": 1.6734209756695843e-06, "loss": 0.2363, "step": 16480 }, { "epoch": 6.866416978776529, "grad_norm": 0.255859375, "learning_rate": 1.6657139261190364e-06, "loss": 0.2328, "step": 16500 }, { "epoch": 6.866416978776529, "eval_main_loss": 0.23840853571891785, "eval_main_runtime": 6.3197, "eval_main_samples_per_second": 30.065, "eval_main_steps_per_second": 3.798, "step": 16500 }, { "epoch": 6.866416978776529, "eval_anatomy_loss": 2.834096908569336, "eval_anatomy_runtime": 0.2667, "eval_anatomy_samples_per_second": 7.5, "eval_anatomy_steps_per_second": 3.75, "step": 16500 }, { "epoch": 6.866416978776529, "eval_college_mathematics_loss": 2.0504603385925293, "eval_college_mathematics_runtime": 0.265, "eval_college_mathematics_samples_per_second": 7.546, "eval_college_mathematics_steps_per_second": 3.773, "step": 16500 }, { "epoch": 6.866416978776529, "eval_international_law_loss": 3.065997838973999, "eval_international_law_runtime": 0.2658, "eval_international_law_samples_per_second": 7.525, "eval_international_law_steps_per_second": 3.762, "step": 16500 }, { "epoch": 6.874739908447774, "grad_norm": 0.322265625, "learning_rate": 1.6580157888075766e-06, "loss": 0.2319, "step": 16520 }, { "epoch": 6.8830628381190175, "grad_norm": 0.2578125, "learning_rate": 1.6503266459703566e-06, "loss": 0.2363, "step": 16540 }, { "epoch": 6.891385767790262, "grad_norm": 0.287109375, "learning_rate": 1.6426465797464402e-06, "loss": 0.2318, "step": 16560 }, { "epoch": 6.899708697461507, "grad_norm": 0.23828125, "learning_rate": 1.6349756721779348e-06, "loss": 0.2304, "step": 16580 }, { "epoch": 6.90803162713275, "grad_norm": 0.28125, "learning_rate": 1.6273140052091097e-06, "loss": 0.2345, "step": 16600 }, { "epoch": 6.90803162713275, "eval_main_loss": 0.2384311705827713, "eval_main_runtime": 6.3138, "eval_main_samples_per_second": 30.093, "eval_main_steps_per_second": 3.801, "step": 16600 }, { "epoch": 6.90803162713275, "eval_anatomy_loss": 2.832714319229126, "eval_anatomy_runtime": 0.2643, "eval_anatomy_samples_per_second": 7.566, "eval_anatomy_steps_per_second": 3.783, "step": 16600 }, { "epoch": 6.90803162713275, "eval_college_mathematics_loss": 2.0529298782348633, "eval_college_mathematics_runtime": 0.2651, "eval_college_mathematics_samples_per_second": 7.543, "eval_college_mathematics_steps_per_second": 3.772, "step": 16600 }, { "epoch": 6.90803162713275, "eval_international_law_loss": 3.064915180206299, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.497, "eval_international_law_steps_per_second": 3.748, "step": 16600 }, { "epoch": 6.916354556803995, "grad_norm": 0.306640625, "learning_rate": 1.6196616606855194e-06, "loss": 0.231, "step": 16620 }, { "epoch": 6.92467748647524, "grad_norm": 0.265625, "learning_rate": 1.6120187203531351e-06, "loss": 0.2332, "step": 16640 }, { "epoch": 6.933000416146483, "grad_norm": 0.2451171875, "learning_rate": 1.6043852658574666e-06, "loss": 0.2305, "step": 16660 }, { "epoch": 6.941323345817728, "grad_norm": 0.318359375, "learning_rate": 1.596761378742689e-06, "loss": 0.2346, "step": 16680 }, { "epoch": 6.949646275488972, "grad_norm": 0.291015625, "learning_rate": 1.589147140450778e-06, "loss": 0.2263, "step": 16700 }, { "epoch": 6.949646275488972, "eval_main_loss": 0.23828622698783875, "eval_main_runtime": 6.3112, "eval_main_samples_per_second": 30.105, "eval_main_steps_per_second": 3.803, "step": 16700 }, { "epoch": 6.949646275488972, "eval_anatomy_loss": 2.833873748779297, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.497, "eval_anatomy_steps_per_second": 3.748, "step": 16700 }, { "epoch": 6.949646275488972, "eval_college_mathematics_loss": 2.0528018474578857, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.534, "eval_college_mathematics_steps_per_second": 3.767, "step": 16700 }, { "epoch": 6.949646275488972, "eval_international_law_loss": 3.0665361881256104, "eval_international_law_runtime": 0.2655, "eval_international_law_samples_per_second": 7.533, "eval_international_law_steps_per_second": 3.766, "step": 16700 }, { "epoch": 6.957969205160216, "grad_norm": 0.271484375, "learning_rate": 1.5815426323206345e-06, "loss": 0.2324, "step": 16720 }, { "epoch": 6.966292134831461, "grad_norm": 0.2421875, "learning_rate": 1.5739479355872162e-06, "loss": 0.2307, "step": 16740 }, { "epoch": 6.974615064502705, "grad_norm": 0.2421875, "learning_rate": 1.5663631313806726e-06, "loss": 0.235, "step": 16760 }, { "epoch": 6.982937994173949, "grad_norm": 0.212890625, "learning_rate": 1.5587883007254741e-06, "loss": 0.2266, "step": 16780 }, { "epoch": 6.991260923845194, "grad_norm": 0.265625, "learning_rate": 1.5512235245395514e-06, "loss": 0.2316, "step": 16800 }, { "epoch": 6.991260923845194, "eval_main_loss": 0.23832102119922638, "eval_main_runtime": 6.3531, "eval_main_samples_per_second": 29.907, "eval_main_steps_per_second": 3.778, "step": 16800 }, { "epoch": 6.991260923845194, "eval_anatomy_loss": 2.8318288326263428, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.483, "eval_anatomy_steps_per_second": 3.741, "step": 16800 }, { "epoch": 6.991260923845194, "eval_college_mathematics_loss": 2.0499653816223145, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.505, "eval_college_mathematics_steps_per_second": 3.752, "step": 16800 }, { "epoch": 6.991260923845194, "eval_international_law_loss": 3.06691312789917, "eval_international_law_runtime": 0.2681, "eval_international_law_samples_per_second": 7.46, "eval_international_law_steps_per_second": 3.73, "step": 16800 }, { "epoch": 6.999583853516437, "grad_norm": 0.251953125, "learning_rate": 1.5436688836334274e-06, "loss": 0.2314, "step": 16820 }, { "epoch": 7.007906783187682, "grad_norm": 0.294921875, "learning_rate": 1.5361244587093551e-06, "loss": 0.2344, "step": 16840 }, { "epoch": 7.016229712858927, "grad_norm": 0.2421875, "learning_rate": 1.528590330360456e-06, "loss": 0.2314, "step": 16860 }, { "epoch": 7.02455264253017, "grad_norm": 0.2890625, "learning_rate": 1.5210665790698592e-06, "loss": 0.2334, "step": 16880 }, { "epoch": 7.032875572201415, "grad_norm": 0.279296875, "learning_rate": 1.513553285209838e-06, "loss": 0.2308, "step": 16900 }, { "epoch": 7.032875572201415, "eval_main_loss": 0.2382553517818451, "eval_main_runtime": 6.3599, "eval_main_samples_per_second": 29.875, "eval_main_steps_per_second": 3.774, "step": 16900 }, { "epoch": 7.032875572201415, "eval_anatomy_loss": 2.832185983657837, "eval_anatomy_runtime": 0.2675, "eval_anatomy_samples_per_second": 7.476, "eval_anatomy_steps_per_second": 3.738, "step": 16900 }, { "epoch": 7.032875572201415, "eval_college_mathematics_loss": 2.0515077114105225, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.51, "eval_college_mathematics_steps_per_second": 3.755, "step": 16900 }, { "epoch": 7.032875572201415, "eval_international_law_loss": 3.0670435428619385, "eval_international_law_runtime": 0.2682, "eval_international_law_samples_per_second": 7.458, "eval_international_law_steps_per_second": 3.729, "step": 16900 }, { "epoch": 7.0411985018726595, "grad_norm": 0.31640625, "learning_rate": 1.5060505290409594e-06, "loss": 0.237, "step": 16920 }, { "epoch": 7.049521431543903, "grad_norm": 0.27734375, "learning_rate": 1.4985583907112188e-06, "loss": 0.2303, "step": 16940 }, { "epoch": 7.057844361215148, "grad_norm": 0.26953125, "learning_rate": 1.491076950255186e-06, "loss": 0.2309, "step": 16960 }, { "epoch": 7.066167290886392, "grad_norm": 0.255859375, "learning_rate": 1.4836062875931534e-06, "loss": 0.2307, "step": 16980 }, { "epoch": 7.074490220557636, "grad_norm": 0.275390625, "learning_rate": 1.4761464825302788e-06, "loss": 0.2316, "step": 17000 }, { "epoch": 7.074490220557636, "eval_main_loss": 0.2383512556552887, "eval_main_runtime": 6.3399, "eval_main_samples_per_second": 29.969, "eval_main_steps_per_second": 3.786, "step": 17000 }, { "epoch": 7.074490220557636, "eval_anatomy_loss": 2.8314664363861084, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.481, "eval_anatomy_steps_per_second": 3.74, "step": 17000 }, { "epoch": 7.074490220557636, "eval_college_mathematics_loss": 2.0510685443878174, "eval_college_mathematics_runtime": 0.2673, "eval_college_mathematics_samples_per_second": 7.482, "eval_college_mathematics_steps_per_second": 3.741, "step": 17000 }, { "epoch": 7.074490220557636, "eval_international_law_loss": 3.0641367435455322, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.504, "eval_international_law_steps_per_second": 3.752, "step": 17000 }, { "epoch": 7.082813150228881, "grad_norm": 0.291015625, "learning_rate": 1.4686976147557332e-06, "loss": 0.2291, "step": 17020 }, { "epoch": 7.091136079900124, "grad_norm": 0.279296875, "learning_rate": 1.461259763841853e-06, "loss": 0.2321, "step": 17040 }, { "epoch": 7.099459009571369, "grad_norm": 0.291015625, "learning_rate": 1.4538330092432828e-06, "loss": 0.2324, "step": 17060 }, { "epoch": 7.107781939242614, "grad_norm": 0.294921875, "learning_rate": 1.4464174302961343e-06, "loss": 0.2357, "step": 17080 }, { "epoch": 7.116104868913857, "grad_norm": 0.28125, "learning_rate": 1.4390131062171378e-06, "loss": 0.2319, "step": 17100 }, { "epoch": 7.116104868913857, "eval_main_loss": 0.2382993996143341, "eval_main_runtime": 6.329, "eval_main_samples_per_second": 30.021, "eval_main_steps_per_second": 3.792, "step": 17100 }, { "epoch": 7.116104868913857, "eval_anatomy_loss": 2.833508014678955, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.506, "eval_anatomy_steps_per_second": 3.753, "step": 17100 }, { "epoch": 7.116104868913857, "eval_college_mathematics_loss": 2.0484731197357178, "eval_college_mathematics_runtime": 0.2653, "eval_college_mathematics_samples_per_second": 7.538, "eval_college_mathematics_steps_per_second": 3.769, "step": 17100 }, { "epoch": 7.116104868913857, "eval_international_law_loss": 3.0654804706573486, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.508, "eval_international_law_steps_per_second": 3.754, "step": 17100 }, { "epoch": 7.124427798585102, "grad_norm": 0.2890625, "learning_rate": 1.4316201161027865e-06, "loss": 0.227, "step": 17120 }, { "epoch": 7.1327507282563465, "grad_norm": 0.259765625, "learning_rate": 1.4242385389285068e-06, "loss": 0.2312, "step": 17140 }, { "epoch": 7.14107365792759, "grad_norm": 0.310546875, "learning_rate": 1.416868453547802e-06, "loss": 0.2329, "step": 17160 }, { "epoch": 7.149396587598835, "grad_norm": 0.255859375, "learning_rate": 1.4095099386914146e-06, "loss": 0.2376, "step": 17180 }, { "epoch": 7.157719517270079, "grad_norm": 0.294921875, "learning_rate": 1.402163072966488e-06, "loss": 0.2379, "step": 17200 }, { "epoch": 7.157719517270079, "eval_main_loss": 0.2382867932319641, "eval_main_runtime": 6.3286, "eval_main_samples_per_second": 30.022, "eval_main_steps_per_second": 3.792, "step": 17200 }, { "epoch": 7.157719517270079, "eval_anatomy_loss": 2.8338112831115723, "eval_anatomy_runtime": 0.267, "eval_anatomy_samples_per_second": 7.491, "eval_anatomy_steps_per_second": 3.746, "step": 17200 }, { "epoch": 7.157719517270079, "eval_college_mathematics_loss": 2.050143241882324, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.532, "eval_college_mathematics_steps_per_second": 3.766, "step": 17200 }, { "epoch": 7.157719517270079, "eval_international_law_loss": 3.067023754119873, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 17200 }, { "epoch": 7.166042446941323, "grad_norm": 0.29296875, "learning_rate": 1.3948279348557225e-06, "loss": 0.234, "step": 17220 }, { "epoch": 7.174365376612568, "grad_norm": 0.244140625, "learning_rate": 1.3875046027165376e-06, "loss": 0.2318, "step": 17240 }, { "epoch": 7.182688306283812, "grad_norm": 0.294921875, "learning_rate": 1.380193154780236e-06, "loss": 0.2343, "step": 17260 }, { "epoch": 7.191011235955056, "grad_norm": 0.2421875, "learning_rate": 1.3728936691511704e-06, "loss": 0.2304, "step": 17280 }, { "epoch": 7.199334165626301, "grad_norm": 0.271484375, "learning_rate": 1.3656062238059035e-06, "loss": 0.2359, "step": 17300 }, { "epoch": 7.199334165626301, "eval_main_loss": 0.23832285404205322, "eval_main_runtime": 6.3287, "eval_main_samples_per_second": 30.022, "eval_main_steps_per_second": 3.792, "step": 17300 }, { "epoch": 7.199334165626301, "eval_anatomy_loss": 2.833261251449585, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.515, "eval_anatomy_steps_per_second": 3.757, "step": 17300 }, { "epoch": 7.199334165626301, "eval_college_mathematics_loss": 2.0532398223876953, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.497, "eval_college_mathematics_steps_per_second": 3.748, "step": 17300 }, { "epoch": 7.199334165626301, "eval_international_law_loss": 3.065138816833496, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.51, "eval_international_law_steps_per_second": 3.755, "step": 17300 }, { "epoch": 7.207657095297545, "grad_norm": 0.251953125, "learning_rate": 1.3583308965923778e-06, "loss": 0.2289, "step": 17320 }, { "epoch": 7.215980024968789, "grad_norm": 0.26171875, "learning_rate": 1.3510677652290882e-06, "loss": 0.2337, "step": 17340 }, { "epoch": 7.2243029546400335, "grad_norm": 0.2314453125, "learning_rate": 1.3438169073042415e-06, "loss": 0.2328, "step": 17360 }, { "epoch": 7.232625884311277, "grad_norm": 0.275390625, "learning_rate": 1.3365784002749393e-06, "loss": 0.2327, "step": 17380 }, { "epoch": 7.240948813982522, "grad_norm": 0.2734375, "learning_rate": 1.3293523214663428e-06, "loss": 0.2336, "step": 17400 }, { "epoch": 7.240948813982522, "eval_main_loss": 0.23834027349948883, "eval_main_runtime": 6.3246, "eval_main_samples_per_second": 30.041, "eval_main_steps_per_second": 3.795, "step": 17400 }, { "epoch": 7.240948813982522, "eval_anatomy_loss": 2.8343632221221924, "eval_anatomy_runtime": 0.2656, "eval_anatomy_samples_per_second": 7.53, "eval_anatomy_steps_per_second": 3.765, "step": 17400 }, { "epoch": 7.240948813982522, "eval_college_mathematics_loss": 2.052730083465576, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.503, "eval_college_mathematics_steps_per_second": 3.752, "step": 17400 }, { "epoch": 7.240948813982522, "eval_international_law_loss": 3.066589117050171, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 17400 }, { "epoch": 7.249271743653766, "grad_norm": 0.25390625, "learning_rate": 1.3221387480708475e-06, "loss": 0.2331, "step": 17420 }, { "epoch": 7.25759467332501, "grad_norm": 0.271484375, "learning_rate": 1.3149377571472655e-06, "loss": 0.2362, "step": 17440 }, { "epoch": 7.265917602996255, "grad_norm": 0.248046875, "learning_rate": 1.3077494256199892e-06, "loss": 0.2297, "step": 17460 }, { "epoch": 7.274240532667499, "grad_norm": 0.29296875, "learning_rate": 1.3005738302781839e-06, "loss": 0.2319, "step": 17480 }, { "epoch": 7.282563462338743, "grad_norm": 0.29296875, "learning_rate": 1.2934110477749584e-06, "loss": 0.2295, "step": 17500 }, { "epoch": 7.282563462338743, "eval_main_loss": 0.23828759789466858, "eval_main_runtime": 6.3264, "eval_main_samples_per_second": 30.033, "eval_main_steps_per_second": 3.794, "step": 17500 }, { "epoch": 7.282563462338743, "eval_anatomy_loss": 2.8331925868988037, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.513, "eval_anatomy_steps_per_second": 3.756, "step": 17500 }, { "epoch": 7.282563462338743, "eval_college_mathematics_loss": 2.0508506298065186, "eval_college_mathematics_runtime": 0.2652, "eval_college_mathematics_samples_per_second": 7.542, "eval_college_mathematics_steps_per_second": 3.771, "step": 17500 }, { "epoch": 7.282563462338743, "eval_international_law_loss": 3.0639867782592773, "eval_international_law_runtime": 0.2656, "eval_international_law_samples_per_second": 7.532, "eval_international_law_steps_per_second": 3.766, "step": 17500 }, { "epoch": 7.290886392009988, "grad_norm": 0.263671875, "learning_rate": 1.2862611546265469e-06, "loss": 0.2287, "step": 17520 }, { "epoch": 7.299209321681232, "grad_norm": 0.27734375, "learning_rate": 1.279124227211498e-06, "loss": 0.2313, "step": 17540 }, { "epoch": 7.307532251352476, "grad_norm": 0.2890625, "learning_rate": 1.2720003417698506e-06, "loss": 0.2324, "step": 17560 }, { "epoch": 7.3158551810237205, "grad_norm": 0.298828125, "learning_rate": 1.2648895744023223e-06, "loss": 0.2323, "step": 17580 }, { "epoch": 7.324178110694964, "grad_norm": 0.318359375, "learning_rate": 1.2577920010695015e-06, "loss": 0.233, "step": 17600 }, { "epoch": 7.324178110694964, "eval_main_loss": 0.2383326143026352, "eval_main_runtime": 6.331, "eval_main_samples_per_second": 30.011, "eval_main_steps_per_second": 3.791, "step": 17600 }, { "epoch": 7.324178110694964, "eval_anatomy_loss": 2.833402633666992, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.508, "eval_anatomy_steps_per_second": 3.754, "step": 17600 }, { "epoch": 7.324178110694964, "eval_college_mathematics_loss": 2.052766799926758, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.511, "eval_college_mathematics_steps_per_second": 3.755, "step": 17600 }, { "epoch": 7.324178110694964, "eval_international_law_loss": 3.0673341751098633, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.511, "eval_international_law_steps_per_second": 3.756, "step": 17600 }, { "epoch": 7.332501040366209, "grad_norm": 0.279296875, "learning_rate": 1.2507076975910276e-06, "loss": 0.2342, "step": 17620 }, { "epoch": 7.340823970037453, "grad_norm": 0.322265625, "learning_rate": 1.243636739644787e-06, "loss": 0.2301, "step": 17640 }, { "epoch": 7.349146899708697, "grad_norm": 0.28515625, "learning_rate": 1.236579202766102e-06, "loss": 0.2286, "step": 17660 }, { "epoch": 7.357469829379942, "grad_norm": 0.275390625, "learning_rate": 1.229535162346924e-06, "loss": 0.2294, "step": 17680 }, { "epoch": 7.365792759051186, "grad_norm": 0.35546875, "learning_rate": 1.2225046936350324e-06, "loss": 0.2349, "step": 17700 }, { "epoch": 7.365792759051186, "eval_main_loss": 0.2383442372083664, "eval_main_runtime": 6.3529, "eval_main_samples_per_second": 29.908, "eval_main_steps_per_second": 3.778, "step": 17700 }, { "epoch": 7.365792759051186, "eval_anatomy_loss": 2.8332571983337402, "eval_anatomy_runtime": 0.2669, "eval_anatomy_samples_per_second": 7.493, "eval_anatomy_steps_per_second": 3.746, "step": 17700 }, { "epoch": 7.365792759051186, "eval_college_mathematics_loss": 2.0507707595825195, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.492, "eval_college_mathematics_steps_per_second": 3.746, "step": 17700 }, { "epoch": 7.365792759051186, "eval_international_law_loss": 3.0661237239837646, "eval_international_law_runtime": 0.2671, "eval_international_law_samples_per_second": 7.488, "eval_international_law_steps_per_second": 3.744, "step": 17700 }, { "epoch": 7.37411568872243, "grad_norm": 0.2890625, "learning_rate": 1.2154878717332235e-06, "loss": 0.2334, "step": 17720 }, { "epoch": 7.3824386183936745, "grad_norm": 0.25, "learning_rate": 1.208484771598513e-06, "loss": 0.2292, "step": 17740 }, { "epoch": 7.390761548064919, "grad_norm": 0.29296875, "learning_rate": 1.2014954680413334e-06, "loss": 0.2273, "step": 17760 }, { "epoch": 7.399084477736163, "grad_norm": 0.279296875, "learning_rate": 1.1945200357247386e-06, "loss": 0.2305, "step": 17780 }, { "epoch": 7.407407407407407, "grad_norm": 0.271484375, "learning_rate": 1.1875585491636e-06, "loss": 0.2317, "step": 17800 }, { "epoch": 7.407407407407407, "eval_main_loss": 0.23827992379665375, "eval_main_runtime": 6.3476, "eval_main_samples_per_second": 29.932, "eval_main_steps_per_second": 3.781, "step": 17800 }, { "epoch": 7.407407407407407, "eval_anatomy_loss": 2.832733631134033, "eval_anatomy_runtime": 0.2672, "eval_anatomy_samples_per_second": 7.484, "eval_anatomy_steps_per_second": 3.742, "step": 17800 }, { "epoch": 7.407407407407407, "eval_college_mathematics_loss": 2.052572250366211, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.489, "eval_college_mathematics_steps_per_second": 3.744, "step": 17800 }, { "epoch": 7.407407407407407, "eval_international_law_loss": 3.064929723739624, "eval_international_law_runtime": 0.2672, "eval_international_law_samples_per_second": 7.484, "eval_international_law_steps_per_second": 3.742, "step": 17800 }, { "epoch": 7.415730337078652, "grad_norm": 0.27734375, "learning_rate": 1.180611082723814e-06, "loss": 0.2316, "step": 17820 }, { "epoch": 7.424053266749896, "grad_norm": 0.28515625, "learning_rate": 1.1736777106215118e-06, "loss": 0.2336, "step": 17840 }, { "epoch": 7.43237619642114, "grad_norm": 0.255859375, "learning_rate": 1.1667585069222554e-06, "loss": 0.2324, "step": 17860 }, { "epoch": 7.440699126092385, "grad_norm": 0.271484375, "learning_rate": 1.1598535455402584e-06, "loss": 0.2301, "step": 17880 }, { "epoch": 7.449022055763629, "grad_norm": 0.30859375, "learning_rate": 1.15296290023759e-06, "loss": 0.2282, "step": 17900 }, { "epoch": 7.449022055763629, "eval_main_loss": 0.2383432686328888, "eval_main_runtime": 6.356, "eval_main_samples_per_second": 29.893, "eval_main_steps_per_second": 3.776, "step": 17900 }, { "epoch": 7.449022055763629, "eval_anatomy_loss": 2.833138942718506, "eval_anatomy_runtime": 0.2678, "eval_anatomy_samples_per_second": 7.468, "eval_anatomy_steps_per_second": 3.734, "step": 17900 }, { "epoch": 7.449022055763629, "eval_college_mathematics_loss": 2.052255868911743, "eval_college_mathematics_runtime": 0.2683, "eval_college_mathematics_samples_per_second": 7.454, "eval_college_mathematics_steps_per_second": 3.727, "step": 17900 }, { "epoch": 7.449022055763629, "eval_international_law_loss": 3.065342664718628, "eval_international_law_runtime": 0.266, "eval_international_law_samples_per_second": 7.519, "eval_international_law_steps_per_second": 3.76, "step": 17900 }, { "epoch": 7.457344985434873, "grad_norm": 0.283203125, "learning_rate": 1.1460866446233857e-06, "loss": 0.2326, "step": 17920 }, { "epoch": 7.465667915106117, "grad_norm": 0.26953125, "learning_rate": 1.1392248521530692e-06, "loss": 0.2321, "step": 17940 }, { "epoch": 7.4739908447773615, "grad_norm": 0.3125, "learning_rate": 1.132377596127554e-06, "loss": 0.2294, "step": 17960 }, { "epoch": 7.482313774448606, "grad_norm": 0.283203125, "learning_rate": 1.1255449496924767e-06, "loss": 0.2333, "step": 17980 }, { "epoch": 7.49063670411985, "grad_norm": 0.267578125, "learning_rate": 1.1187269858374017e-06, "loss": 0.2293, "step": 18000 }, { "epoch": 7.49063670411985, "eval_main_loss": 0.2383362054824829, "eval_main_runtime": 6.3387, "eval_main_samples_per_second": 29.974, "eval_main_steps_per_second": 3.786, "step": 18000 }, { "epoch": 7.49063670411985, "eval_anatomy_loss": 2.832285165786743, "eval_anatomy_runtime": 0.267, "eval_anatomy_samples_per_second": 7.492, "eval_anatomy_steps_per_second": 3.746, "step": 18000 }, { "epoch": 7.49063670411985, "eval_college_mathematics_loss": 2.0504801273345947, "eval_college_mathematics_runtime": 0.2653, "eval_college_mathematics_samples_per_second": 7.54, "eval_college_mathematics_steps_per_second": 3.77, "step": 18000 }, { "epoch": 7.49063670411985, "eval_international_law_loss": 3.0625662803649902, "eval_international_law_runtime": 0.2658, "eval_international_law_samples_per_second": 7.524, "eval_international_law_steps_per_second": 3.762, "step": 18000 }, { "epoch": 7.498959633791094, "grad_norm": 0.279296875, "learning_rate": 1.1119237773950484e-06, "loss": 0.2286, "step": 18020 }, { "epoch": 7.507282563462339, "grad_norm": 0.2431640625, "learning_rate": 1.1051353970405142e-06, "loss": 0.2315, "step": 18040 }, { "epoch": 7.515605493133583, "grad_norm": 0.341796875, "learning_rate": 1.0983619172904935e-06, "loss": 0.2284, "step": 18060 }, { "epoch": 7.523928422804827, "grad_norm": 0.3046875, "learning_rate": 1.0916034105025052e-06, "loss": 0.2282, "step": 18080 }, { "epoch": 7.532251352476072, "grad_norm": 0.248046875, "learning_rate": 1.0848599488741208e-06, "loss": 0.2277, "step": 18100 }, { "epoch": 7.532251352476072, "eval_main_loss": 0.2383170872926712, "eval_main_runtime": 6.3286, "eval_main_samples_per_second": 30.023, "eval_main_steps_per_second": 3.792, "step": 18100 }, { "epoch": 7.532251352476072, "eval_anatomy_loss": 2.834378480911255, "eval_anatomy_runtime": 0.2677, "eval_anatomy_samples_per_second": 7.472, "eval_anatomy_steps_per_second": 3.736, "step": 18100 }, { "epoch": 7.532251352476072, "eval_college_mathematics_loss": 2.0512096881866455, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.501, "eval_college_mathematics_steps_per_second": 3.75, "step": 18100 }, { "epoch": 7.532251352476072, "eval_international_law_loss": 3.066354513168335, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.51, "eval_international_law_steps_per_second": 3.755, "step": 18100 }, { "epoch": 7.540574282147316, "grad_norm": 0.3203125, "learning_rate": 1.078131604442193e-06, "loss": 0.2295, "step": 18120 }, { "epoch": 7.54889721181856, "grad_norm": 0.310546875, "learning_rate": 1.0714184490820842e-06, "loss": 0.2296, "step": 18140 }, { "epoch": 7.557220141489804, "grad_norm": 0.2890625, "learning_rate": 1.0647205545068992e-06, "loss": 0.2311, "step": 18160 }, { "epoch": 7.5655430711610485, "grad_norm": 0.259765625, "learning_rate": 1.0580379922667241e-06, "loss": 0.2333, "step": 18180 }, { "epoch": 7.573866000832293, "grad_norm": 0.3046875, "learning_rate": 1.0513708337478509e-06, "loss": 0.2304, "step": 18200 }, { "epoch": 7.573866000832293, "eval_main_loss": 0.2383020520210266, "eval_main_runtime": 6.3287, "eval_main_samples_per_second": 30.022, "eval_main_steps_per_second": 3.792, "step": 18200 }, { "epoch": 7.573866000832293, "eval_anatomy_loss": 2.834932804107666, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.496, "eval_anatomy_steps_per_second": 3.748, "step": 18200 }, { "epoch": 7.573866000832293, "eval_college_mathematics_loss": 2.0513229370117188, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.525, "eval_college_mathematics_steps_per_second": 3.762, "step": 18200 }, { "epoch": 7.573866000832293, "eval_international_law_loss": 3.0660512447357178, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.504, "eval_international_law_steps_per_second": 3.752, "step": 18200 }, { "epoch": 7.582188930503538, "grad_norm": 0.279296875, "learning_rate": 1.044719150172028e-06, "loss": 0.2259, "step": 18220 }, { "epoch": 7.590511860174781, "grad_norm": 0.3125, "learning_rate": 1.0380830125956897e-06, "loss": 0.2273, "step": 18240 }, { "epoch": 7.598834789846026, "grad_norm": 0.3515625, "learning_rate": 1.0314624919092011e-06, "loss": 0.2365, "step": 18260 }, { "epoch": 7.60715771951727, "grad_norm": 0.322265625, "learning_rate": 1.024857658836102e-06, "loss": 0.2327, "step": 18280 }, { "epoch": 7.615480649188514, "grad_norm": 0.294921875, "learning_rate": 1.0182685839323475e-06, "loss": 0.232, "step": 18300 }, { "epoch": 7.615480649188514, "eval_main_loss": 0.23833827674388885, "eval_main_runtime": 6.3315, "eval_main_samples_per_second": 30.009, "eval_main_steps_per_second": 3.791, "step": 18300 }, { "epoch": 7.615480649188514, "eval_anatomy_loss": 2.8317782878875732, "eval_anatomy_runtime": 0.2681, "eval_anatomy_samples_per_second": 7.46, "eval_anatomy_steps_per_second": 3.73, "step": 18300 }, { "epoch": 7.615480649188514, "eval_college_mathematics_loss": 2.0530128479003906, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.489, "eval_college_mathematics_steps_per_second": 3.744, "step": 18300 }, { "epoch": 7.615480649188514, "eval_international_law_loss": 3.064087152481079, "eval_international_law_runtime": 0.2649, "eval_international_law_samples_per_second": 7.551, "eval_international_law_steps_per_second": 3.776, "step": 18300 }, { "epoch": 7.623803578859759, "grad_norm": 0.2734375, "learning_rate": 1.0116953375855565e-06, "loss": 0.2331, "step": 18320 }, { "epoch": 7.632126508531003, "grad_norm": 0.2138671875, "learning_rate": 1.0051379900142635e-06, "loss": 0.2336, "step": 18340 }, { "epoch": 7.640449438202247, "grad_norm": 0.298828125, "learning_rate": 9.98596611267158e-07, "loss": 0.2308, "step": 18360 }, { "epoch": 7.648772367873492, "grad_norm": 0.267578125, "learning_rate": 9.920712712223494e-07, "loss": 0.2258, "step": 18380 }, { "epoch": 7.6570952975447355, "grad_norm": 0.28125, "learning_rate": 9.855620395866107e-07, "loss": 0.2336, "step": 18400 }, { "epoch": 7.6570952975447355, "eval_main_loss": 0.23823559284210205, "eval_main_runtime": 6.3201, "eval_main_samples_per_second": 30.063, "eval_main_steps_per_second": 3.797, "step": 18400 }, { "epoch": 7.6570952975447355, "eval_anatomy_loss": 2.834606409072876, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.508, "eval_anatomy_steps_per_second": 3.754, "step": 18400 }, { "epoch": 7.6570952975447355, "eval_college_mathematics_loss": 2.0500612258911133, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.506, "eval_college_mathematics_steps_per_second": 3.753, "step": 18400 }, { "epoch": 7.6570952975447355, "eval_international_law_loss": 3.0666239261627197, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.515, "eval_international_law_steps_per_second": 3.757, "step": 18400 }, { "epoch": 7.66541822721598, "grad_norm": 0.26953125, "learning_rate": 9.790689858946374e-07, "loss": 0.2301, "step": 18420 }, { "epoch": 7.673741156887225, "grad_norm": 0.2490234375, "learning_rate": 9.725921795083063e-07, "loss": 0.2327, "step": 18440 }, { "epoch": 7.682064086558468, "grad_norm": 0.2578125, "learning_rate": 9.661316896159313e-07, "loss": 0.2304, "step": 18460 }, { "epoch": 7.690387016229713, "grad_norm": 0.263671875, "learning_rate": 9.59687585231526e-07, "loss": 0.2282, "step": 18480 }, { "epoch": 7.698709945900957, "grad_norm": 0.287109375, "learning_rate": 9.53259935194066e-07, "loss": 0.2313, "step": 18500 }, { "epoch": 7.698709945900957, "eval_main_loss": 0.23836714029312134, "eval_main_runtime": 6.351, "eval_main_samples_per_second": 29.917, "eval_main_steps_per_second": 3.779, "step": 18500 }, { "epoch": 7.698709945900957, "eval_anatomy_loss": 2.8336453437805176, "eval_anatomy_runtime": 0.2666, "eval_anatomy_samples_per_second": 7.502, "eval_anatomy_steps_per_second": 3.751, "step": 18500 }, { "epoch": 7.698709945900957, "eval_college_mathematics_loss": 2.0492594242095947, "eval_college_mathematics_runtime": 0.2688, "eval_college_mathematics_samples_per_second": 7.442, "eval_college_mathematics_steps_per_second": 3.721, "step": 18500 }, { "epoch": 7.698709945900957, "eval_international_law_loss": 3.0663108825683594, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.489, "eval_international_law_steps_per_second": 3.745, "step": 18500 }, { "epoch": 7.707032875572201, "grad_norm": 0.302734375, "learning_rate": 9.468488081667537e-07, "loss": 0.2357, "step": 18520 }, { "epoch": 7.715355805243446, "grad_norm": 0.3515625, "learning_rate": 9.404542726362872e-07, "loss": 0.231, "step": 18540 }, { "epoch": 7.72367873491469, "grad_norm": 0.30078125, "learning_rate": 9.34076396912123e-07, "loss": 0.234, "step": 18560 }, { "epoch": 7.732001664585934, "grad_norm": 0.2412109375, "learning_rate": 9.277152491257515e-07, "loss": 0.2341, "step": 18580 }, { "epoch": 7.740324594257179, "grad_norm": 0.296875, "learning_rate": 9.213708972299662e-07, "loss": 0.2339, "step": 18600 }, { "epoch": 7.740324594257179, "eval_main_loss": 0.23831741511821747, "eval_main_runtime": 6.3514, "eval_main_samples_per_second": 29.915, "eval_main_steps_per_second": 3.779, "step": 18600 }, { "epoch": 7.740324594257179, "eval_anatomy_loss": 2.8322861194610596, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.479, "eval_anatomy_steps_per_second": 3.74, "step": 18600 }, { "epoch": 7.740324594257179, "eval_college_mathematics_loss": 2.050147771835327, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.49, "eval_college_mathematics_steps_per_second": 3.745, "step": 18600 }, { "epoch": 7.740324594257179, "eval_international_law_loss": 3.066422700881958, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.512, "eval_international_law_steps_per_second": 3.756, "step": 18600 }, { "epoch": 7.7486475239284225, "grad_norm": 0.271484375, "learning_rate": 9.150434089981413e-07, "loss": 0.234, "step": 18620 }, { "epoch": 7.756970453599667, "grad_norm": 0.2578125, "learning_rate": 9.087328520235028e-07, "loss": 0.2348, "step": 18640 }, { "epoch": 7.765293383270912, "grad_norm": 0.275390625, "learning_rate": 9.024392937184096e-07, "loss": 0.2315, "step": 18660 }, { "epoch": 7.773616312942155, "grad_norm": 0.26953125, "learning_rate": 8.961628013136351e-07, "loss": 0.2295, "step": 18680 }, { "epoch": 7.7819392426134, "grad_norm": 0.259765625, "learning_rate": 8.899034418576413e-07, "loss": 0.2329, "step": 18700 }, { "epoch": 7.7819392426134, "eval_main_loss": 0.2382706105709076, "eval_main_runtime": 6.3343, "eval_main_samples_per_second": 29.996, "eval_main_steps_per_second": 3.789, "step": 18700 }, { "epoch": 7.7819392426134, "eval_anatomy_loss": 2.832164764404297, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.514, "eval_anatomy_steps_per_second": 3.757, "step": 18700 }, { "epoch": 7.7819392426134, "eval_college_mathematics_loss": 2.0492961406707764, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.491, "eval_college_mathematics_steps_per_second": 3.745, "step": 18700 }, { "epoch": 7.7819392426134, "eval_international_law_loss": 3.065026044845581, "eval_international_law_runtime": 0.2654, "eval_international_law_samples_per_second": 7.537, "eval_international_law_steps_per_second": 3.768, "step": 18700 }, { "epoch": 7.790262172284645, "grad_norm": 0.25, "learning_rate": 8.836612822158743e-07, "loss": 0.2279, "step": 18720 }, { "epoch": 7.798585101955888, "grad_norm": 0.318359375, "learning_rate": 8.774363890700394e-07, "loss": 0.2337, "step": 18740 }, { "epoch": 7.806908031627133, "grad_norm": 0.234375, "learning_rate": 8.712288289173937e-07, "loss": 0.2348, "step": 18760 }, { "epoch": 7.8152309612983775, "grad_norm": 0.263671875, "learning_rate": 8.650386680700373e-07, "loss": 0.2285, "step": 18780 }, { "epoch": 7.823553890969621, "grad_norm": 0.26171875, "learning_rate": 8.588659726541998e-07, "loss": 0.2324, "step": 18800 }, { "epoch": 7.823553890969621, "eval_main_loss": 0.23833107948303223, "eval_main_runtime": 6.3262, "eval_main_samples_per_second": 30.034, "eval_main_steps_per_second": 3.794, "step": 18800 }, { "epoch": 7.823553890969621, "eval_anatomy_loss": 2.8310635089874268, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.522, "eval_anatomy_steps_per_second": 3.761, "step": 18800 }, { "epoch": 7.823553890969621, "eval_college_mathematics_loss": 2.0509285926818848, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.496, "eval_college_mathematics_steps_per_second": 3.748, "step": 18800 }, { "epoch": 7.823553890969621, "eval_international_law_loss": 3.065098285675049, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 18800 }, { "epoch": 7.831876820640866, "grad_norm": 0.306640625, "learning_rate": 8.527108086095375e-07, "loss": 0.232, "step": 18820 }, { "epoch": 7.8401997503121095, "grad_norm": 0.2890625, "learning_rate": 8.465732416884312e-07, "loss": 0.2316, "step": 18840 }, { "epoch": 7.848522679983354, "grad_norm": 0.259765625, "learning_rate": 8.404533374552751e-07, "loss": 0.233, "step": 18860 }, { "epoch": 7.856845609654599, "grad_norm": 0.314453125, "learning_rate": 8.343511612857879e-07, "loss": 0.2333, "step": 18880 }, { "epoch": 7.865168539325842, "grad_norm": 0.298828125, "learning_rate": 8.282667783663056e-07, "loss": 0.235, "step": 18900 }, { "epoch": 7.865168539325842, "eval_main_loss": 0.23828402161598206, "eval_main_runtime": 6.3357, "eval_main_samples_per_second": 29.989, "eval_main_steps_per_second": 3.788, "step": 18900 }, { "epoch": 7.865168539325842, "eval_anatomy_loss": 2.832879066467285, "eval_anatomy_runtime": 0.2655, "eval_anatomy_samples_per_second": 7.532, "eval_anatomy_steps_per_second": 3.766, "step": 18900 }, { "epoch": 7.865168539325842, "eval_college_mathematics_loss": 2.05184006690979, "eval_college_mathematics_runtime": 0.2659, "eval_college_mathematics_samples_per_second": 7.521, "eval_college_mathematics_steps_per_second": 3.761, "step": 18900 }, { "epoch": 7.865168539325842, "eval_international_law_loss": 3.065366506576538, "eval_international_law_runtime": 0.2648, "eval_international_law_samples_per_second": 7.553, "eval_international_law_steps_per_second": 3.776, "step": 18900 }, { "epoch": 7.873491468997087, "grad_norm": 0.2890625, "learning_rate": 8.222002536930887e-07, "loss": 0.2314, "step": 18920 }, { "epoch": 7.881814398668332, "grad_norm": 0.26171875, "learning_rate": 8.161516520716287e-07, "loss": 0.2328, "step": 18940 }, { "epoch": 7.890137328339575, "grad_norm": 0.279296875, "learning_rate": 8.101210381159533e-07, "loss": 0.229, "step": 18960 }, { "epoch": 7.89846025801082, "grad_norm": 0.267578125, "learning_rate": 8.041084762479376e-07, "loss": 0.2307, "step": 18980 }, { "epoch": 7.9067831876820645, "grad_norm": 0.291015625, "learning_rate": 7.981140306966154e-07, "loss": 0.2328, "step": 19000 }, { "epoch": 7.9067831876820645, "eval_main_loss": 0.23827558755874634, "eval_main_runtime": 6.3308, "eval_main_samples_per_second": 30.012, "eval_main_steps_per_second": 3.791, "step": 19000 }, { "epoch": 7.9067831876820645, "eval_anatomy_loss": 2.8324079513549805, "eval_anatomy_runtime": 0.2667, "eval_anatomy_samples_per_second": 7.5, "eval_anatomy_steps_per_second": 3.75, "step": 19000 }, { "epoch": 7.9067831876820645, "eval_college_mathematics_loss": 2.050251007080078, "eval_college_mathematics_runtime": 0.266, "eval_college_mathematics_samples_per_second": 7.519, "eval_college_mathematics_steps_per_second": 3.76, "step": 19000 }, { "epoch": 7.9067831876820645, "eval_international_law_loss": 3.0664069652557373, "eval_international_law_runtime": 0.2674, "eval_international_law_samples_per_second": 7.478, "eval_international_law_steps_per_second": 3.739, "step": 19000 }, { "epoch": 7.915106117353308, "grad_norm": 0.306640625, "learning_rate": 7.921377654974955e-07, "loss": 0.2349, "step": 19020 }, { "epoch": 7.923429047024553, "grad_norm": 0.30859375, "learning_rate": 7.861797444918731e-07, "loss": 0.2338, "step": 19040 }, { "epoch": 7.9317519766957965, "grad_norm": 0.265625, "learning_rate": 7.802400313261505e-07, "loss": 0.2378, "step": 19060 }, { "epoch": 7.940074906367041, "grad_norm": 0.302734375, "learning_rate": 7.743186894511603e-07, "loss": 0.2277, "step": 19080 }, { "epoch": 7.948397836038286, "grad_norm": 0.318359375, "learning_rate": 7.684157821214783e-07, "loss": 0.2305, "step": 19100 }, { "epoch": 7.948397836038286, "eval_main_loss": 0.23835448920726776, "eval_main_runtime": 6.3314, "eval_main_samples_per_second": 30.009, "eval_main_steps_per_second": 3.791, "step": 19100 }, { "epoch": 7.948397836038286, "eval_anatomy_loss": 2.8330698013305664, "eval_anatomy_runtime": 0.2647, "eval_anatomy_samples_per_second": 7.555, "eval_anatomy_steps_per_second": 3.777, "step": 19100 }, { "epoch": 7.948397836038286, "eval_college_mathematics_loss": 2.0542187690734863, "eval_college_mathematics_runtime": 0.2653, "eval_college_mathematics_samples_per_second": 7.537, "eval_college_mathematics_steps_per_second": 3.769, "step": 19100 }, { "epoch": 7.948397836038286, "eval_international_law_loss": 3.0674057006835938, "eval_international_law_runtime": 0.2667, "eval_international_law_samples_per_second": 7.499, "eval_international_law_steps_per_second": 3.749, "step": 19100 }, { "epoch": 7.95672076570953, "grad_norm": 0.2060546875, "learning_rate": 7.625313723947592e-07, "loss": 0.2358, "step": 19120 }, { "epoch": 7.965043695380774, "grad_norm": 0.279296875, "learning_rate": 7.566655231310551e-07, "loss": 0.2339, "step": 19140 }, { "epoch": 7.9733666250520185, "grad_norm": 0.26171875, "learning_rate": 7.508182969921463e-07, "loss": 0.2359, "step": 19160 }, { "epoch": 7.981689554723262, "grad_norm": 0.333984375, "learning_rate": 7.449897564408743e-07, "loss": 0.2312, "step": 19180 }, { "epoch": 7.990012484394507, "grad_norm": 0.287109375, "learning_rate": 7.391799637404675e-07, "loss": 0.2328, "step": 19200 }, { "epoch": 7.990012484394507, "eval_main_loss": 0.238310769200325, "eval_main_runtime": 6.3251, "eval_main_samples_per_second": 30.039, "eval_main_steps_per_second": 3.794, "step": 19200 }, { "epoch": 7.990012484394507, "eval_anatomy_loss": 2.834007501602173, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.518, "eval_anatomy_steps_per_second": 3.759, "step": 19200 }, { "epoch": 7.990012484394507, "eval_college_mathematics_loss": 2.0495834350585938, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.528, "eval_college_mathematics_steps_per_second": 3.764, "step": 19200 }, { "epoch": 7.990012484394507, "eval_international_law_loss": 3.065659523010254, "eval_international_law_runtime": 0.2672, "eval_international_law_samples_per_second": 7.484, "eval_international_law_steps_per_second": 3.742, "step": 19200 }, { "epoch": 7.998335414065751, "grad_norm": 0.232421875, "learning_rate": 7.333889809538869e-07, "loss": 0.2292, "step": 19220 }, { "epoch": 8.006658343736996, "grad_norm": 0.263671875, "learning_rate": 7.276168699431527e-07, "loss": 0.2287, "step": 19240 }, { "epoch": 8.014981273408239, "grad_norm": 0.333984375, "learning_rate": 7.218636923686889e-07, "loss": 0.2308, "step": 19260 }, { "epoch": 8.023304203079483, "grad_norm": 0.3046875, "learning_rate": 7.16129509688665e-07, "loss": 0.2346, "step": 19280 }, { "epoch": 8.031627132750728, "grad_norm": 0.263671875, "learning_rate": 7.104143831583368e-07, "loss": 0.2292, "step": 19300 }, { "epoch": 8.031627132750728, "eval_main_loss": 0.2382253110408783, "eval_main_runtime": 6.3318, "eval_main_samples_per_second": 30.007, "eval_main_steps_per_second": 3.79, "step": 19300 }, { "epoch": 8.031627132750728, "eval_anatomy_loss": 2.833739757537842, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.505, "eval_anatomy_steps_per_second": 3.752, "step": 19300 }, { "epoch": 8.031627132750728, "eval_college_mathematics_loss": 2.053969144821167, "eval_college_mathematics_runtime": 0.2672, "eval_college_mathematics_samples_per_second": 7.484, "eval_college_mathematics_steps_per_second": 3.742, "step": 19300 }, { "epoch": 8.031627132750728, "eval_international_law_loss": 3.0664596557617188, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.503, "eval_international_law_steps_per_second": 3.751, "step": 19300 }, { "epoch": 8.039950062421973, "grad_norm": 0.23046875, "learning_rate": 7.047183738293933e-07, "loss": 0.2291, "step": 19320 }, { "epoch": 8.048272992093217, "grad_norm": 0.294921875, "learning_rate": 6.990415425493039e-07, "loss": 0.2317, "step": 19340 }, { "epoch": 8.056595921764462, "grad_norm": 0.28125, "learning_rate": 6.933839499606709e-07, "loss": 0.2325, "step": 19360 }, { "epoch": 8.064918851435705, "grad_norm": 0.26171875, "learning_rate": 6.877456565005783e-07, "loss": 0.2344, "step": 19380 }, { "epoch": 8.07324178110695, "grad_norm": 0.244140625, "learning_rate": 6.82126722399948e-07, "loss": 0.2284, "step": 19400 }, { "epoch": 8.07324178110695, "eval_main_loss": 0.23833835124969482, "eval_main_runtime": 6.3288, "eval_main_samples_per_second": 30.021, "eval_main_steps_per_second": 3.792, "step": 19400 }, { "epoch": 8.07324178110695, "eval_anatomy_loss": 2.831571102142334, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.519, "eval_anatomy_steps_per_second": 3.76, "step": 19400 }, { "epoch": 8.07324178110695, "eval_college_mathematics_loss": 2.052187442779541, "eval_college_mathematics_runtime": 0.2652, "eval_college_mathematics_samples_per_second": 7.542, "eval_college_mathematics_steps_per_second": 3.771, "step": 19400 }, { "epoch": 8.07324178110695, "eval_international_law_loss": 3.0645389556884766, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.516, "eval_international_law_steps_per_second": 3.758, "step": 19400 }, { "epoch": 8.081564710778194, "grad_norm": 0.25390625, "learning_rate": 6.765272076828961e-07, "loss": 0.2323, "step": 19420 }, { "epoch": 8.089887640449438, "grad_norm": 0.265625, "learning_rate": 6.709471721660904e-07, "loss": 0.2336, "step": 19440 }, { "epoch": 8.098210570120683, "grad_norm": 0.294921875, "learning_rate": 6.653866754581159e-07, "loss": 0.2299, "step": 19460 }, { "epoch": 8.106533499791928, "grad_norm": 0.30859375, "learning_rate": 6.598457769588315e-07, "loss": 0.2282, "step": 19480 }, { "epoch": 8.11485642946317, "grad_norm": 0.23828125, "learning_rate": 6.5432453585874e-07, "loss": 0.2324, "step": 19500 }, { "epoch": 8.11485642946317, "eval_main_loss": 0.23833340406417847, "eval_main_runtime": 6.3274, "eval_main_samples_per_second": 30.028, "eval_main_steps_per_second": 3.793, "step": 19500 }, { "epoch": 8.11485642946317, "eval_anatomy_loss": 2.832037925720215, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.514, "eval_anatomy_steps_per_second": 3.757, "step": 19500 }, { "epoch": 8.11485642946317, "eval_college_mathematics_loss": 2.0525174140930176, "eval_college_mathematics_runtime": 0.2659, "eval_college_mathematics_samples_per_second": 7.521, "eval_college_mathematics_steps_per_second": 3.76, "step": 19500 }, { "epoch": 8.11485642946317, "eval_international_law_loss": 3.0670900344848633, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 19500 }, { "epoch": 8.123179359134415, "grad_norm": 0.25390625, "learning_rate": 6.488230111383553e-07, "loss": 0.2317, "step": 19520 }, { "epoch": 8.13150228880566, "grad_norm": 0.3125, "learning_rate": 6.433412615675705e-07, "loss": 0.2324, "step": 19540 }, { "epoch": 8.139825218476904, "grad_norm": 0.265625, "learning_rate": 6.378793457050306e-07, "loss": 0.234, "step": 19560 }, { "epoch": 8.148148148148149, "grad_norm": 0.2451171875, "learning_rate": 6.324373218975105e-07, "loss": 0.234, "step": 19580 }, { "epoch": 8.156471077819392, "grad_norm": 0.283203125, "learning_rate": 6.270152482792844e-07, "loss": 0.2307, "step": 19600 }, { "epoch": 8.156471077819392, "eval_main_loss": 0.23835553228855133, "eval_main_runtime": 6.3341, "eval_main_samples_per_second": 29.996, "eval_main_steps_per_second": 3.789, "step": 19600 }, { "epoch": 8.156471077819392, "eval_anatomy_loss": 2.833484649658203, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.496, "eval_anatomy_steps_per_second": 3.748, "step": 19600 }, { "epoch": 8.156471077819392, "eval_college_mathematics_loss": 2.054138422012329, "eval_college_mathematics_runtime": 0.2646, "eval_college_mathematics_samples_per_second": 7.558, "eval_college_mathematics_steps_per_second": 3.779, "step": 19600 }, { "epoch": 8.156471077819392, "eval_international_law_loss": 3.066187620162964, "eval_international_law_runtime": 0.2674, "eval_international_law_samples_per_second": 7.478, "eval_international_law_steps_per_second": 3.739, "step": 19600 }, { "epoch": 8.164794007490636, "grad_norm": 0.30859375, "learning_rate": 6.21613182771513e-07, "loss": 0.2277, "step": 19620 }, { "epoch": 8.17311693716188, "grad_norm": 0.26953125, "learning_rate": 6.162311830816187e-07, "loss": 0.2276, "step": 19640 }, { "epoch": 8.181439866833125, "grad_norm": 0.31640625, "learning_rate": 6.108693067026713e-07, "loss": 0.2329, "step": 19660 }, { "epoch": 8.18976279650437, "grad_norm": 0.291015625, "learning_rate": 6.05527610912777e-07, "loss": 0.2341, "step": 19680 }, { "epoch": 8.198085726175615, "grad_norm": 0.28515625, "learning_rate": 6.002061527744573e-07, "loss": 0.2313, "step": 19700 }, { "epoch": 8.198085726175615, "eval_main_loss": 0.23825512826442719, "eval_main_runtime": 6.325, "eval_main_samples_per_second": 30.04, "eval_main_steps_per_second": 3.794, "step": 19700 }, { "epoch": 8.198085726175615, "eval_anatomy_loss": 2.831984519958496, "eval_anatomy_runtime": 0.2658, "eval_anatomy_samples_per_second": 7.523, "eval_anatomy_steps_per_second": 3.762, "step": 19700 }, { "epoch": 8.198085726175615, "eval_college_mathematics_loss": 2.0513973236083984, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.528, "eval_college_mathematics_steps_per_second": 3.764, "step": 19700 }, { "epoch": 8.198085726175615, "eval_international_law_loss": 3.0651326179504395, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 19700 }, { "epoch": 8.206408655846857, "grad_norm": 0.32421875, "learning_rate": 5.94904989134052e-07, "loss": 0.2367, "step": 19720 }, { "epoch": 8.214731585518102, "grad_norm": 0.296875, "learning_rate": 5.896241766211011e-07, "loss": 0.232, "step": 19740 }, { "epoch": 8.223054515189347, "grad_norm": 0.31640625, "learning_rate": 5.843637716477454e-07, "loss": 0.2336, "step": 19760 }, { "epoch": 8.231377444860591, "grad_norm": 0.318359375, "learning_rate": 5.791238304081245e-07, "loss": 0.235, "step": 19780 }, { "epoch": 8.239700374531836, "grad_norm": 0.26953125, "learning_rate": 5.73904408877772e-07, "loss": 0.2298, "step": 19800 }, { "epoch": 8.239700374531836, "eval_main_loss": 0.23827609419822693, "eval_main_runtime": 6.324, "eval_main_samples_per_second": 30.044, "eval_main_steps_per_second": 3.795, "step": 19800 }, { "epoch": 8.239700374531836, "eval_anatomy_loss": 2.8319296836853027, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.51, "eval_anatomy_steps_per_second": 3.755, "step": 19800 }, { "epoch": 8.239700374531836, "eval_college_mathematics_loss": 2.051198720932007, "eval_college_mathematics_runtime": 0.2656, "eval_college_mathematics_samples_per_second": 7.531, "eval_college_mathematics_steps_per_second": 3.765, "step": 19800 }, { "epoch": 8.239700374531836, "eval_international_law_loss": 3.0645523071289062, "eval_international_law_runtime": 0.2649, "eval_international_law_samples_per_second": 7.551, "eval_international_law_steps_per_second": 3.775, "step": 19800 }, { "epoch": 8.24802330420308, "grad_norm": 0.29296875, "learning_rate": 5.687055628130219e-07, "loss": 0.2318, "step": 19820 }, { "epoch": 8.256346233874323, "grad_norm": 0.2373046875, "learning_rate": 5.6352734775041e-07, "loss": 0.231, "step": 19840 }, { "epoch": 8.264669163545568, "grad_norm": 0.27734375, "learning_rate": 5.58369819006084e-07, "loss": 0.2331, "step": 19860 }, { "epoch": 8.272992093216812, "grad_norm": 0.33203125, "learning_rate": 5.532330316752091e-07, "loss": 0.2343, "step": 19880 }, { "epoch": 8.281315022888057, "grad_norm": 0.298828125, "learning_rate": 5.481170406313799e-07, "loss": 0.2327, "step": 19900 }, { "epoch": 8.281315022888057, "eval_main_loss": 0.23821409046649933, "eval_main_runtime": 6.3564, "eval_main_samples_per_second": 29.891, "eval_main_steps_per_second": 3.776, "step": 19900 }, { "epoch": 8.281315022888057, "eval_anatomy_loss": 2.8317759037017822, "eval_anatomy_runtime": 0.2667, "eval_anatomy_samples_per_second": 7.5, "eval_anatomy_steps_per_second": 3.75, "step": 19900 }, { "epoch": 8.281315022888057, "eval_college_mathematics_loss": 2.051119804382324, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.497, "eval_college_mathematics_steps_per_second": 3.748, "step": 19900 }, { "epoch": 8.281315022888057, "eval_international_law_loss": 3.0645089149475098, "eval_international_law_runtime": 0.2665, "eval_international_law_samples_per_second": 7.506, "eval_international_law_steps_per_second": 3.753, "step": 19900 }, { "epoch": 8.289637952559302, "grad_norm": 0.28125, "learning_rate": 5.430219005260387e-07, "loss": 0.2335, "step": 19920 }, { "epoch": 8.297960882230544, "grad_norm": 0.201171875, "learning_rate": 5.379476657878834e-07, "loss": 0.2349, "step": 19940 }, { "epoch": 8.306283811901789, "grad_norm": 0.302734375, "learning_rate": 5.328943906222955e-07, "loss": 0.2327, "step": 19960 }, { "epoch": 8.314606741573034, "grad_norm": 0.25390625, "learning_rate": 5.278621290107533e-07, "loss": 0.2318, "step": 19980 }, { "epoch": 8.322929671244278, "grad_norm": 0.26171875, "learning_rate": 5.228509347102593e-07, "loss": 0.2314, "step": 20000 }, { "epoch": 8.322929671244278, "eval_main_loss": 0.2383708357810974, "eval_main_runtime": 6.3506, "eval_main_samples_per_second": 29.918, "eval_main_steps_per_second": 3.779, "step": 20000 }, { "epoch": 8.322929671244278, "eval_anatomy_loss": 2.8334150314331055, "eval_anatomy_runtime": 0.2681, "eval_anatomy_samples_per_second": 7.459, "eval_anatomy_steps_per_second": 3.73, "step": 20000 }, { "epoch": 8.322929671244278, "eval_college_mathematics_loss": 2.049489974975586, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.496, "eval_college_mathematics_steps_per_second": 3.748, "step": 20000 }, { "epoch": 8.322929671244278, "eval_international_law_loss": 3.0655791759490967, "eval_international_law_runtime": 0.2674, "eval_international_law_samples_per_second": 7.48, "eval_international_law_steps_per_second": 3.74, "step": 20000 }, { "epoch": 8.331252600915523, "grad_norm": 0.30859375, "learning_rate": 5.178608612527663e-07, "loss": 0.2332, "step": 20020 }, { "epoch": 8.339575530586767, "grad_norm": 0.2431640625, "learning_rate": 5.12891961944601e-07, "loss": 0.2299, "step": 20040 }, { "epoch": 8.34789846025801, "grad_norm": 0.279296875, "learning_rate": 5.079442898659017e-07, "loss": 0.2326, "step": 20060 }, { "epoch": 8.356221389929255, "grad_norm": 0.265625, "learning_rate": 5.030178978700448e-07, "loss": 0.2295, "step": 20080 }, { "epoch": 8.3645443196005, "grad_norm": 0.24609375, "learning_rate": 4.98112838583083e-07, "loss": 0.2306, "step": 20100 }, { "epoch": 8.3645443196005, "eval_main_loss": 0.23833905160427094, "eval_main_runtime": 6.343, "eval_main_samples_per_second": 29.954, "eval_main_steps_per_second": 3.784, "step": 20100 }, { "epoch": 8.3645443196005, "eval_anatomy_loss": 2.832882881164551, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.508, "eval_anatomy_steps_per_second": 3.754, "step": 20100 }, { "epoch": 8.3645443196005, "eval_college_mathematics_loss": 2.0530946254730225, "eval_college_mathematics_runtime": 0.2677, "eval_college_mathematics_samples_per_second": 7.472, "eval_college_mathematics_steps_per_second": 3.736, "step": 20100 }, { "epoch": 8.3645443196005, "eval_international_law_loss": 3.066558599472046, "eval_international_law_runtime": 0.2659, "eval_international_law_samples_per_second": 7.522, "eval_international_law_steps_per_second": 3.761, "step": 20100 }, { "epoch": 8.372867249271744, "grad_norm": 0.29296875, "learning_rate": 4.932291644031844e-07, "loss": 0.232, "step": 20120 }, { "epoch": 8.381190178942989, "grad_norm": 0.287109375, "learning_rate": 4.883669275000699e-07, "loss": 0.2289, "step": 20140 }, { "epoch": 8.389513108614231, "grad_norm": 0.265625, "learning_rate": 4.835261798144569e-07, "loss": 0.2359, "step": 20160 }, { "epoch": 8.397836038285476, "grad_norm": 0.25390625, "learning_rate": 4.787069730575067e-07, "loss": 0.2288, "step": 20180 }, { "epoch": 8.40615896795672, "grad_norm": 0.267578125, "learning_rate": 4.739093587102686e-07, "loss": 0.2314, "step": 20200 }, { "epoch": 8.40615896795672, "eval_main_loss": 0.23831138014793396, "eval_main_runtime": 6.3296, "eval_main_samples_per_second": 30.018, "eval_main_steps_per_second": 3.792, "step": 20200 }, { "epoch": 8.40615896795672, "eval_anatomy_loss": 2.833329916000366, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.519, "eval_anatomy_steps_per_second": 3.759, "step": 20200 }, { "epoch": 8.40615896795672, "eval_college_mathematics_loss": 2.0539867877960205, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.488, "eval_college_mathematics_steps_per_second": 3.744, "step": 20200 }, { "epoch": 8.40615896795672, "eval_international_law_loss": 3.067291498184204, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.515, "eval_international_law_steps_per_second": 3.757, "step": 20200 }, { "epoch": 8.414481897627965, "grad_norm": 0.232421875, "learning_rate": 4.6913338802313165e-07, "loss": 0.227, "step": 20220 }, { "epoch": 8.42280482729921, "grad_norm": 0.318359375, "learning_rate": 4.6437911201527686e-07, "loss": 0.2312, "step": 20240 }, { "epoch": 8.431127756970454, "grad_norm": 0.330078125, "learning_rate": 4.596465814741341e-07, "loss": 0.2295, "step": 20260 }, { "epoch": 8.439450686641697, "grad_norm": 0.236328125, "learning_rate": 4.5493584695483593e-07, "loss": 0.2321, "step": 20280 }, { "epoch": 8.447773616312942, "grad_norm": 0.306640625, "learning_rate": 4.502469587796807e-07, "loss": 0.2317, "step": 20300 }, { "epoch": 8.447773616312942, "eval_main_loss": 0.23827072978019714, "eval_main_runtime": 6.3259, "eval_main_samples_per_second": 30.035, "eval_main_steps_per_second": 3.794, "step": 20300 }, { "epoch": 8.447773616312942, "eval_anatomy_loss": 2.834202766418457, "eval_anatomy_runtime": 0.2657, "eval_anatomy_samples_per_second": 7.527, "eval_anatomy_steps_per_second": 3.764, "step": 20300 }, { "epoch": 8.447773616312942, "eval_college_mathematics_loss": 2.0520122051239014, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.506, "eval_college_mathematics_steps_per_second": 3.753, "step": 20300 }, { "epoch": 8.447773616312942, "eval_international_law_loss": 3.065819025039673, "eval_international_law_runtime": 0.266, "eval_international_law_samples_per_second": 7.517, "eval_international_law_steps_per_second": 3.759, "step": 20300 }, { "epoch": 8.456096545984186, "grad_norm": 0.26171875, "learning_rate": 4.4557996703759295e-07, "loss": 0.2337, "step": 20320 }, { "epoch": 8.464419475655431, "grad_norm": 0.267578125, "learning_rate": 4.409349215835887e-07, "loss": 0.2304, "step": 20340 }, { "epoch": 8.472742405326676, "grad_norm": 0.27734375, "learning_rate": 4.363118720382456e-07, "loss": 0.2306, "step": 20360 }, { "epoch": 8.481065334997918, "grad_norm": 0.263671875, "learning_rate": 4.317108677871687e-07, "loss": 0.2262, "step": 20380 }, { "epoch": 8.489388264669163, "grad_norm": 0.275390625, "learning_rate": 4.271319579804639e-07, "loss": 0.2297, "step": 20400 }, { "epoch": 8.489388264669163, "eval_main_loss": 0.23833975195884705, "eval_main_runtime": 6.3573, "eval_main_samples_per_second": 29.887, "eval_main_steps_per_second": 3.775, "step": 20400 }, { "epoch": 8.489388264669163, "eval_anatomy_loss": 2.8326468467712402, "eval_anatomy_runtime": 0.2676, "eval_anatomy_samples_per_second": 7.475, "eval_anatomy_steps_per_second": 3.738, "step": 20400 }, { "epoch": 8.489388264669163, "eval_college_mathematics_loss": 2.0523431301116943, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.5, "eval_college_mathematics_steps_per_second": 3.75, "step": 20400 }, { "epoch": 8.489388264669163, "eval_international_law_loss": 3.0675792694091797, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.512, "eval_international_law_steps_per_second": 3.756, "step": 20400 }, { "epoch": 8.497711194340408, "grad_norm": 0.26171875, "learning_rate": 4.2257519153221736e-07, "loss": 0.2366, "step": 20420 }, { "epoch": 8.506034124011652, "grad_norm": 0.349609375, "learning_rate": 4.180406171199644e-07, "loss": 0.2301, "step": 20440 }, { "epoch": 8.514357053682897, "grad_norm": 0.2734375, "learning_rate": 4.1352828318417915e-07, "loss": 0.2337, "step": 20460 }, { "epoch": 8.522679983354141, "grad_norm": 0.265625, "learning_rate": 4.090382379277499e-07, "loss": 0.2325, "step": 20480 }, { "epoch": 8.531002913025384, "grad_norm": 0.255859375, "learning_rate": 4.045705293154664e-07, "loss": 0.2335, "step": 20500 }, { "epoch": 8.531002913025384, "eval_main_loss": 0.2383665144443512, "eval_main_runtime": 6.3513, "eval_main_samples_per_second": 29.915, "eval_main_steps_per_second": 3.779, "step": 20500 }, { "epoch": 8.531002913025384, "eval_anatomy_loss": 2.8339719772338867, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.507, "eval_anatomy_steps_per_second": 3.753, "step": 20500 }, { "epoch": 8.531002913025384, "eval_college_mathematics_loss": 2.0505967140197754, "eval_college_mathematics_runtime": 0.2672, "eval_college_mathematics_samples_per_second": 7.486, "eval_college_mathematics_steps_per_second": 3.743, "step": 20500 }, { "epoch": 8.531002913025384, "eval_international_law_loss": 3.065483808517456, "eval_international_law_runtime": 0.2682, "eval_international_law_samples_per_second": 7.457, "eval_international_law_steps_per_second": 3.729, "step": 20500 }, { "epoch": 8.539325842696629, "grad_norm": 0.251953125, "learning_rate": 4.001252050735102e-07, "loss": 0.2313, "step": 20520 }, { "epoch": 8.547648772367873, "grad_norm": 0.2109375, "learning_rate": 3.9570231268893975e-07, "loss": 0.2308, "step": 20540 }, { "epoch": 8.555971702039118, "grad_norm": 0.314453125, "learning_rate": 3.9130189940918745e-07, "loss": 0.2313, "step": 20560 }, { "epoch": 8.564294631710363, "grad_norm": 0.310546875, "learning_rate": 3.869240122415521e-07, "loss": 0.233, "step": 20580 }, { "epoch": 8.572617561381607, "grad_norm": 0.2265625, "learning_rate": 3.8256869795269824e-07, "loss": 0.2285, "step": 20600 }, { "epoch": 8.572617561381607, "eval_main_loss": 0.2381974309682846, "eval_main_runtime": 6.3331, "eval_main_samples_per_second": 30.001, "eval_main_steps_per_second": 3.79, "step": 20600 }, { "epoch": 8.572617561381607, "eval_anatomy_loss": 2.835426092147827, "eval_anatomy_runtime": 0.2654, "eval_anatomy_samples_per_second": 7.537, "eval_anatomy_steps_per_second": 3.769, "step": 20600 }, { "epoch": 8.572617561381607, "eval_college_mathematics_loss": 2.0525310039520264, "eval_college_mathematics_runtime": 0.2646, "eval_college_mathematics_samples_per_second": 7.559, "eval_college_mathematics_steps_per_second": 3.78, "step": 20600 }, { "epoch": 8.572617561381607, "eval_international_law_loss": 3.064969539642334, "eval_international_law_runtime": 0.265, "eval_international_law_samples_per_second": 7.547, "eval_international_law_steps_per_second": 3.774, "step": 20600 }, { "epoch": 8.58094049105285, "grad_norm": 0.2578125, "learning_rate": 3.782360030681578e-07, "loss": 0.2336, "step": 20620 }, { "epoch": 8.589263420724095, "grad_norm": 0.283203125, "learning_rate": 3.73925973871829e-07, "loss": 0.2336, "step": 20640 }, { "epoch": 8.59758635039534, "grad_norm": 0.314453125, "learning_rate": 3.696386564054863e-07, "loss": 0.2337, "step": 20660 }, { "epoch": 8.605909280066584, "grad_norm": 0.2412109375, "learning_rate": 3.6537409646828505e-07, "loss": 0.2329, "step": 20680 }, { "epoch": 8.614232209737828, "grad_norm": 0.248046875, "learning_rate": 3.611323396162758e-07, "loss": 0.2354, "step": 20700 }, { "epoch": 8.614232209737828, "eval_main_loss": 0.23833750188350677, "eval_main_runtime": 6.3553, "eval_main_samples_per_second": 29.896, "eval_main_steps_per_second": 3.776, "step": 20700 }, { "epoch": 8.614232209737828, "eval_anatomy_loss": 2.8340907096862793, "eval_anatomy_runtime": 0.2683, "eval_anatomy_samples_per_second": 7.455, "eval_anatomy_steps_per_second": 3.728, "step": 20700 }, { "epoch": 8.614232209737828, "eval_college_mathematics_loss": 2.0527515411376953, "eval_college_mathematics_runtime": 0.2665, "eval_college_mathematics_samples_per_second": 7.504, "eval_college_mathematics_steps_per_second": 3.752, "step": 20700 }, { "epoch": 8.614232209737828, "eval_international_law_loss": 3.065247058868408, "eval_international_law_runtime": 0.2686, "eval_international_law_samples_per_second": 7.447, "eval_international_law_steps_per_second": 3.724, "step": 20700 }, { "epoch": 8.622555139409073, "grad_norm": 0.27734375, "learning_rate": 3.569134311619146e-07, "loss": 0.2259, "step": 20720 }, { "epoch": 8.630878069080316, "grad_norm": 0.26953125, "learning_rate": 3.527174161735797e-07, "loss": 0.2324, "step": 20740 }, { "epoch": 8.63920099875156, "grad_norm": 0.28125, "learning_rate": 3.4854433947509256e-07, "loss": 0.2314, "step": 20760 }, { "epoch": 8.647523928422805, "grad_norm": 0.28125, "learning_rate": 3.4439424564523346e-07, "loss": 0.2357, "step": 20780 }, { "epoch": 8.65584685809405, "grad_norm": 0.234375, "learning_rate": 3.402671790172718e-07, "loss": 0.2318, "step": 20800 }, { "epoch": 8.65584685809405, "eval_main_loss": 0.23831304907798767, "eval_main_runtime": 6.3537, "eval_main_samples_per_second": 29.904, "eval_main_steps_per_second": 3.777, "step": 20800 }, { "epoch": 8.65584685809405, "eval_anatomy_loss": 2.8343677520751953, "eval_anatomy_runtime": 0.2677, "eval_anatomy_samples_per_second": 7.472, "eval_anatomy_steps_per_second": 3.736, "step": 20800 }, { "epoch": 8.65584685809405, "eval_college_mathematics_loss": 2.0545074939727783, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.501, "eval_college_mathematics_steps_per_second": 3.751, "step": 20800 }, { "epoch": 8.65584685809405, "eval_international_law_loss": 3.0653839111328125, "eval_international_law_runtime": 0.2681, "eval_international_law_samples_per_second": 7.461, "eval_international_law_steps_per_second": 3.731, "step": 20800 }, { "epoch": 8.664169787765294, "grad_norm": 0.333984375, "learning_rate": 3.361631836784898e-07, "loss": 0.2319, "step": 20820 }, { "epoch": 8.672492717436537, "grad_norm": 0.291015625, "learning_rate": 3.320823034697074e-07, "loss": 0.2332, "step": 20840 }, { "epoch": 8.680815647107782, "grad_norm": 0.28515625, "learning_rate": 3.280245819848224e-07, "loss": 0.2346, "step": 20860 }, { "epoch": 8.689138576779026, "grad_norm": 0.2333984375, "learning_rate": 3.239900625703374e-07, "loss": 0.2327, "step": 20880 }, { "epoch": 8.69746150645027, "grad_norm": 0.291015625, "learning_rate": 3.199787883248992e-07, "loss": 0.2333, "step": 20900 }, { "epoch": 8.69746150645027, "eval_main_loss": 0.23826073110103607, "eval_main_runtime": 6.3163, "eval_main_samples_per_second": 30.081, "eval_main_steps_per_second": 3.8, "step": 20900 }, { "epoch": 8.69746150645027, "eval_anatomy_loss": 2.8350343704223633, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.514, "eval_anatomy_steps_per_second": 3.757, "step": 20900 }, { "epoch": 8.69746150645027, "eval_college_mathematics_loss": 2.052778959274292, "eval_college_mathematics_runtime": 0.2653, "eval_college_mathematics_samples_per_second": 7.538, "eval_college_mathematics_steps_per_second": 3.769, "step": 20900 }, { "epoch": 8.69746150645027, "eval_international_law_loss": 3.064143180847168, "eval_international_law_runtime": 0.2658, "eval_international_law_samples_per_second": 7.523, "eval_international_law_steps_per_second": 3.762, "step": 20900 }, { "epoch": 8.705784436121515, "grad_norm": 0.24609375, "learning_rate": 3.15990802098842e-07, "loss": 0.2307, "step": 20920 }, { "epoch": 8.71410736579276, "grad_norm": 0.2431640625, "learning_rate": 3.12026146493721e-07, "loss": 0.2292, "step": 20940 }, { "epoch": 8.722430295464003, "grad_norm": 0.30078125, "learning_rate": 3.0808486386186804e-07, "loss": 0.2302, "step": 20960 }, { "epoch": 8.730753225135247, "grad_norm": 0.271484375, "learning_rate": 3.041669963059304e-07, "loss": 0.2284, "step": 20980 }, { "epoch": 8.739076154806492, "grad_norm": 0.31640625, "learning_rate": 3.0027258567842525e-07, "loss": 0.2315, "step": 21000 }, { "epoch": 8.739076154806492, "eval_main_loss": 0.23824970424175262, "eval_main_runtime": 6.3549, "eval_main_samples_per_second": 29.898, "eval_main_steps_per_second": 3.777, "step": 21000 }, { "epoch": 8.739076154806492, "eval_anatomy_loss": 2.8310306072235107, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.497, "eval_anatomy_steps_per_second": 3.749, "step": 21000 }, { "epoch": 8.739076154806492, "eval_college_mathematics_loss": 2.0544137954711914, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.488, "eval_college_mathematics_steps_per_second": 3.744, "step": 21000 }, { "epoch": 8.739076154806492, "eval_international_law_loss": 3.0671803951263428, "eval_international_law_runtime": 0.2679, "eval_international_law_samples_per_second": 7.465, "eval_international_law_steps_per_second": 3.733, "step": 21000 }, { "epoch": 8.747399084477737, "grad_norm": 0.28515625, "learning_rate": 2.9640167358129273e-07, "loss": 0.2344, "step": 21020 }, { "epoch": 8.755722014148981, "grad_norm": 0.30859375, "learning_rate": 2.9255430136544886e-07, "loss": 0.231, "step": 21040 }, { "epoch": 8.764044943820224, "grad_norm": 0.3203125, "learning_rate": 2.8873051013034695e-07, "loss": 0.2341, "step": 21060 }, { "epoch": 8.772367873491469, "grad_norm": 0.25, "learning_rate": 2.84930340723536e-07, "loss": 0.229, "step": 21080 }, { "epoch": 8.780690803162713, "grad_norm": 0.275390625, "learning_rate": 2.811538337402264e-07, "loss": 0.2285, "step": 21100 }, { "epoch": 8.780690803162713, "eval_main_loss": 0.23829315602779388, "eval_main_runtime": 6.3571, "eval_main_samples_per_second": 29.888, "eval_main_steps_per_second": 3.775, "step": 21100 }, { "epoch": 8.780690803162713, "eval_anatomy_loss": 2.8332481384277344, "eval_anatomy_runtime": 0.2673, "eval_anatomy_samples_per_second": 7.483, "eval_anatomy_steps_per_second": 3.742, "step": 21100 }, { "epoch": 8.780690803162713, "eval_college_mathematics_loss": 2.0525107383728027, "eval_college_mathematics_runtime": 0.2662, "eval_college_mathematics_samples_per_second": 7.512, "eval_college_mathematics_steps_per_second": 3.756, "step": 21100 }, { "epoch": 8.780690803162713, "eval_international_law_loss": 3.0667715072631836, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.507, "eval_international_law_steps_per_second": 3.754, "step": 21100 }, { "epoch": 8.789013732833958, "grad_norm": 0.306640625, "learning_rate": 2.774010295228549e-07, "loss": 0.2297, "step": 21120 }, { "epoch": 8.797336662505202, "grad_norm": 0.283203125, "learning_rate": 2.7367196816065397e-07, "loss": 0.2364, "step": 21140 }, { "epoch": 8.805659592176447, "grad_norm": 0.283203125, "learning_rate": 2.699666894892236e-07, "loss": 0.2285, "step": 21160 }, { "epoch": 8.81398252184769, "grad_norm": 0.291015625, "learning_rate": 2.662852330901053e-07, "loss": 0.24, "step": 21180 }, { "epoch": 8.822305451518934, "grad_norm": 0.27734375, "learning_rate": 2.6262763829036197e-07, "loss": 0.2316, "step": 21200 }, { "epoch": 8.822305451518934, "eval_main_loss": 0.23829114437103271, "eval_main_runtime": 6.3393, "eval_main_samples_per_second": 29.972, "eval_main_steps_per_second": 3.786, "step": 21200 }, { "epoch": 8.822305451518934, "eval_anatomy_loss": 2.832653045654297, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.515, "eval_anatomy_steps_per_second": 3.758, "step": 21200 }, { "epoch": 8.822305451518934, "eval_college_mathematics_loss": 2.0520215034484863, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.502, "eval_college_mathematics_steps_per_second": 3.751, "step": 21200 }, { "epoch": 8.822305451518934, "eval_international_law_loss": 3.0661661624908447, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.515, "eval_international_law_steps_per_second": 3.757, "step": 21200 }, { "epoch": 8.830628381190179, "grad_norm": 0.2734375, "learning_rate": 2.5899394416215305e-07, "loss": 0.2294, "step": 21220 }, { "epoch": 8.838951310861423, "grad_norm": 0.2421875, "learning_rate": 2.5538418952232054e-07, "loss": 0.2324, "step": 21240 }, { "epoch": 8.847274240532668, "grad_norm": 0.271484375, "learning_rate": 2.5179841293197476e-07, "loss": 0.2331, "step": 21260 }, { "epoch": 8.855597170203911, "grad_norm": 0.25, "learning_rate": 2.482366526960786e-07, "loss": 0.2328, "step": 21280 }, { "epoch": 8.863920099875156, "grad_norm": 0.28515625, "learning_rate": 2.446989468630434e-07, "loss": 0.2363, "step": 21300 }, { "epoch": 8.863920099875156, "eval_main_loss": 0.23828859627246857, "eval_main_runtime": 6.3239, "eval_main_samples_per_second": 30.045, "eval_main_steps_per_second": 3.795, "step": 21300 }, { "epoch": 8.863920099875156, "eval_anatomy_loss": 2.8329224586486816, "eval_anatomy_runtime": 0.267, "eval_anatomy_samples_per_second": 7.491, "eval_anatomy_steps_per_second": 3.746, "step": 21300 }, { "epoch": 8.863920099875156, "eval_college_mathematics_loss": 2.0518035888671875, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.527, "eval_college_mathematics_steps_per_second": 3.764, "step": 21300 }, { "epoch": 8.863920099875156, "eval_international_law_loss": 3.0643723011016846, "eval_international_law_runtime": 0.2657, "eval_international_law_samples_per_second": 7.528, "eval_international_law_steps_per_second": 3.764, "step": 21300 }, { "epoch": 8.8722430295464, "grad_norm": 0.27734375, "learning_rate": 2.411853332243183e-07, "loss": 0.2284, "step": 21320 }, { "epoch": 8.880565959217645, "grad_norm": 0.28515625, "learning_rate": 2.376958493139886e-07, "loss": 0.2363, "step": 21340 }, { "epoch": 8.88888888888889, "grad_norm": 0.234375, "learning_rate": 2.3423053240837518e-07, "loss": 0.2325, "step": 21360 }, { "epoch": 8.897211818560134, "grad_norm": 0.21875, "learning_rate": 2.3078941952563466e-07, "loss": 0.2328, "step": 21380 }, { "epoch": 8.905534748231377, "grad_norm": 0.29296875, "learning_rate": 2.2737254742536547e-07, "loss": 0.23, "step": 21400 }, { "epoch": 8.905534748231377, "eval_main_loss": 0.23842591047286987, "eval_main_runtime": 6.3216, "eval_main_samples_per_second": 30.056, "eval_main_steps_per_second": 3.796, "step": 21400 }, { "epoch": 8.905534748231377, "eval_anatomy_loss": 2.8358285427093506, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.509, "eval_anatomy_steps_per_second": 3.755, "step": 21400 }, { "epoch": 8.905534748231377, "eval_college_mathematics_loss": 2.0552380084991455, "eval_college_mathematics_runtime": 0.2662, "eval_college_mathematics_samples_per_second": 7.514, "eval_college_mathematics_steps_per_second": 3.757, "step": 21400 }, { "epoch": 8.905534748231377, "eval_international_law_loss": 3.0662267208099365, "eval_international_law_runtime": 0.2673, "eval_international_law_samples_per_second": 7.482, "eval_international_law_steps_per_second": 3.741, "step": 21400 }, { "epoch": 8.913857677902621, "grad_norm": 0.228515625, "learning_rate": 2.2397995260821342e-07, "loss": 0.2304, "step": 21420 }, { "epoch": 8.922180607573866, "grad_norm": 0.326171875, "learning_rate": 2.206116713154838e-07, "loss": 0.2309, "step": 21440 }, { "epoch": 8.93050353724511, "grad_norm": 0.263671875, "learning_rate": 2.172677395287537e-07, "loss": 0.2324, "step": 21460 }, { "epoch": 8.938826466916355, "grad_norm": 0.3046875, "learning_rate": 2.1394819296948616e-07, "loss": 0.2306, "step": 21480 }, { "epoch": 8.947149396587598, "grad_norm": 0.263671875, "learning_rate": 2.106530670986498e-07, "loss": 0.2323, "step": 21500 }, { "epoch": 8.947149396587598, "eval_main_loss": 0.23836477100849152, "eval_main_runtime": 6.3511, "eval_main_samples_per_second": 29.916, "eval_main_steps_per_second": 3.779, "step": 21500 }, { "epoch": 8.947149396587598, "eval_anatomy_loss": 2.8358511924743652, "eval_anatomy_runtime": 0.2678, "eval_anatomy_samples_per_second": 7.468, "eval_anatomy_steps_per_second": 3.734, "step": 21500 }, { "epoch": 8.947149396587598, "eval_college_mathematics_loss": 2.0521907806396484, "eval_college_mathematics_runtime": 0.2659, "eval_college_mathematics_samples_per_second": 7.521, "eval_college_mathematics_steps_per_second": 3.761, "step": 21500 }, { "epoch": 8.947149396587598, "eval_international_law_loss": 3.0672295093536377, "eval_international_law_runtime": 0.2671, "eval_international_law_samples_per_second": 7.486, "eval_international_law_steps_per_second": 3.743, "step": 21500 }, { "epoch": 8.955472326258842, "grad_norm": 0.28515625, "learning_rate": 2.0738239711634133e-07, "loss": 0.2317, "step": 21520 }, { "epoch": 8.963795255930087, "grad_norm": 0.2490234375, "learning_rate": 2.0413621796140647e-07, "loss": 0.2289, "step": 21540 }, { "epoch": 8.972118185601332, "grad_norm": 0.28515625, "learning_rate": 2.0091456431106854e-07, "loss": 0.2357, "step": 21560 }, { "epoch": 8.980441115272576, "grad_norm": 0.330078125, "learning_rate": 1.977174705805582e-07, "loss": 0.2299, "step": 21580 }, { "epoch": 8.98876404494382, "grad_norm": 0.2353515625, "learning_rate": 1.9454497092274565e-07, "loss": 0.2327, "step": 21600 }, { "epoch": 8.98876404494382, "eval_main_loss": 0.23839198052883148, "eval_main_runtime": 6.3485, "eval_main_samples_per_second": 29.928, "eval_main_steps_per_second": 3.78, "step": 21600 }, { "epoch": 8.98876404494382, "eval_anatomy_loss": 2.8339269161224365, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.495, "eval_anatomy_steps_per_second": 3.747, "step": 21600 }, { "epoch": 8.98876404494382, "eval_college_mathematics_loss": 2.0503244400024414, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.511, "eval_college_mathematics_steps_per_second": 3.756, "step": 21600 }, { "epoch": 8.98876404494382, "eval_international_law_loss": 3.066997766494751, "eval_international_law_runtime": 0.2675, "eval_international_law_samples_per_second": 7.477, "eval_international_law_steps_per_second": 3.739, "step": 21600 }, { "epoch": 8.997086974615064, "grad_norm": 0.3125, "learning_rate": 1.9139709922777528e-07, "loss": 0.234, "step": 21620 }, { "epoch": 9.005409904286308, "grad_norm": 0.294921875, "learning_rate": 1.8827388912270318e-07, "loss": 0.2279, "step": 21640 }, { "epoch": 9.013732833957553, "grad_norm": 0.3359375, "learning_rate": 1.8517537397114066e-07, "loss": 0.2288, "step": 21660 }, { "epoch": 9.022055763628797, "grad_norm": 0.275390625, "learning_rate": 1.8210158687289258e-07, "loss": 0.2344, "step": 21680 }, { "epoch": 9.030378693300042, "grad_norm": 0.267578125, "learning_rate": 1.7905256066361037e-07, "loss": 0.2313, "step": 21700 }, { "epoch": 9.030378693300042, "eval_main_loss": 0.23835183680057526, "eval_main_runtime": 6.3323, "eval_main_samples_per_second": 30.005, "eval_main_steps_per_second": 3.79, "step": 21700 }, { "epoch": 9.030378693300042, "eval_anatomy_loss": 2.832937479019165, "eval_anatomy_runtime": 0.2671, "eval_anatomy_samples_per_second": 7.487, "eval_anatomy_steps_per_second": 3.743, "step": 21700 }, { "epoch": 9.030378693300042, "eval_college_mathematics_loss": 2.048555612564087, "eval_college_mathematics_runtime": 0.2651, "eval_college_mathematics_samples_per_second": 7.545, "eval_college_mathematics_steps_per_second": 3.773, "step": 21700 }, { "epoch": 9.030378693300042, "eval_international_law_loss": 3.0660414695739746, "eval_international_law_runtime": 0.266, "eval_international_law_samples_per_second": 7.519, "eval_international_law_steps_per_second": 3.759, "step": 21700 }, { "epoch": 9.038701622971287, "grad_norm": 0.255859375, "learning_rate": 1.7602832791443648e-07, "loss": 0.2293, "step": 21720 }, { "epoch": 9.04702455264253, "grad_norm": 0.275390625, "learning_rate": 1.7302892093165684e-07, "loss": 0.2289, "step": 21740 }, { "epoch": 9.055347482313774, "grad_norm": 0.328125, "learning_rate": 1.700543717563591e-07, "loss": 0.2313, "step": 21760 }, { "epoch": 9.063670411985019, "grad_norm": 0.2890625, "learning_rate": 1.6710471216408563e-07, "loss": 0.2315, "step": 21780 }, { "epoch": 9.071993341656263, "grad_norm": 0.2578125, "learning_rate": 1.641799736644986e-07, "loss": 0.2333, "step": 21800 }, { "epoch": 9.071993341656263, "eval_main_loss": 0.2383333146572113, "eval_main_runtime": 6.3262, "eval_main_samples_per_second": 30.034, "eval_main_steps_per_second": 3.794, "step": 21800 }, { "epoch": 9.071993341656263, "eval_anatomy_loss": 2.831967353820801, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.504, "eval_anatomy_steps_per_second": 3.752, "step": 21800 }, { "epoch": 9.071993341656263, "eval_college_mathematics_loss": 2.050198554992676, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.51, "eval_college_mathematics_steps_per_second": 3.755, "step": 21800 }, { "epoch": 9.071993341656263, "eval_international_law_loss": 3.0654492378234863, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.503, "eval_international_law_steps_per_second": 3.751, "step": 21800 }, { "epoch": 9.080316271327508, "grad_norm": 0.333984375, "learning_rate": 1.6128018750103975e-07, "loss": 0.2335, "step": 21820 }, { "epoch": 9.088639200998752, "grad_norm": 0.24609375, "learning_rate": 1.5840538465059813e-07, "loss": 0.2288, "step": 21840 }, { "epoch": 9.096962130669995, "grad_norm": 0.26953125, "learning_rate": 1.555555958231808e-07, "loss": 0.2326, "step": 21860 }, { "epoch": 9.10528506034124, "grad_norm": 0.263671875, "learning_rate": 1.527308514615819e-07, "loss": 0.2301, "step": 21880 }, { "epoch": 9.113607990012484, "grad_norm": 0.31640625, "learning_rate": 1.4993118174105842e-07, "loss": 0.2275, "step": 21900 }, { "epoch": 9.113607990012484, "eval_main_loss": 0.23825402557849884, "eval_main_runtime": 6.3324, "eval_main_samples_per_second": 30.005, "eval_main_steps_per_second": 3.79, "step": 21900 }, { "epoch": 9.113607990012484, "eval_anatomy_loss": 2.833402395248413, "eval_anatomy_runtime": 0.2656, "eval_anatomy_samples_per_second": 7.529, "eval_anatomy_steps_per_second": 3.765, "step": 21900 }, { "epoch": 9.113607990012484, "eval_college_mathematics_loss": 2.0520825386047363, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.508, "eval_college_mathematics_steps_per_second": 3.754, "step": 21900 }, { "epoch": 9.113607990012484, "eval_international_law_loss": 3.0643093585968018, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.515, "eval_international_law_steps_per_second": 3.758, "step": 21900 }, { "epoch": 9.121930919683729, "grad_norm": 0.275390625, "learning_rate": 1.471566165690086e-07, "loss": 0.2317, "step": 21920 }, { "epoch": 9.130253849354974, "grad_norm": 0.25390625, "learning_rate": 1.4440718558465294e-07, "loss": 0.232, "step": 21940 }, { "epoch": 9.138576779026216, "grad_norm": 0.27734375, "learning_rate": 1.416829181587151e-07, "loss": 0.2346, "step": 21960 }, { "epoch": 9.146899708697461, "grad_norm": 0.212890625, "learning_rate": 1.3898384339311038e-07, "loss": 0.2318, "step": 21980 }, { "epoch": 9.155222638368706, "grad_norm": 0.28125, "learning_rate": 1.3630999012063467e-07, "loss": 0.2328, "step": 22000 }, { "epoch": 9.155222638368706, "eval_main_loss": 0.23840220272541046, "eval_main_runtime": 6.333, "eval_main_samples_per_second": 30.002, "eval_main_steps_per_second": 3.79, "step": 22000 }, { "epoch": 9.155222638368706, "eval_anatomy_loss": 2.8348910808563232, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.511, "eval_anatomy_steps_per_second": 3.755, "step": 22000 }, { "epoch": 9.155222638368706, "eval_college_mathematics_loss": 2.0505683422088623, "eval_college_mathematics_runtime": 0.2666, "eval_college_mathematics_samples_per_second": 7.501, "eval_college_mathematics_steps_per_second": 3.75, "step": 22000 }, { "epoch": 9.155222638368706, "eval_international_law_loss": 3.065016508102417, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.497, "eval_international_law_steps_per_second": 3.748, "step": 22000 }, { "epoch": 9.16354556803995, "grad_norm": 0.2734375, "learning_rate": 1.3366138690465437e-07, "loss": 0.2313, "step": 22020 }, { "epoch": 9.171868497711195, "grad_norm": 0.294921875, "learning_rate": 1.3103806203880493e-07, "loss": 0.2352, "step": 22040 }, { "epoch": 9.18019142738244, "grad_norm": 0.337890625, "learning_rate": 1.2844004354668483e-07, "loss": 0.2299, "step": 22060 }, { "epoch": 9.188514357053682, "grad_norm": 0.3046875, "learning_rate": 1.258673591815579e-07, "loss": 0.2313, "step": 22080 }, { "epoch": 9.196837286724927, "grad_norm": 0.267578125, "learning_rate": 1.2332003642605868e-07, "loss": 0.2304, "step": 22100 }, { "epoch": 9.196837286724927, "eval_main_loss": 0.2382725030183792, "eval_main_runtime": 6.3586, "eval_main_samples_per_second": 29.881, "eval_main_steps_per_second": 3.774, "step": 22100 }, { "epoch": 9.196837286724927, "eval_anatomy_loss": 2.8336524963378906, "eval_anatomy_runtime": 0.268, "eval_anatomy_samples_per_second": 7.462, "eval_anatomy_steps_per_second": 3.731, "step": 22100 }, { "epoch": 9.196837286724927, "eval_college_mathematics_loss": 2.0524938106536865, "eval_college_mathematics_runtime": 0.2678, "eval_college_mathematics_samples_per_second": 7.468, "eval_college_mathematics_steps_per_second": 3.734, "step": 22100 }, { "epoch": 9.196837286724927, "eval_international_law_loss": 3.0638139247894287, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.495, "eval_international_law_steps_per_second": 3.748, "step": 22100 }, { "epoch": 9.205160216396171, "grad_norm": 0.29296875, "learning_rate": 1.2079810249189415e-07, "loss": 0.234, "step": 22120 }, { "epoch": 9.213483146067416, "grad_norm": 0.2734375, "learning_rate": 1.1830158431955841e-07, "loss": 0.2319, "step": 22140 }, { "epoch": 9.22180607573866, "grad_norm": 0.283203125, "learning_rate": 1.1583050857804145e-07, "loss": 0.2295, "step": 22160 }, { "epoch": 9.230129005409903, "grad_norm": 0.29296875, "learning_rate": 1.1338490166454386e-07, "loss": 0.2315, "step": 22180 }, { "epoch": 9.238451935081148, "grad_norm": 0.310546875, "learning_rate": 1.109647897041985e-07, "loss": 0.2329, "step": 22200 }, { "epoch": 9.238451935081148, "eval_main_loss": 0.23832343518733978, "eval_main_runtime": 6.3644, "eval_main_samples_per_second": 29.854, "eval_main_steps_per_second": 3.771, "step": 22200 }, { "epoch": 9.238451935081148, "eval_anatomy_loss": 2.8315091133117676, "eval_anatomy_runtime": 0.2675, "eval_anatomy_samples_per_second": 7.477, "eval_anatomy_steps_per_second": 3.739, "step": 22200 }, { "epoch": 9.238451935081148, "eval_college_mathematics_loss": 2.051854133605957, "eval_college_mathematics_runtime": 0.2658, "eval_college_mathematics_samples_per_second": 7.525, "eval_college_mathematics_steps_per_second": 3.763, "step": 22200 }, { "epoch": 9.238451935081148, "eval_international_law_loss": 3.0672245025634766, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.501, "eval_international_law_steps_per_second": 3.751, "step": 22200 }, { "epoch": 9.246774864752393, "grad_norm": 0.283203125, "learning_rate": 1.085701985497875e-07, "loss": 0.2331, "step": 22220 }, { "epoch": 9.255097794423637, "grad_norm": 0.2431640625, "learning_rate": 1.062011537814675e-07, "loss": 0.2341, "step": 22240 }, { "epoch": 9.263420724094882, "grad_norm": 0.298828125, "learning_rate": 1.0385768070649783e-07, "loss": 0.2314, "step": 22260 }, { "epoch": 9.271743653766126, "grad_norm": 0.31640625, "learning_rate": 1.015398043589677e-07, "loss": 0.2301, "step": 22280 }, { "epoch": 9.28006658343737, "grad_norm": 0.2578125, "learning_rate": 9.924754949953069e-08, "loss": 0.2287, "step": 22300 }, { "epoch": 9.28006658343737, "eval_main_loss": 0.23838910460472107, "eval_main_runtime": 6.3361, "eval_main_samples_per_second": 29.987, "eval_main_steps_per_second": 3.788, "step": 22300 }, { "epoch": 9.28006658343737, "eval_anatomy_loss": 2.8351857662200928, "eval_anatomy_runtime": 0.2664, "eval_anatomy_samples_per_second": 7.508, "eval_anatomy_steps_per_second": 3.754, "step": 22300 }, { "epoch": 9.28006658343737, "eval_college_mathematics_loss": 2.0533783435821533, "eval_college_mathematics_runtime": 0.2663, "eval_college_mathematics_samples_per_second": 7.51, "eval_college_mathematics_steps_per_second": 3.755, "step": 22300 }, { "epoch": 9.28006658343737, "eval_international_law_loss": 3.0666663646698, "eval_international_law_runtime": 0.2658, "eval_international_law_samples_per_second": 7.525, "eval_international_law_steps_per_second": 3.762, "step": 22300 }, { "epoch": 9.288389513108614, "grad_norm": 0.306640625, "learning_rate": 9.698094061513868e-08, "loss": 0.2336, "step": 22320 }, { "epoch": 9.296712442779858, "grad_norm": 0.279296875, "learning_rate": 9.474000191878163e-08, "loss": 0.2304, "step": 22340 }, { "epoch": 9.305035372451103, "grad_norm": 0.296875, "learning_rate": 9.252475734922883e-08, "loss": 0.2302, "step": 22360 }, { "epoch": 9.313358302122348, "grad_norm": 0.31640625, "learning_rate": 9.033523057077193e-08, "loss": 0.2312, "step": 22380 }, { "epoch": 9.321681231793592, "grad_norm": 0.30078125, "learning_rate": 8.817144497297342e-08, "loss": 0.2355, "step": 22400 }, { "epoch": 9.321681231793592, "eval_main_loss": 0.23832879960536957, "eval_main_runtime": 6.3273, "eval_main_samples_per_second": 30.029, "eval_main_steps_per_second": 3.793, "step": 22400 }, { "epoch": 9.321681231793592, "eval_anatomy_loss": 2.8331458568573, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.497, "eval_anatomy_steps_per_second": 3.748, "step": 22400 }, { "epoch": 9.321681231793592, "eval_college_mathematics_loss": 2.0527069568634033, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.506, "eval_college_mathematics_steps_per_second": 3.753, "step": 22400 }, { "epoch": 9.321681231793592, "eval_international_law_loss": 3.0645482540130615, "eval_international_law_runtime": 0.2664, "eval_international_law_samples_per_second": 7.508, "eval_international_law_steps_per_second": 3.754, "step": 22400 }, { "epoch": 9.330004161464835, "grad_norm": 0.287109375, "learning_rate": 8.603342367041578e-08, "loss": 0.2334, "step": 22420 }, { "epoch": 9.33832709113608, "grad_norm": 0.271484375, "learning_rate": 8.392118950245581e-08, "loss": 0.2309, "step": 22440 }, { "epoch": 9.346650020807324, "grad_norm": 0.26953125, "learning_rate": 8.183476503297982e-08, "loss": 0.2301, "step": 22460 }, { "epoch": 9.354972950478569, "grad_norm": 0.287109375, "learning_rate": 7.977417255016162e-08, "loss": 0.2335, "step": 22480 }, { "epoch": 9.363295880149813, "grad_norm": 0.28125, "learning_rate": 7.77394340662277e-08, "loss": 0.2291, "step": 22500 }, { "epoch": 9.363295880149813, "eval_main_loss": 0.23837444186210632, "eval_main_runtime": 6.3339, "eval_main_samples_per_second": 29.997, "eval_main_steps_per_second": 3.789, "step": 22500 }, { "epoch": 9.363295880149813, "eval_anatomy_loss": 2.834547758102417, "eval_anatomy_runtime": 0.2678, "eval_anatomy_samples_per_second": 7.467, "eval_anatomy_steps_per_second": 3.734, "step": 22500 }, { "epoch": 9.363295880149813, "eval_college_mathematics_loss": 2.0539867877960205, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.526, "eval_college_mathematics_steps_per_second": 3.763, "step": 22500 }, { "epoch": 9.363295880149813, "eval_international_law_loss": 3.0666635036468506, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.511, "eval_international_law_steps_per_second": 3.756, "step": 22500 }, { "epoch": 9.371618809821056, "grad_norm": 0.3125, "learning_rate": 7.573057131721684e-08, "loss": 0.23, "step": 22520 }, { "epoch": 9.3799417394923, "grad_norm": 0.30859375, "learning_rate": 7.374760576275397e-08, "loss": 0.2319, "step": 22540 }, { "epoch": 9.388264669163545, "grad_norm": 0.294921875, "learning_rate": 7.179055858581586e-08, "loss": 0.2296, "step": 22560 }, { "epoch": 9.39658759883479, "grad_norm": 0.28515625, "learning_rate": 6.985945069250766e-08, "loss": 0.2351, "step": 22580 }, { "epoch": 9.404910528506035, "grad_norm": 0.2734375, "learning_rate": 6.795430271183929e-08, "loss": 0.237, "step": 22600 }, { "epoch": 9.404910528506035, "eval_main_loss": 0.23828138411045074, "eval_main_runtime": 6.3303, "eval_main_samples_per_second": 30.014, "eval_main_steps_per_second": 3.791, "step": 22600 }, { "epoch": 9.404910528506035, "eval_anatomy_loss": 2.8350436687469482, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.516, "eval_anatomy_steps_per_second": 3.758, "step": 22600 }, { "epoch": 9.404910528506035, "eval_college_mathematics_loss": 2.051068067550659, "eval_college_mathematics_runtime": 0.2664, "eval_college_mathematics_samples_per_second": 7.506, "eval_college_mathematics_steps_per_second": 3.753, "step": 22600 }, { "epoch": 9.404910528506035, "eval_international_law_loss": 3.065190315246582, "eval_international_law_runtime": 0.2653, "eval_international_law_samples_per_second": 7.537, "eval_international_law_steps_per_second": 3.769, "step": 22600 }, { "epoch": 9.41323345817728, "grad_norm": 0.291015625, "learning_rate": 6.607513499550328e-08, "loss": 0.233, "step": 22620 }, { "epoch": 9.421556387848522, "grad_norm": 0.310546875, "learning_rate": 6.422196761766031e-08, "loss": 0.2309, "step": 22640 }, { "epoch": 9.429879317519767, "grad_norm": 0.271484375, "learning_rate": 6.239482037472156e-08, "loss": 0.2313, "step": 22660 }, { "epoch": 9.438202247191011, "grad_norm": 0.314453125, "learning_rate": 6.059371278513942e-08, "loss": 0.2308, "step": 22680 }, { "epoch": 9.446525176862256, "grad_norm": 0.29296875, "learning_rate": 5.881866408919912e-08, "loss": 0.2296, "step": 22700 }, { "epoch": 9.446525176862256, "eval_main_loss": 0.23832696676254272, "eval_main_runtime": 6.3308, "eval_main_samples_per_second": 30.012, "eval_main_steps_per_second": 3.791, "step": 22700 }, { "epoch": 9.446525176862256, "eval_anatomy_loss": 2.834747791290283, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.506, "eval_anatomy_steps_per_second": 3.753, "step": 22700 }, { "epoch": 9.446525176862256, "eval_college_mathematics_loss": 2.05255389213562, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.527, "eval_college_mathematics_steps_per_second": 3.763, "step": 22700 }, { "epoch": 9.446525176862256, "eval_international_law_loss": 3.064356803894043, "eval_international_law_runtime": 0.266, "eval_international_law_samples_per_second": 7.518, "eval_international_law_steps_per_second": 3.759, "step": 22700 }, { "epoch": 9.4548481065335, "grad_norm": 0.296875, "learning_rate": 5.7069693248811566e-08, "loss": 0.2266, "step": 22720 }, { "epoch": 9.463171036204745, "grad_norm": 0.283203125, "learning_rate": 5.5346818947311365e-08, "loss": 0.2316, "step": 22740 }, { "epoch": 9.471493965875988, "grad_norm": 0.265625, "learning_rate": 5.3650059589258616e-08, "loss": 0.2313, "step": 22760 }, { "epoch": 9.479816895547232, "grad_norm": 0.26171875, "learning_rate": 5.197943330024019e-08, "loss": 0.2367, "step": 22780 }, { "epoch": 9.488139825218477, "grad_norm": 0.32421875, "learning_rate": 5.0334957926677917e-08, "loss": 0.2302, "step": 22800 }, { "epoch": 9.488139825218477, "eval_main_loss": 0.2384629100561142, "eval_main_runtime": 6.3266, "eval_main_samples_per_second": 30.032, "eval_main_steps_per_second": 3.794, "step": 22800 }, { "epoch": 9.488139825218477, "eval_anatomy_loss": 2.8347084522247314, "eval_anatomy_runtime": 0.2653, "eval_anatomy_samples_per_second": 7.54, "eval_anatomy_steps_per_second": 3.77, "step": 22800 }, { "epoch": 9.488139825218477, "eval_college_mathematics_loss": 2.0520851612091064, "eval_college_mathematics_runtime": 0.267, "eval_college_mathematics_samples_per_second": 7.492, "eval_college_mathematics_steps_per_second": 3.746, "step": 22800 }, { "epoch": 9.488139825218477, "eval_international_law_loss": 3.0670056343078613, "eval_international_law_runtime": 0.2666, "eval_international_law_samples_per_second": 7.501, "eval_international_law_steps_per_second": 3.751, "step": 22800 }, { "epoch": 9.496462754889722, "grad_norm": 0.26171875, "learning_rate": 4.871665103563655e-08, "loss": 0.2362, "step": 22820 }, { "epoch": 9.504785684560966, "grad_norm": 0.298828125, "learning_rate": 4.7124529914637226e-08, "loss": 0.2338, "step": 22840 }, { "epoch": 9.513108614232209, "grad_norm": 0.2353515625, "learning_rate": 4.555861157147179e-08, "loss": 0.2339, "step": 22860 }, { "epoch": 9.521431543903454, "grad_norm": 0.259765625, "learning_rate": 4.401891273402209e-08, "loss": 0.2355, "step": 22880 }, { "epoch": 9.529754473574698, "grad_norm": 0.26171875, "learning_rate": 4.250544985008043e-08, "loss": 0.2331, "step": 22900 }, { "epoch": 9.529754473574698, "eval_main_loss": 0.2383948415517807, "eval_main_runtime": 6.3315, "eval_main_samples_per_second": 30.008, "eval_main_steps_per_second": 3.791, "step": 22900 }, { "epoch": 9.529754473574698, "eval_anatomy_loss": 2.832704782485962, "eval_anatomy_runtime": 0.2655, "eval_anatomy_samples_per_second": 7.532, "eval_anatomy_steps_per_second": 3.766, "step": 22900 }, { "epoch": 9.529754473574698, "eval_college_mathematics_loss": 2.0502614974975586, "eval_college_mathematics_runtime": 0.2673, "eval_college_mathematics_samples_per_second": 7.482, "eval_college_mathematics_steps_per_second": 3.741, "step": 22900 }, { "epoch": 9.529754473574698, "eval_international_law_loss": 3.0666775703430176, "eval_international_law_runtime": 0.2657, "eval_international_law_samples_per_second": 7.527, "eval_international_law_steps_per_second": 3.764, "step": 22900 }, { "epoch": 9.538077403245943, "grad_norm": 0.267578125, "learning_rate": 4.1018239087174114e-08, "loss": 0.2385, "step": 22920 }, { "epoch": 9.546400332917187, "grad_norm": 0.2734375, "learning_rate": 3.9557296332393413e-08, "loss": 0.2306, "step": 22940 }, { "epoch": 9.554723262588432, "grad_norm": 0.2353515625, "learning_rate": 3.812263719222081e-08, "loss": 0.2322, "step": 22960 }, { "epoch": 9.563046192259675, "grad_norm": 0.2734375, "learning_rate": 3.671427699236479e-08, "loss": 0.2305, "step": 22980 }, { "epoch": 9.57136912193092, "grad_norm": 0.30078125, "learning_rate": 3.53322307775969e-08, "loss": 0.2349, "step": 23000 }, { "epoch": 9.57136912193092, "eval_main_loss": 0.23835638165473938, "eval_main_runtime": 6.3283, "eval_main_samples_per_second": 30.024, "eval_main_steps_per_second": 3.792, "step": 23000 }, { "epoch": 9.57136912193092, "eval_anatomy_loss": 2.8344595432281494, "eval_anatomy_runtime": 0.2648, "eval_anatomy_samples_per_second": 7.554, "eval_anatomy_steps_per_second": 3.777, "step": 23000 }, { "epoch": 9.57136912193092, "eval_college_mathematics_loss": 2.0526890754699707, "eval_college_mathematics_runtime": 0.2671, "eval_college_mathematics_samples_per_second": 7.488, "eval_college_mathematics_steps_per_second": 3.744, "step": 23000 }, { "epoch": 9.57136912193092, "eval_international_law_loss": 3.0659825801849365, "eval_international_law_runtime": 0.2668, "eval_international_law_samples_per_second": 7.496, "eval_international_law_steps_per_second": 3.748, "step": 23000 }, { "epoch": 9.579692051602164, "grad_norm": 0.24609375, "learning_rate": 3.39765133115888e-08, "loss": 0.2312, "step": 23020 }, { "epoch": 9.588014981273409, "grad_norm": 0.28125, "learning_rate": 3.264713907675687e-08, "loss": 0.2308, "step": 23040 }, { "epoch": 9.596337910944653, "grad_norm": 0.28515625, "learning_rate": 3.134412227410677e-08, "loss": 0.232, "step": 23060 }, { "epoch": 9.604660840615896, "grad_norm": 0.3046875, "learning_rate": 3.006747682308103e-08, "loss": 0.2307, "step": 23080 }, { "epoch": 9.61298377028714, "grad_norm": 0.291015625, "learning_rate": 2.8817216361411438e-08, "loss": 0.234, "step": 23100 }, { "epoch": 9.61298377028714, "eval_main_loss": 0.23833413422107697, "eval_main_runtime": 6.32, "eval_main_samples_per_second": 30.063, "eval_main_steps_per_second": 3.797, "step": 23100 }, { "epoch": 9.61298377028714, "eval_anatomy_loss": 2.832688331604004, "eval_anatomy_runtime": 0.2665, "eval_anatomy_samples_per_second": 7.504, "eval_anatomy_steps_per_second": 3.752, "step": 23100 }, { "epoch": 9.61298377028714, "eval_college_mathematics_loss": 2.050745725631714, "eval_college_mathematics_runtime": 0.2667, "eval_college_mathematics_samples_per_second": 7.498, "eval_college_mathematics_steps_per_second": 3.749, "step": 23100 }, { "epoch": 9.61298377028714, "eval_international_law_loss": 3.0674684047698975, "eval_international_law_runtime": 0.2656, "eval_international_law_samples_per_second": 7.53, "eval_international_law_steps_per_second": 3.765, "step": 23100 }, { "epoch": 9.621306699958385, "grad_norm": 0.267578125, "learning_rate": 2.7593354244972448e-08, "loss": 0.2298, "step": 23120 }, { "epoch": 9.62962962962963, "grad_norm": 0.298828125, "learning_rate": 2.6395903547638825e-08, "loss": 0.2305, "step": 23140 }, { "epoch": 9.637952559300874, "grad_norm": 0.2578125, "learning_rate": 2.5224877061146292e-08, "loss": 0.2332, "step": 23160 }, { "epoch": 9.646275488972119, "grad_norm": 0.287109375, "learning_rate": 2.4080287294954706e-08, "loss": 0.2306, "step": 23180 }, { "epoch": 9.654598418643362, "grad_norm": 0.23828125, "learning_rate": 2.2962146476114e-08, "loss": 0.2315, "step": 23200 }, { "epoch": 9.654598418643362, "eval_main_loss": 0.23836931586265564, "eval_main_runtime": 6.3322, "eval_main_samples_per_second": 30.005, "eval_main_steps_per_second": 3.79, "step": 23200 }, { "epoch": 9.654598418643362, "eval_anatomy_loss": 2.834285259246826, "eval_anatomy_runtime": 0.2661, "eval_anatomy_samples_per_second": 7.517, "eval_anatomy_steps_per_second": 3.758, "step": 23200 }, { "epoch": 9.654598418643362, "eval_college_mathematics_loss": 2.05277943611145, "eval_college_mathematics_runtime": 0.2661, "eval_college_mathematics_samples_per_second": 7.517, "eval_college_mathematics_steps_per_second": 3.759, "step": 23200 }, { "epoch": 9.654598418643362, "eval_international_law_loss": 3.065124273300171, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.517, "eval_international_law_steps_per_second": 3.759, "step": 23200 }, { "epoch": 9.662921348314606, "grad_norm": 0.26953125, "learning_rate": 2.187046654913455e-08, "loss": 0.2309, "step": 23220 }, { "epoch": 9.671244277985851, "grad_norm": 0.26171875, "learning_rate": 2.080525917585785e-08, "loss": 0.2303, "step": 23240 }, { "epoch": 9.679567207657096, "grad_norm": 0.2734375, "learning_rate": 1.9766535735334102e-08, "loss": 0.2316, "step": 23260 }, { "epoch": 9.68789013732834, "grad_norm": 0.2734375, "learning_rate": 1.875430732369954e-08, "loss": 0.232, "step": 23280 }, { "epoch": 9.696213066999583, "grad_norm": 0.294921875, "learning_rate": 1.7768584754056796e-08, "loss": 0.2298, "step": 23300 }, { "epoch": 9.696213066999583, "eval_main_loss": 0.2383754402399063, "eval_main_runtime": 6.3306, "eval_main_samples_per_second": 30.013, "eval_main_steps_per_second": 3.791, "step": 23300 }, { "epoch": 9.696213066999583, "eval_anatomy_loss": 2.833543539047241, "eval_anatomy_runtime": 0.2663, "eval_anatomy_samples_per_second": 7.509, "eval_anatomy_steps_per_second": 3.755, "step": 23300 }, { "epoch": 9.696213066999583, "eval_college_mathematics_loss": 2.0523107051849365, "eval_college_mathematics_runtime": 0.2657, "eval_college_mathematics_samples_per_second": 7.527, "eval_college_mathematics_steps_per_second": 3.763, "step": 23300 }, { "epoch": 9.696213066999583, "eval_international_law_loss": 3.065718650817871, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.511, "eval_international_law_steps_per_second": 3.756, "step": 23300 }, { "epoch": 9.704535996670828, "grad_norm": 0.30859375, "learning_rate": 1.6809378556361945e-08, "loss": 0.2306, "step": 23320 }, { "epoch": 9.712858926342072, "grad_norm": 0.30859375, "learning_rate": 1.5876698977310145e-08, "loss": 0.2311, "step": 23340 }, { "epoch": 9.721181856013317, "grad_norm": 0.263671875, "learning_rate": 1.497055598022601e-08, "loss": 0.2291, "step": 23360 }, { "epoch": 9.729504785684561, "grad_norm": 0.21875, "learning_rate": 1.4090959244958402e-08, "loss": 0.2285, "step": 23380 }, { "epoch": 9.737827715355806, "grad_norm": 0.2890625, "learning_rate": 1.3237918167776919e-08, "loss": 0.2278, "step": 23400 }, { "epoch": 9.737827715355806, "eval_main_loss": 0.23843945562839508, "eval_main_runtime": 6.3292, "eval_main_samples_per_second": 30.02, "eval_main_steps_per_second": 3.792, "step": 23400 }, { "epoch": 9.737827715355806, "eval_anatomy_loss": 2.8331258296966553, "eval_anatomy_runtime": 0.2669, "eval_anatomy_samples_per_second": 7.494, "eval_anatomy_steps_per_second": 3.747, "step": 23400 }, { "epoch": 9.737827715355806, "eval_college_mathematics_loss": 2.0540261268615723, "eval_college_mathematics_runtime": 0.2653, "eval_college_mathematics_samples_per_second": 7.539, "eval_college_mathematics_steps_per_second": 3.769, "step": 23400 }, { "epoch": 9.737827715355806, "eval_international_law_loss": 3.066096067428589, "eval_international_law_runtime": 0.2663, "eval_international_law_samples_per_second": 7.511, "eval_international_law_steps_per_second": 3.755, "step": 23400 }, { "epoch": 9.746150645027049, "grad_norm": 0.306640625, "learning_rate": 1.2411441861269746e-08, "loss": 0.2354, "step": 23420 }, { "epoch": 9.754473574698293, "grad_norm": 0.251953125, "learning_rate": 1.161153915424873e-08, "loss": 0.2322, "step": 23440 }, { "epoch": 9.762796504369538, "grad_norm": 0.224609375, "learning_rate": 1.0838218591653348e-08, "loss": 0.237, "step": 23460 }, { "epoch": 9.771119434040783, "grad_norm": 0.3046875, "learning_rate": 1.0091488434460506e-08, "loss": 0.2304, "step": 23480 }, { "epoch": 9.779442363712027, "grad_norm": 0.275390625, "learning_rate": 9.371356659595431e-09, "loss": 0.2284, "step": 23500 }, { "epoch": 9.779442363712027, "eval_main_loss": 0.23842047154903412, "eval_main_runtime": 6.3254, "eval_main_samples_per_second": 30.038, "eval_main_steps_per_second": 3.794, "step": 23500 }, { "epoch": 9.779442363712027, "eval_anatomy_loss": 2.8333277702331543, "eval_anatomy_runtime": 0.266, "eval_anatomy_samples_per_second": 7.517, "eval_anatomy_steps_per_second": 3.759, "step": 23500 }, { "epoch": 9.779442363712027, "eval_college_mathematics_loss": 2.0542471408843994, "eval_college_mathematics_runtime": 0.266, "eval_college_mathematics_samples_per_second": 7.517, "eval_college_mathematics_steps_per_second": 3.759, "step": 23500 }, { "epoch": 9.779442363712027, "eval_international_law_loss": 3.0644288063049316, "eval_international_law_runtime": 0.2657, "eval_international_law_samples_per_second": 7.529, "eval_international_law_steps_per_second": 3.764, "step": 23500 }, { "epoch": 9.787765293383272, "grad_norm": 0.294921875, "learning_rate": 8.677830959846756e-09, "loss": 0.2291, "step": 23520 }, { "epoch": 9.796088223054515, "grad_norm": 0.298828125, "learning_rate": 8.010918743784624e-09, "loss": 0.2362, "step": 23540 }, { "epoch": 9.80441115272576, "grad_norm": 0.283203125, "learning_rate": 7.370627135681319e-09, "loss": 0.2299, "step": 23560 }, { "epoch": 9.812734082397004, "grad_norm": 0.3046875, "learning_rate": 6.75696297543521e-09, "loss": 0.2291, "step": 23580 }, { "epoch": 9.821057012068248, "grad_norm": 0.2890625, "learning_rate": 6.169932818497482e-09, "loss": 0.2299, "step": 23600 }, { "epoch": 9.821057012068248, "eval_main_loss": 0.2384078949689865, "eval_main_runtime": 6.3301, "eval_main_samples_per_second": 30.015, "eval_main_steps_per_second": 3.791, "step": 23600 }, { "epoch": 9.821057012068248, "eval_anatomy_loss": 2.8333277702331543, "eval_anatomy_runtime": 0.2668, "eval_anatomy_samples_per_second": 7.495, "eval_anatomy_steps_per_second": 3.748, "step": 23600 }, { "epoch": 9.821057012068248, "eval_college_mathematics_loss": 2.0545294284820557, "eval_college_mathematics_runtime": 0.2655, "eval_college_mathematics_samples_per_second": 7.533, "eval_college_mathematics_steps_per_second": 3.766, "step": 23600 }, { "epoch": 9.821057012068248, "eval_international_law_loss": 3.0657763481140137, "eval_international_law_runtime": 0.267, "eval_international_law_samples_per_second": 7.492, "eval_international_law_steps_per_second": 3.746, "step": 23600 }, { "epoch": 9.829379941739493, "grad_norm": 0.314453125, "learning_rate": 5.609542935802181e-09, "loss": 0.2375, "step": 23620 }, { "epoch": 9.837702871410738, "grad_norm": 0.236328125, "learning_rate": 5.075799313699892e-09, "loss": 0.2332, "step": 23640 }, { "epoch": 9.84602580108198, "grad_norm": 0.3359375, "learning_rate": 4.568707653892779e-09, "loss": 0.2306, "step": 23660 }, { "epoch": 9.854348730753225, "grad_norm": 0.29296875, "learning_rate": 4.088273373373807e-09, "loss": 0.2353, "step": 23680 }, { "epoch": 9.86267166042447, "grad_norm": 0.32421875, "learning_rate": 3.634501604370122e-09, "loss": 0.2306, "step": 23700 }, { "epoch": 9.86267166042447, "eval_main_loss": 0.23842915892601013, "eval_main_runtime": 6.3292, "eval_main_samples_per_second": 30.019, "eval_main_steps_per_second": 3.792, "step": 23700 }, { "epoch": 9.86267166042447, "eval_anatomy_loss": 2.8333277702331543, "eval_anatomy_runtime": 0.2678, "eval_anatomy_samples_per_second": 7.467, "eval_anatomy_steps_per_second": 3.734, "step": 23700 }, { "epoch": 9.86267166042447, "eval_college_mathematics_loss": 2.0545294284820557, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.495, "eval_college_mathematics_steps_per_second": 3.747, "step": 23700 }, { "epoch": 9.86267166042447, "eval_international_law_loss": 3.066246271133423, "eval_international_law_runtime": 0.2662, "eval_international_law_samples_per_second": 7.514, "eval_international_law_steps_per_second": 3.757, "step": 23700 }, { "epoch": 9.870994590095714, "grad_norm": 0.314453125, "learning_rate": 3.2073971942864214e-09, "loss": 0.2292, "step": 23720 }, { "epoch": 9.879317519766959, "grad_norm": 0.287109375, "learning_rate": 2.806964705654447e-09, "loss": 0.2297, "step": 23740 }, { "epoch": 9.887640449438202, "grad_norm": 0.26953125, "learning_rate": 2.4332084160835766e-09, "loss": 0.2313, "step": 23760 }, { "epoch": 9.895963379109446, "grad_norm": 0.279296875, "learning_rate": 2.086132318215861e-09, "loss": 0.2343, "step": 23780 }, { "epoch": 9.90428630878069, "grad_norm": 0.279296875, "learning_rate": 1.765740119682169e-09, "loss": 0.2333, "step": 23800 }, { "epoch": 9.90428630878069, "eval_main_loss": 0.23843587934970856, "eval_main_runtime": 6.3325, "eval_main_samples_per_second": 30.004, "eval_main_steps_per_second": 3.79, "step": 23800 }, { "epoch": 9.90428630878069, "eval_anatomy_loss": 2.8333277702331543, "eval_anatomy_runtime": 0.2662, "eval_anatomy_samples_per_second": 7.514, "eval_anatomy_steps_per_second": 3.757, "step": 23800 }, { "epoch": 9.90428630878069, "eval_college_mathematics_loss": 2.0545294284820557, "eval_college_mathematics_runtime": 0.2677, "eval_college_mathematics_samples_per_second": 7.471, "eval_college_mathematics_steps_per_second": 3.736, "step": 23800 }, { "epoch": 9.90428630878069, "eval_international_law_loss": 3.066246271133423, "eval_international_law_runtime": 0.2655, "eval_international_law_samples_per_second": 7.534, "eval_international_law_steps_per_second": 3.767, "step": 23800 }, { "epoch": 9.912609238451935, "grad_norm": 0.28515625, "learning_rate": 1.4720352430644402e-09, "loss": 0.2361, "step": 23820 }, { "epoch": 9.92093216812318, "grad_norm": 0.302734375, "learning_rate": 1.205020825856551e-09, "loss": 0.2316, "step": 23840 }, { "epoch": 9.929255097794425, "grad_norm": 0.279296875, "learning_rate": 9.64699720433504e-10, "loss": 0.2324, "step": 23860 }, { "epoch": 9.937578027465667, "grad_norm": 0.26953125, "learning_rate": 7.510744940192327e-10, "loss": 0.2323, "step": 23880 }, { "epoch": 9.945900957136912, "grad_norm": 0.32421875, "learning_rate": 5.64147428659123e-10, "loss": 0.2323, "step": 23900 }, { "epoch": 9.945900957136912, "eval_main_loss": 0.23843084275722504, "eval_main_runtime": 6.3373, "eval_main_samples_per_second": 29.981, "eval_main_steps_per_second": 3.787, "step": 23900 }, { "epoch": 9.945900957136912, "eval_anatomy_loss": 2.8333277702331543, "eval_anatomy_runtime": 0.2674, "eval_anatomy_samples_per_second": 7.479, "eval_anatomy_steps_per_second": 3.74, "step": 23900 }, { "epoch": 9.945900957136912, "eval_college_mathematics_loss": 2.0545294284820557, "eval_college_mathematics_runtime": 0.2668, "eval_college_mathematics_samples_per_second": 7.496, "eval_college_mathematics_steps_per_second": 3.748, "step": 23900 }, { "epoch": 9.945900957136912, "eval_international_law_loss": 3.066246271133423, "eval_international_law_runtime": 0.2661, "eval_international_law_samples_per_second": 7.517, "eval_international_law_steps_per_second": 3.759, "step": 23900 }, { "epoch": 9.954223886808157, "grad_norm": 0.310546875, "learning_rate": 4.0392052119697654e-10, "loss": 0.2338, "step": 23920 }, { "epoch": 9.962546816479401, "grad_norm": 0.3125, "learning_rate": 2.703954832528055e-10, "loss": 0.2311, "step": 23940 }, { "epoch": 9.970869746150646, "grad_norm": 0.2421875, "learning_rate": 1.6357374120368197e-10, "loss": 0.2312, "step": 23960 }, { "epoch": 9.979192675821889, "grad_norm": 0.310546875, "learning_rate": 8.345643617069243e-11, "loss": 0.232, "step": 23980 }, { "epoch": 9.987515605493133, "grad_norm": 0.240234375, "learning_rate": 3.004442400422747e-11, "loss": 0.2373, "step": 24000 }, { "epoch": 9.987515605493133, "eval_main_loss": 0.23843084275722504, "eval_main_runtime": 6.3378, "eval_main_samples_per_second": 29.979, "eval_main_steps_per_second": 3.787, "step": 24000 }, { "epoch": 9.987515605493133, "eval_anatomy_loss": 2.8333277702331543, "eval_anatomy_runtime": 0.2659, "eval_anatomy_samples_per_second": 7.522, "eval_anatomy_steps_per_second": 3.761, "step": 24000 }, { "epoch": 9.987515605493133, "eval_college_mathematics_loss": 2.0545294284820557, "eval_college_mathematics_runtime": 0.2659, "eval_college_mathematics_samples_per_second": 7.521, "eval_college_mathematics_steps_per_second": 3.761, "step": 24000 }, { "epoch": 9.987515605493133, "eval_international_law_loss": 3.066246271133423, "eval_international_law_runtime": 0.2672, "eval_international_law_samples_per_second": 7.486, "eval_international_law_steps_per_second": 3.743, "step": 24000 }, { "epoch": 9.995838535164378, "grad_norm": 0.259765625, "learning_rate": 3.3382752773203352e-12, "loss": 0.2333, "step": 24020 } ], "logging_steps": 20, "max_steps": 24030, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7646735410967085e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }