{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.99992916341999,
  "eval_steps": 500,
  "global_step": 3529,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 5.6118285147712355,
      "learning_rate": 1.8867924528301887e-08,
      "loss": 0.846,
      "step": 1
    },
    {
      "epoch": 0.0,
      "grad_norm": 5.713571723217765,
      "learning_rate": 3.7735849056603774e-08,
      "loss": 0.813,
      "step": 2
    },
    {
      "epoch": 0.0,
      "grad_norm": 6.189090904191137,
      "learning_rate": 5.660377358490566e-08,
      "loss": 0.8901,
      "step": 3
    },
    {
      "epoch": 0.0,
      "grad_norm": 7.135987059921496,
      "learning_rate": 7.547169811320755e-08,
      "loss": 0.9084,
      "step": 4
    },
    {
      "epoch": 0.0,
      "grad_norm": 6.9487575503764125,
      "learning_rate": 9.433962264150943e-08,
      "loss": 0.868,
      "step": 5
    },
    {
      "epoch": 0.0,
      "grad_norm": 6.531405547529674,
      "learning_rate": 1.1320754716981131e-07,
      "loss": 0.8339,
      "step": 6
    },
    {
      "epoch": 0.0,
      "grad_norm": 5.794428405177802,
      "learning_rate": 1.320754716981132e-07,
      "loss": 0.8867,
      "step": 7
    },
    {
      "epoch": 0.0,
      "grad_norm": 6.383676757195207,
      "learning_rate": 1.509433962264151e-07,
      "loss": 0.8464,
      "step": 8
    },
    {
      "epoch": 0.0,
      "grad_norm": 6.3067158349421915,
      "learning_rate": 1.6981132075471695e-07,
      "loss": 0.876,
      "step": 9
    },
    {
      "epoch": 0.0,
      "grad_norm": 5.879065954925436,
      "learning_rate": 1.8867924528301886e-07,
      "loss": 0.8613,
      "step": 10
    },
    {
      "epoch": 0.0,
      "grad_norm": 7.289965054557051,
      "learning_rate": 2.0754716981132074e-07,
      "loss": 0.8581,
      "step": 11
    },
    {
      "epoch": 0.0,
      "grad_norm": 6.250628393462324,
      "learning_rate": 2.2641509433962263e-07,
      "loss": 0.8659,
      "step": 12
    },
    {
      "epoch": 0.0,
      "grad_norm": 4.897669106714332,
      "learning_rate": 2.452830188679245e-07,
      "loss": 0.8234,
      "step": 13
    },
    {
      "epoch": 0.0,
      "grad_norm": 6.899340907296569,
      "learning_rate": 2.641509433962264e-07,
      "loss": 0.9095,
      "step": 14
    },
    {
      "epoch": 0.0,
      "grad_norm": 6.991040839353428,
      "learning_rate": 2.830188679245283e-07,
      "loss": 0.8476,
      "step": 15
    },
    {
      "epoch": 0.0,
      "grad_norm": 5.18007982067284,
      "learning_rate": 3.018867924528302e-07,
      "loss": 0.7656,
      "step": 16
    },
    {
      "epoch": 0.0,
      "grad_norm": 4.900634108194426,
      "learning_rate": 3.2075471698113204e-07,
      "loss": 0.797,
      "step": 17
    },
    {
      "epoch": 0.01,
      "grad_norm": 5.70859024316582,
      "learning_rate": 3.396226415094339e-07,
      "loss": 0.8428,
      "step": 18
    },
    {
      "epoch": 0.01,
      "grad_norm": 5.980094728579612,
      "learning_rate": 3.584905660377358e-07,
      "loss": 0.8573,
      "step": 19
    },
    {
      "epoch": 0.01,
      "grad_norm": 5.852389477273713,
      "learning_rate": 3.773584905660377e-07,
      "loss": 0.7921,
      "step": 20
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.661373849976143,
      "learning_rate": 3.9622641509433963e-07,
      "loss": 0.718,
      "step": 21
    },
    {
      "epoch": 0.01,
      "grad_norm": 6.23274723855891,
      "learning_rate": 4.150943396226415e-07,
      "loss": 0.8623,
      "step": 22
    },
    {
      "epoch": 0.01,
      "grad_norm": 6.0704700228127635,
      "learning_rate": 4.339622641509434e-07,
      "loss": 0.8782,
      "step": 23
    },
    {
      "epoch": 0.01,
      "grad_norm": 5.700893588615956,
      "learning_rate": 4.5283018867924526e-07,
      "loss": 0.8101,
      "step": 24
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.998135432510591,
      "learning_rate": 4.7169811320754717e-07,
      "loss": 0.7511,
      "step": 25
    },
    {
      "epoch": 0.01,
      "grad_norm": 5.217486426924381,
      "learning_rate": 4.90566037735849e-07,
      "loss": 0.8268,
      "step": 26
    },
    {
      "epoch": 0.01,
      "grad_norm": 5.0541443638950305,
      "learning_rate": 5.094339622641509e-07,
      "loss": 0.7494,
      "step": 27
    },
    {
      "epoch": 0.01,
      "grad_norm": 5.205266954508169,
      "learning_rate": 5.283018867924528e-07,
      "loss": 0.8054,
      "step": 28
    },
    {
      "epoch": 0.01,
      "grad_norm": 11.080559809465251,
      "learning_rate": 5.471698113207546e-07,
      "loss": 0.7088,
      "step": 29
    },
    {
      "epoch": 0.01,
      "grad_norm": 5.230197181619572,
      "learning_rate": 5.660377358490566e-07,
      "loss": 0.7552,
      "step": 30
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.988538877448871,
      "learning_rate": 5.849056603773585e-07,
      "loss": 0.7455,
      "step": 31
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.321562944292788,
      "learning_rate": 6.037735849056604e-07,
      "loss": 0.7366,
      "step": 32
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.520956670013252,
      "learning_rate": 6.226415094339622e-07,
      "loss": 0.7305,
      "step": 33
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.677218814857416,
      "learning_rate": 6.415094339622641e-07,
      "loss": 0.6962,
      "step": 34
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.393544219908874,
      "learning_rate": 6.60377358490566e-07,
      "loss": 0.7401,
      "step": 35
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.64954547771705,
      "learning_rate": 6.792452830188678e-07,
      "loss": 0.706,
      "step": 36
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.274230930931085,
      "learning_rate": 6.981132075471697e-07,
      "loss": 0.7262,
      "step": 37
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.907454657653952,
      "learning_rate": 7.169811320754716e-07,
      "loss": 0.6775,
      "step": 38
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.169754686936093,
      "learning_rate": 7.358490566037735e-07,
      "loss": 0.6674,
      "step": 39
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.464499900362431,
      "learning_rate": 7.547169811320754e-07,
      "loss": 0.6806,
      "step": 40
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.097582890478711,
      "learning_rate": 7.735849056603774e-07,
      "loss": 0.7274,
      "step": 41
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.697964008533405,
      "learning_rate": 7.924528301886793e-07,
      "loss": 0.6486,
      "step": 42
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.7930403767612777,
      "learning_rate": 8.113207547169812e-07,
      "loss": 0.6157,
      "step": 43
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.6371272191211172,
      "learning_rate": 8.30188679245283e-07,
      "loss": 0.5929,
      "step": 44
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.801258780572625,
      "learning_rate": 8.490566037735849e-07,
      "loss": 0.5322,
      "step": 45
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.301139979984435,
      "learning_rate": 8.679245283018868e-07,
      "loss": 0.5699,
      "step": 46
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.498909620163726,
      "learning_rate": 8.867924528301887e-07,
      "loss": 0.5538,
      "step": 47
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.994048255712527,
      "learning_rate": 9.056603773584905e-07,
      "loss": 0.5087,
      "step": 48
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.5927147782012816,
      "learning_rate": 9.245283018867924e-07,
      "loss": 0.6009,
      "step": 49
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.500724906785562,
      "learning_rate": 9.433962264150943e-07,
      "loss": 0.5618,
      "step": 50
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.356984138931124,
      "learning_rate": 9.622641509433961e-07,
      "loss": 0.5287,
      "step": 51
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.3735668806204133,
      "learning_rate": 9.81132075471698e-07,
      "loss": 0.5447,
      "step": 52
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.3326495713070416,
      "learning_rate": 1e-06,
      "loss": 0.5317,
      "step": 53
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.489263896611546,
      "learning_rate": 1.0188679245283019e-06,
      "loss": 0.5549,
      "step": 54
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.1415448527151857,
      "learning_rate": 1.0377358490566038e-06,
      "loss": 0.5329,
      "step": 55
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.1401388087716526,
      "learning_rate": 1.0566037735849057e-06,
      "loss": 0.5074,
      "step": 56
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.2267416931558985,
      "learning_rate": 1.0754716981132074e-06,
      "loss": 0.5431,
      "step": 57
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.978898769392214,
      "learning_rate": 1.0943396226415093e-06,
      "loss": 0.5433,
      "step": 58
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.0011201623997574,
      "learning_rate": 1.1132075471698112e-06,
      "loss": 0.5155,
      "step": 59
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.0273145674095665,
      "learning_rate": 1.1320754716981131e-06,
      "loss": 0.4837,
      "step": 60
    },
    {
      "epoch": 0.02,
      "grad_norm": 4.213734749879202,
      "learning_rate": 1.150943396226415e-06,
      "loss": 0.5608,
      "step": 61
    },
    {
      "epoch": 0.02,
      "grad_norm": 5.958909244082287,
      "learning_rate": 1.169811320754717e-06,
      "loss": 0.4858,
      "step": 62
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.8152544386137444,
      "learning_rate": 1.1886792452830188e-06,
      "loss": 0.5165,
      "step": 63
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.6384403894331143,
      "learning_rate": 1.2075471698113208e-06,
      "loss": 0.5016,
      "step": 64
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.87730885494883,
      "learning_rate": 1.2264150943396225e-06,
      "loss": 0.4738,
      "step": 65
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.113820837217724,
      "learning_rate": 1.2452830188679244e-06,
      "loss": 0.4554,
      "step": 66
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.7669377573492886,
      "learning_rate": 1.2641509433962263e-06,
      "loss": 0.466,
      "step": 67
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.2823683218193014,
      "learning_rate": 1.2830188679245282e-06,
      "loss": 0.4613,
      "step": 68
    },
    {
      "epoch": 0.02,
      "grad_norm": 4.673456593571675,
      "learning_rate": 1.30188679245283e-06,
      "loss": 0.4792,
      "step": 69
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.792640744843451,
      "learning_rate": 1.320754716981132e-06,
      "loss": 0.4595,
      "step": 70
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.8347185566760507,
      "learning_rate": 1.339622641509434e-06,
      "loss": 0.4462,
      "step": 71
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.089668904583872,
      "learning_rate": 1.3584905660377356e-06,
      "loss": 0.5026,
      "step": 72
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.2532898025993413,
      "learning_rate": 1.3773584905660375e-06,
      "loss": 0.4678,
      "step": 73
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.9484661367267835,
      "learning_rate": 1.3962264150943394e-06,
      "loss": 0.488,
      "step": 74
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.1818768846061185,
      "learning_rate": 1.4150943396226413e-06,
      "loss": 0.5166,
      "step": 75
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.1426225216317043,
      "learning_rate": 1.4339622641509432e-06,
      "loss": 0.4996,
      "step": 76
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.9238328814899393,
      "learning_rate": 1.4528301886792452e-06,
      "loss": 0.4289,
      "step": 77
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.8360366336829568,
      "learning_rate": 1.471698113207547e-06,
      "loss": 0.4543,
      "step": 78
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.6249196520915064,
      "learning_rate": 1.490566037735849e-06,
      "loss": 0.5045,
      "step": 79
    },
    {
      "epoch": 0.02,
      "grad_norm": 5.299329223688592,
      "learning_rate": 1.5094339622641509e-06,
      "loss": 0.4843,
      "step": 80
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.787829361897964,
      "learning_rate": 1.5283018867924528e-06,
      "loss": 0.4455,
      "step": 81
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.7277354760339874,
      "learning_rate": 1.5471698113207547e-06,
      "loss": 0.4194,
      "step": 82
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.6600416644163922,
      "learning_rate": 1.5660377358490566e-06,
      "loss": 0.4048,
      "step": 83
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.1774992527477286,
      "learning_rate": 1.5849056603773585e-06,
      "loss": 0.4497,
      "step": 84
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.9439067645554107,
      "learning_rate": 1.6037735849056604e-06,
      "loss": 0.474,
      "step": 85
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.8642919262683084,
      "learning_rate": 1.6226415094339623e-06,
      "loss": 0.4249,
      "step": 86
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.774887911245674,
      "learning_rate": 1.6415094339622643e-06,
      "loss": 0.4294,
      "step": 87
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.9695757117046395,
      "learning_rate": 1.660377358490566e-06,
      "loss": 0.4764,
      "step": 88
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.945368803651741,
      "learning_rate": 1.6792452830188679e-06,
      "loss": 0.4517,
      "step": 89
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.857389843927476,
      "learning_rate": 1.6981132075471698e-06,
      "loss": 0.4651,
      "step": 90
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.0764711362725805,
      "learning_rate": 1.7169811320754717e-06,
      "loss": 0.4735,
      "step": 91
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.887559820554778,
      "learning_rate": 1.7358490566037736e-06,
      "loss": 0.408,
      "step": 92
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.7339438704382357,
      "learning_rate": 1.7547169811320755e-06,
      "loss": 0.4428,
      "step": 93
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.725029573154123,
      "learning_rate": 1.7735849056603774e-06,
      "loss": 0.4066,
      "step": 94
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.785369107959244,
      "learning_rate": 1.7924528301886791e-06,
      "loss": 0.4325,
      "step": 95
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.693163705078504,
      "learning_rate": 1.811320754716981e-06,
      "loss": 0.4303,
      "step": 96
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.815163383579835,
      "learning_rate": 1.830188679245283e-06,
      "loss": 0.409,
      "step": 97
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.1482120663466073,
      "learning_rate": 1.8490566037735848e-06,
      "loss": 0.4413,
      "step": 98
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.7451340037459535,
      "learning_rate": 1.8679245283018868e-06,
      "loss": 0.3909,
      "step": 99
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.668547433739357,
      "learning_rate": 1.8867924528301887e-06,
      "loss": 0.4245,
      "step": 100
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.4097066689240445,
      "learning_rate": 1.9056603773584906e-06,
      "loss": 0.4171,
      "step": 101
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.7500799535422535,
      "learning_rate": 1.9245283018867923e-06,
      "loss": 0.4316,
      "step": 102
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.772565736820654,
      "learning_rate": 1.9433962264150944e-06,
      "loss": 0.4308,
      "step": 103
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.9388045054687497,
      "learning_rate": 1.962264150943396e-06,
      "loss": 0.3946,
      "step": 104
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6971415729142567,
      "learning_rate": 1.981132075471698e-06,
      "loss": 0.3971,
      "step": 105
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.565759147084175,
      "learning_rate": 2e-06,
      "loss": 0.4335,
      "step": 106
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6533331797170074,
      "learning_rate": 1.9999995788314622e-06,
      "loss": 0.4027,
      "step": 107
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6400141953211516,
      "learning_rate": 1.9999983153262037e-06,
      "loss": 0.3566,
      "step": 108
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.869629191400056,
      "learning_rate": 1.9999962094852885e-06,
      "loss": 0.4321,
      "step": 109
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.1890954493242303,
      "learning_rate": 1.999993261310491e-06,
      "loss": 0.4243,
      "step": 110
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.771919519749318,
      "learning_rate": 1.999989470804294e-06,
      "loss": 0.4053,
      "step": 111
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.3182106784918077,
      "learning_rate": 1.9999848379698903e-06,
      "loss": 0.4299,
      "step": 112
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.809056771173883,
      "learning_rate": 1.999979362811183e-06,
      "loss": 0.4323,
      "step": 113
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.8114945027835807,
      "learning_rate": 1.9999730453327834e-06,
      "loss": 0.4382,
      "step": 114
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.7106485289513396,
      "learning_rate": 1.9999658855400133e-06,
      "loss": 0.4195,
      "step": 115
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.0700900731543523,
      "learning_rate": 1.9999578834389034e-06,
      "loss": 0.4007,
      "step": 116
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.581255352359972,
      "learning_rate": 1.9999490390361944e-06,
      "loss": 0.4003,
      "step": 117
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.730583061252001,
      "learning_rate": 1.9999393523393364e-06,
      "loss": 0.3988,
      "step": 118
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.953836584220443,
      "learning_rate": 1.999928823356488e-06,
      "loss": 0.4003,
      "step": 119
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.540162372030467,
      "learning_rate": 1.9999174520965193e-06,
      "loss": 0.4193,
      "step": 120
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.8247216227110754,
      "learning_rate": 1.9999052385690078e-06,
      "loss": 0.4106,
      "step": 121
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.9800895294923313,
      "learning_rate": 1.999892182784242e-06,
      "loss": 0.425,
      "step": 122
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.9580274438434895,
      "learning_rate": 1.9998782847532195e-06,
      "loss": 0.4222,
      "step": 123
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.978550415936864,
      "learning_rate": 1.9998635444876458e-06,
      "loss": 0.4031,
      "step": 124
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8082748015723213,
      "learning_rate": 1.999847961999939e-06,
      "loss": 0.4056,
      "step": 125
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8595783375198818,
      "learning_rate": 1.9998315373032237e-06,
      "loss": 0.4224,
      "step": 126
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6890260114894686,
      "learning_rate": 1.9998142704113346e-06,
      "loss": 0.3496,
      "step": 127
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.7450486041972693,
      "learning_rate": 1.9997961613388173e-06,
      "loss": 0.3901,
      "step": 128
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.109712683225939,
      "learning_rate": 1.9997772101009253e-06,
      "loss": 0.4559,
      "step": 129
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8203576670652284,
      "learning_rate": 1.9997574167136223e-06,
      "loss": 0.3763,
      "step": 130
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5062427163533143,
      "learning_rate": 1.9997367811935805e-06,
      "loss": 0.3815,
      "step": 131
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.7117271095711204,
      "learning_rate": 1.999715303558182e-06,
      "loss": 0.3941,
      "step": 132
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.55987610733269,
      "learning_rate": 1.999692983825518e-06,
      "loss": 0.3927,
      "step": 133
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.3555921974167564,
      "learning_rate": 1.99966982201439e-06,
      "loss": 0.3758,
      "step": 134
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.808757885402719,
      "learning_rate": 1.999645818144307e-06,
      "loss": 0.4095,
      "step": 135
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.68635260987047,
      "learning_rate": 1.9996209722354897e-06,
      "loss": 0.3608,
      "step": 136
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.9268085169131046,
      "learning_rate": 1.9995952843088656e-06,
      "loss": 0.4159,
      "step": 137
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.9484082206625333,
      "learning_rate": 1.9995687543860728e-06,
      "loss": 0.4132,
      "step": 138
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.646373861612375,
      "learning_rate": 1.999541382489459e-06,
      "loss": 0.3747,
      "step": 139
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.646592594686166,
      "learning_rate": 1.9995131686420798e-06,
      "loss": 0.3996,
      "step": 140
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.7303459670357704,
      "learning_rate": 1.9994841128677014e-06,
      "loss": 0.3988,
      "step": 141
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.799829009143507,
      "learning_rate": 1.9994542151907985e-06,
      "loss": 0.4117,
      "step": 142
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6018316616627173,
      "learning_rate": 1.9994234756365546e-06,
      "loss": 0.3918,
      "step": 143
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8528313633077835,
      "learning_rate": 1.9993918942308625e-06,
      "loss": 0.3901,
      "step": 144
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.206126131387853,
      "learning_rate": 1.999359471000326e-06,
      "loss": 0.3858,
      "step": 145
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6182854163185243,
      "learning_rate": 1.9993262059722546e-06,
      "loss": 0.4106,
      "step": 146
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.002116351588365,
      "learning_rate": 1.9992920991746695e-06,
      "loss": 0.4153,
      "step": 147
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.744545644279597,
      "learning_rate": 1.9992571506362995e-06,
      "loss": 0.3962,
      "step": 148
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.9977459472060466,
      "learning_rate": 1.999221360386584e-06,
      "loss": 0.3931,
      "step": 149
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6424581529573303,
      "learning_rate": 1.99918472845567e-06,
      "loss": 0.3706,
      "step": 150
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.7069491277467383,
      "learning_rate": 1.999147254874414e-06,
      "loss": 0.4147,
      "step": 151
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6035821918848363,
      "learning_rate": 1.9991089396743805e-06,
      "loss": 0.4249,
      "step": 152
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8388566955132943,
      "learning_rate": 1.999069782887845e-06,
      "loss": 0.3695,
      "step": 153
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.7846849561486064,
      "learning_rate": 1.999029784547791e-06,
      "loss": 0.4046,
      "step": 154
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5672347312163266,
      "learning_rate": 1.998988944687909e-06,
      "loss": 0.3784,
      "step": 155
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5262276088682456,
      "learning_rate": 1.998947263342601e-06,
      "loss": 0.3624,
      "step": 156
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.760289710063222,
      "learning_rate": 1.998904740546977e-06,
      "loss": 0.3976,
      "step": 157
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6197959186468402,
      "learning_rate": 1.9988613763368545e-06,
      "loss": 0.3852,
      "step": 158
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9089432590991646,
      "learning_rate": 1.998817170748762e-06,
      "loss": 0.3799,
      "step": 159
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.506679497101866,
      "learning_rate": 1.9987721238199343e-06,
      "loss": 0.3535,
      "step": 160
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.825720790926751,
      "learning_rate": 1.9987262355883173e-06,
      "loss": 0.3744,
      "step": 161
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.906788715253664,
      "learning_rate": 1.9986795060925633e-06,
      "loss": 0.4155,
      "step": 162
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.099980135318557,
      "learning_rate": 1.998631935372035e-06,
      "loss": 0.4039,
      "step": 163
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.473077408954635,
      "learning_rate": 1.9985835234668023e-06,
      "loss": 0.3994,
      "step": 164
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7379960293103736,
      "learning_rate": 1.998534270417645e-06,
      "loss": 0.4067,
      "step": 165
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.934946021108072,
      "learning_rate": 1.9984841762660503e-06,
      "loss": 0.3915,
      "step": 166
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.595844563943777,
      "learning_rate": 1.998433241054215e-06,
      "loss": 0.3917,
      "step": 167
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7121653357980597,
      "learning_rate": 1.998381464825043e-06,
      "loss": 0.3971,
      "step": 168
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.584381509862202,
      "learning_rate": 1.998328847622148e-06,
      "loss": 0.3716,
      "step": 169
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6766288563674414,
      "learning_rate": 1.9982753894898506e-06,
      "loss": 0.3798,
      "step": 170
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5571445545817957,
      "learning_rate": 1.9982210904731812e-06,
      "loss": 0.3643,
      "step": 171
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5407475772937818,
      "learning_rate": 1.9981659506178776e-06,
      "loss": 0.3839,
      "step": 172
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5544944850006597,
      "learning_rate": 1.9981099699703864e-06,
      "loss": 0.368,
      "step": 173
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.531812353121465,
      "learning_rate": 1.998053148577862e-06,
      "loss": 0.3964,
      "step": 174
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.046992089059834,
      "learning_rate": 1.997995486488167e-06,
      "loss": 0.3844,
      "step": 175
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.822735955792875,
      "learning_rate": 1.9979369837498727e-06,
      "loss": 0.3991,
      "step": 176
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9314069703471657,
      "learning_rate": 1.997877640412258e-06,
      "loss": 0.401,
      "step": 177
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.504699351071927,
      "learning_rate": 1.9978174565253095e-06,
      "loss": 0.3805,
      "step": 178
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.566177801931028,
      "learning_rate": 1.9977564321397233e-06,
      "loss": 0.437,
      "step": 179
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6868768656926396,
      "learning_rate": 1.9976945673069015e-06,
      "loss": 0.3873,
      "step": 180
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.456556033093542,
      "learning_rate": 1.9976318620789557e-06,
      "loss": 0.3185,
      "step": 181
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.590060188752798,
      "learning_rate": 1.9975683165087047e-06,
      "loss": 0.3522,
      "step": 182
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.840213817668187,
      "learning_rate": 1.9975039306496755e-06,
      "loss": 0.3667,
      "step": 183
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.8933404886544967,
      "learning_rate": 1.997438704556102e-06,
      "loss": 0.37,
      "step": 184
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7526331247943334,
      "learning_rate": 1.997372638282928e-06,
      "loss": 0.3968,
      "step": 185
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6258818690706627,
      "learning_rate": 1.9973057318858017e-06,
      "loss": 0.3279,
      "step": 186
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7649018240097503,
      "learning_rate": 1.9972379854210823e-06,
      "loss": 0.3689,
      "step": 187
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7999331736072066,
      "learning_rate": 1.9971693989458345e-06,
      "loss": 0.3441,
      "step": 188
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7258205465442513,
      "learning_rate": 1.997099972517831e-06,
      "loss": 0.3396,
      "step": 189
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.536508380657664,
      "learning_rate": 1.997029706195553e-06,
      "loss": 0.3346,
      "step": 190
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.958855867705031,
      "learning_rate": 1.9969586000381883e-06,
      "loss": 0.3903,
      "step": 191
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7156228414424066,
      "learning_rate": 1.9968866541056313e-06,
      "loss": 0.3706,
      "step": 192
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.665030631253663,
      "learning_rate": 1.996813868458486e-06,
      "loss": 0.3883,
      "step": 193
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6313799332939225,
      "learning_rate": 1.9967402431580617e-06,
      "loss": 0.3527,
      "step": 194
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7364262838936404,
      "learning_rate": 1.996665778266376e-06,
      "loss": 0.3676,
      "step": 195
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.810935136720703,
      "learning_rate": 1.996590473846153e-06,
      "loss": 0.3341,
      "step": 196
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.9192563229960538,
      "learning_rate": 1.996514329960825e-06,
      "loss": 0.3536,
      "step": 197
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.8207777067651287,
      "learning_rate": 1.9964373466745307e-06,
      "loss": 0.3828,
      "step": 198
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.1120567282258955,
      "learning_rate": 1.9963595240521156e-06,
      "loss": 0.386,
      "step": 199
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6529947961693527,
      "learning_rate": 1.996280862159133e-06,
      "loss": 0.3888,
      "step": 200
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.0993864773394,
      "learning_rate": 1.996201361061842e-06,
      "loss": 0.4037,
      "step": 201
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5729470692760734,
      "learning_rate": 1.9961210208272105e-06,
      "loss": 0.3721,
      "step": 202
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.8037555614398397,
      "learning_rate": 1.996039841522911e-06,
      "loss": 0.3861,
      "step": 203
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.502805445419668,
      "learning_rate": 1.9959578232173245e-06,
      "loss": 0.3421,
      "step": 204
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6892317392586462,
      "learning_rate": 1.995874965979538e-06,
      "loss": 0.3703,
      "step": 205
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.649856195234877,
      "learning_rate": 1.9957912698793447e-06,
      "loss": 0.3965,
      "step": 206
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6385637045784005,
      "learning_rate": 1.9957067349872456e-06,
      "loss": 0.355,
      "step": 207
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6041098458460032,
      "learning_rate": 1.995621361374447e-06,
      "loss": 0.4005,
      "step": 208
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.791585851603161,
      "learning_rate": 1.995535149112862e-06,
      "loss": 0.3828,
      "step": 209
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.56933881840692,
      "learning_rate": 1.995448098275112e-06,
      "loss": 0.3715,
      "step": 210
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7409053496946076,
      "learning_rate": 1.9953602089345213e-06,
      "loss": 0.385,
      "step": 211
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.916462909652704,
      "learning_rate": 1.995271481165123e-06,
      "loss": 0.3491,
      "step": 212
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.509746621164135,
      "learning_rate": 1.9951819150416564e-06,
      "loss": 0.3333,
      "step": 213
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.491097202916147,
      "learning_rate": 1.9950915106395654e-06,
      "loss": 0.3349,
      "step": 214
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.842582458464046,
      "learning_rate": 1.9950002680350016e-06,
      "loss": 0.3447,
      "step": 215
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6721882016425553,
      "learning_rate": 1.994908187304822e-06,
      "loss": 0.3676,
      "step": 216
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5656168078227406,
      "learning_rate": 1.9948152685265892e-06,
      "loss": 0.3781,
      "step": 217
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.625977722419885,
      "learning_rate": 1.9947215117785727e-06,
      "loss": 0.3553,
      "step": 218
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.845684023469767,
      "learning_rate": 1.9946269171397465e-06,
      "loss": 0.3676,
      "step": 219
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.006378204445308,
      "learning_rate": 1.994531484689792e-06,
      "loss": 0.3609,
      "step": 220
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.734959302329953,
      "learning_rate": 1.994435214509095e-06,
      "loss": 0.3979,
      "step": 221
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.8769028978485194,
      "learning_rate": 1.994338106678748e-06,
      "loss": 0.3968,
      "step": 222
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7379521677523693,
      "learning_rate": 1.9942401612805477e-06,
      "loss": 0.433,
      "step": 223
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6234046844572374,
      "learning_rate": 1.9941413783969976e-06,
      "loss": 0.3595,
      "step": 224
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.554494207301178,
      "learning_rate": 1.994041758111306e-06,
      "loss": 0.382,
      "step": 225
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.579323406962154,
      "learning_rate": 1.993941300507387e-06,
      "loss": 0.3521,
      "step": 226
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7795940548748352,
      "learning_rate": 1.9938400056698595e-06,
      "loss": 0.3617,
      "step": 227
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.0176541561507246,
      "learning_rate": 1.9937378736840483e-06,
      "loss": 0.4024,
      "step": 228
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7077194486209333,
      "learning_rate": 1.993634904635983e-06,
      "loss": 0.3594,
      "step": 229
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4884581742716905,
      "learning_rate": 1.9935310986123976e-06,
      "loss": 0.3308,
      "step": 230
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4268425735478183,
      "learning_rate": 1.993426455700732e-06,
      "loss": 0.3419,
      "step": 231
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.665186874653042,
      "learning_rate": 1.993320975989131e-06,
      "loss": 0.3426,
      "step": 232
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.552819929036354,
      "learning_rate": 1.9932146595664446e-06,
      "loss": 0.387,
      "step": 233
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.4824302393777335,
      "learning_rate": 1.993107506522226e-06,
      "loss": 0.3213,
      "step": 234
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.1076257763007455,
      "learning_rate": 1.9929995169467344e-06,
      "loss": 0.3862,
      "step": 235
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.7868572688950537,
      "learning_rate": 1.992890690930934e-06,
      "loss": 0.3902,
      "step": 236
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.527196091690139,
      "learning_rate": 1.9927810285664927e-06,
      "loss": 0.3258,
      "step": 237
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5385263313762674,
      "learning_rate": 1.9926705299457827e-06,
      "loss": 0.3925,
      "step": 238
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6446574691522353,
      "learning_rate": 1.992559195161882e-06,
      "loss": 0.37,
      "step": 239
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4843804067774076,
      "learning_rate": 1.9924470243085712e-06,
      "loss": 0.3525,
      "step": 240
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.463079722299446,
      "learning_rate": 1.9923340174803367e-06,
      "loss": 0.328,
      "step": 241
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5139165394391894,
      "learning_rate": 1.9922201747723677e-06,
      "loss": 0.3499,
      "step": 242
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6807859647502177,
      "learning_rate": 1.9921054962805585e-06,
      "loss": 0.3419,
      "step": 243
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.9749725307944406,
      "learning_rate": 1.9919899821015063e-06,
      "loss": 0.3248,
      "step": 244
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.521006728376108,
      "learning_rate": 1.9918736323325142e-06,
      "loss": 0.3565,
      "step": 245
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.690164522320936,
      "learning_rate": 1.9917564470715872e-06,
      "loss": 0.3598,
      "step": 246
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6434030217227464,
      "learning_rate": 1.991638426417435e-06,
      "loss": 0.3628,
      "step": 247
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.406254480523089,
      "learning_rate": 1.991519570469471e-06,
      "loss": 0.3412,
      "step": 248
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8193314112019814,
      "learning_rate": 1.9913998793278113e-06,
      "loss": 0.367,
      "step": 249
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4988815520050247,
      "learning_rate": 1.9912793530932764e-06,
      "loss": 0.3622,
      "step": 250
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.461527020103545,
      "learning_rate": 1.9911579918673904e-06,
      "loss": 0.3299,
      "step": 251
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.773311619061041,
      "learning_rate": 1.99103579575238e-06,
      "loss": 0.3449,
      "step": 252
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5782349089610825,
      "learning_rate": 1.9909127648511754e-06,
      "loss": 0.3594,
      "step": 253
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4446994500795194,
      "learning_rate": 1.990788899267411e-06,
      "loss": 0.344,
      "step": 254
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6855473726715022,
      "learning_rate": 1.9906641991054222e-06,
      "loss": 0.4129,
      "step": 255
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5485938248498092,
      "learning_rate": 1.9905386644702493e-06,
      "loss": 0.3543,
      "step": 256
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5080000647003278,
      "learning_rate": 1.9904122954676345e-06,
      "loss": 0.3202,
      "step": 257
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.457262360426059,
      "learning_rate": 1.9902850922040227e-06,
      "loss": 0.3579,
      "step": 258
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.541677938349451,
      "learning_rate": 1.9901570547865627e-06,
      "loss": 0.3584,
      "step": 259
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6969360626762677,
      "learning_rate": 1.990028183323105e-06,
      "loss": 0.3887,
      "step": 260
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.713143817573316,
      "learning_rate": 1.9898984779222025e-06,
      "loss": 0.3841,
      "step": 261
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5477445113524815,
      "learning_rate": 1.9897679386931113e-06,
      "loss": 0.3435,
      "step": 262
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.2229779357171524,
      "learning_rate": 1.9896365657457887e-06,
      "loss": 0.3744,
      "step": 263
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.613400938132588,
      "learning_rate": 1.989504359190896e-06,
      "loss": 0.3374,
      "step": 264
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.80802734327446,
      "learning_rate": 1.989371319139794e-06,
      "loss": 0.3772,
      "step": 265
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4470246183799156,
      "learning_rate": 1.9892374457045494e-06,
      "loss": 0.3553,
      "step": 266
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6800638145741353,
      "learning_rate": 1.9891027389979278e-06,
      "loss": 0.3706,
      "step": 267
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.913022801860264,
      "learning_rate": 1.9889671991333976e-06,
      "loss": 0.3766,
      "step": 268
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.0319933246387856,
      "learning_rate": 1.9888308262251284e-06,
      "loss": 0.3663,
      "step": 269
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4223388136877886,
      "learning_rate": 1.9886936203879935e-06,
      "loss": 0.3418,
      "step": 270
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7509386532674815,
      "learning_rate": 1.9885555817375654e-06,
      "loss": 0.4096,
      "step": 271
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.639110162050167,
      "learning_rate": 1.9884167103901194e-06,
      "loss": 0.39,
      "step": 272
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7803037778659814,
      "learning_rate": 1.9882770064626328e-06,
      "loss": 0.3795,
      "step": 273
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5513683067438753,
      "learning_rate": 1.988136470072782e-06,
      "loss": 0.3469,
      "step": 274
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.535630029496729,
      "learning_rate": 1.987995101338947e-06,
      "loss": 0.3517,
      "step": 275
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6139021814657037,
      "learning_rate": 1.9878529003802084e-06,
      "loss": 0.3976,
      "step": 276
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.483456591372453,
      "learning_rate": 1.987709867316346e-06,
      "loss": 0.3631,
      "step": 277
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5809926151280314,
      "learning_rate": 1.9875660022678424e-06,
      "loss": 0.3685,
      "step": 278
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.545111593100663,
      "learning_rate": 1.9874213053558804e-06,
      "loss": 0.3427,
      "step": 279
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.617356456075554,
      "learning_rate": 1.987275776702344e-06,
      "loss": 0.3395,
      "step": 280
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.8211042738249743,
      "learning_rate": 1.987129416429817e-06,
      "loss": 0.3571,
      "step": 281
    },
    {
      "epoch": 0.08,
      "grad_norm": 7.370289089811191,
      "learning_rate": 1.986982224661584e-06,
      "loss": 0.3474,
      "step": 282
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5171030275465305,
      "learning_rate": 1.986834201521631e-06,
      "loss": 0.3306,
      "step": 283
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5654163958906837,
      "learning_rate": 1.9866853471346417e-06,
      "loss": 0.3568,
      "step": 284
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.4580167416924765,
      "learning_rate": 1.986535661626003e-06,
      "loss": 0.368,
      "step": 285
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6005433341670887,
      "learning_rate": 1.9863851451218003e-06,
      "loss": 0.3518,
      "step": 286
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.699225097426378,
      "learning_rate": 1.986233797748819e-06,
      "loss": 0.3784,
      "step": 287
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.732736459682785,
      "learning_rate": 1.986081619634545e-06,
      "loss": 0.3752,
      "step": 288
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7938860793855747,
      "learning_rate": 1.9859286109071623e-06,
      "loss": 0.3502,
      "step": 289
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5092963016547123,
      "learning_rate": 1.985774771695558e-06,
      "loss": 0.3709,
      "step": 290
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.8384058342986496,
      "learning_rate": 1.9856201021293148e-06,
      "loss": 0.3702,
      "step": 291
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7336616375679426,
      "learning_rate": 1.985464602338717e-06,
      "loss": 0.363,
      "step": 292
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5955531896252,
      "learning_rate": 1.9853082724547476e-06,
      "loss": 0.3036,
      "step": 293
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.571917020052252,
      "learning_rate": 1.9851511126090906e-06,
      "loss": 0.361,
      "step": 294
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.660892685172359,
      "learning_rate": 1.9849931229341256e-06,
      "loss": 0.325,
      "step": 295
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3648358197445933,
      "learning_rate": 1.9848343035629343e-06,
      "loss": 0.3532,
      "step": 296
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.594871579879767,
      "learning_rate": 1.9846746546292955e-06,
      "loss": 0.3413,
      "step": 297
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5793998255691766,
      "learning_rate": 1.9845141762676882e-06,
      "loss": 0.3335,
      "step": 298
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.591077271608269,
      "learning_rate": 1.984352868613289e-06,
      "loss": 0.3675,
      "step": 299
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.285884192859506,
      "learning_rate": 1.9841907318019724e-06,
      "loss": 0.3539,
      "step": 300
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5448166312144047,
      "learning_rate": 1.9840277659703137e-06,
      "loss": 0.3542,
      "step": 301
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.470915634055657,
      "learning_rate": 1.9838639712555838e-06,
      "loss": 0.3646,
      "step": 302
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5716228188887387,
      "learning_rate": 1.9836993477957536e-06,
      "loss": 0.3493,
      "step": 303
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.643931078864786,
      "learning_rate": 1.983533895729492e-06,
      "loss": 0.3195,
      "step": 304
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.911939697435264,
      "learning_rate": 1.9833676151961647e-06,
      "loss": 0.3654,
      "step": 305
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3724724165880784,
      "learning_rate": 1.9832005063358366e-06,
      "loss": 0.3603,
      "step": 306
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5039300276236034,
      "learning_rate": 1.9830325692892687e-06,
      "loss": 0.3552,
      "step": 307
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5223232113937843,
      "learning_rate": 1.9828638041979216e-06,
      "loss": 0.3327,
      "step": 308
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.9855800223800464,
      "learning_rate": 1.982694211203952e-06,
      "loss": 0.3829,
      "step": 309
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.521775265390254,
      "learning_rate": 1.9825237904502143e-06,
      "loss": 0.3408,
      "step": 310
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7867318408681685,
      "learning_rate": 1.98235254208026e-06,
      "loss": 0.3465,
      "step": 311
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.617705586883914,
      "learning_rate": 1.9821804662383385e-06,
      "loss": 0.3625,
      "step": 312
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.1720347849017676,
      "learning_rate": 1.982007563069395e-06,
      "loss": 0.3589,
      "step": 313
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.575250459612025,
      "learning_rate": 1.9818338327190735e-06,
      "loss": 0.341,
      "step": 314
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.878553661168574,
      "learning_rate": 1.981659275333712e-06,
      "loss": 0.3398,
      "step": 315
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.552552883120015,
      "learning_rate": 1.981483891060348e-06,
      "loss": 0.3343,
      "step": 316
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5137157940173256,
      "learning_rate": 1.981307680046713e-06,
      "loss": 0.3576,
      "step": 317
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.403478162113704,
      "learning_rate": 1.9811306424412368e-06,
      "loss": 0.3313,
      "step": 318
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5219076719388847,
      "learning_rate": 1.9809527783930442e-06,
      "loss": 0.3498,
      "step": 319
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.46937865375049,
      "learning_rate": 1.980774088051957e-06,
      "loss": 0.3305,
      "step": 320
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4507535819330233,
      "learning_rate": 1.980594571568493e-06,
      "loss": 0.3154,
      "step": 321
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5843540111215284,
      "learning_rate": 1.980414229093865e-06,
      "loss": 0.3583,
      "step": 322
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.442105603481713,
      "learning_rate": 1.980233060779983e-06,
      "loss": 0.3347,
      "step": 323
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5856362077055297,
      "learning_rate": 1.9800510667794508e-06,
      "loss": 0.3265,
      "step": 324
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.780782385254357,
      "learning_rate": 1.979868247245569e-06,
      "loss": 0.3502,
      "step": 325
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5778120808206366,
      "learning_rate": 1.9796846023323335e-06,
      "loss": 0.3259,
      "step": 326
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.454138511692687,
      "learning_rate": 1.979500132194435e-06,
      "loss": 0.3468,
      "step": 327
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4070384710440167,
      "learning_rate": 1.97931483698726e-06,
      "loss": 0.3279,
      "step": 328
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.834842983135936,
      "learning_rate": 1.979128716866889e-06,
      "loss": 0.3356,
      "step": 329
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.578244568214759,
      "learning_rate": 1.978941771990098e-06,
      "loss": 0.3452,
      "step": 330
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.628476335289947,
      "learning_rate": 1.9787540025143576e-06,
      "loss": 0.3495,
      "step": 331
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5526394737282962,
      "learning_rate": 1.9785654085978328e-06,
      "loss": 0.3528,
      "step": 332
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.671559049988823,
      "learning_rate": 1.978375990399384e-06,
      "loss": 0.3705,
      "step": 333
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5473894352720237,
      "learning_rate": 1.9781857480785644e-06,
      "loss": 0.3192,
      "step": 334
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5989571364649473,
      "learning_rate": 1.9779946817956223e-06,
      "loss": 0.3616,
      "step": 335
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.795031591430215,
      "learning_rate": 1.9778027917115005e-06,
      "loss": 0.3821,
      "step": 336
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4775590847102986,
      "learning_rate": 1.9776100779878343e-06,
      "loss": 0.3527,
      "step": 337
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7144133030062814,
      "learning_rate": 1.9774165407869535e-06,
      "loss": 0.3803,
      "step": 338
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7376378600974713,
      "learning_rate": 1.977222180271883e-06,
      "loss": 0.3447,
      "step": 339
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5409133266582233,
      "learning_rate": 1.9770269966063388e-06,
      "loss": 0.3393,
      "step": 340
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.6836638084477937,
      "learning_rate": 1.976830989954731e-06,
      "loss": 0.3491,
      "step": 341
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.642805261995146,
      "learning_rate": 1.9766341604821643e-06,
      "loss": 0.3471,
      "step": 342
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.598394203262401,
      "learning_rate": 1.976436508354435e-06,
      "loss": 0.3603,
      "step": 343
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.382257852100494,
      "learning_rate": 1.9762380337380325e-06,
      "loss": 0.3502,
      "step": 344
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.632050658022369,
      "learning_rate": 1.97603873680014e-06,
      "loss": 0.3648,
      "step": 345
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.750819827854785,
      "learning_rate": 1.975838617708632e-06,
      "loss": 0.3334,
      "step": 346
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.410781673584302,
      "learning_rate": 1.975637676632077e-06,
      "loss": 0.3417,
      "step": 347
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.629343266531438,
      "learning_rate": 1.975435913739734e-06,
      "loss": 0.3896,
      "step": 348
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.472893659048711,
      "learning_rate": 1.9752333292015564e-06,
      "loss": 0.3457,
      "step": 349
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.6266932605282713,
      "learning_rate": 1.9750299231881882e-06,
      "loss": 0.369,
      "step": 350
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.655745774986425,
      "learning_rate": 1.974825695870966e-06,
      "loss": 0.3468,
      "step": 351
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4865033854087613,
      "learning_rate": 1.974620647421918e-06,
      "loss": 0.3326,
      "step": 352
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.536995229766425,
      "learning_rate": 1.9744147780137644e-06,
      "loss": 0.3384,
      "step": 353
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5297575083663606,
      "learning_rate": 1.9742080878199155e-06,
      "loss": 0.3284,
      "step": 354
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3922716483502056,
      "learning_rate": 1.9740005770144757e-06,
      "loss": 0.3624,
      "step": 355
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4959650916452087,
      "learning_rate": 1.973792245772238e-06,
      "loss": 0.3541,
      "step": 356
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7223874523007403,
      "learning_rate": 1.9735830942686877e-06,
      "loss": 0.3142,
      "step": 357
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.636175207703715,
      "learning_rate": 1.973373122680001e-06,
      "loss": 0.3735,
      "step": 358
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5918184199941066,
      "learning_rate": 1.973162331183045e-06,
      "loss": 0.3524,
      "step": 359
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4828658779223347,
      "learning_rate": 1.9729507199553766e-06,
      "loss": 0.3092,
      "step": 360
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4424967985660193,
      "learning_rate": 1.9727382891752444e-06,
      "loss": 0.3256,
      "step": 361
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.74882206494836,
      "learning_rate": 1.972525039021586e-06,
      "loss": 0.3465,
      "step": 362
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5803729023474986,
      "learning_rate": 1.9723109696740307e-06,
      "loss": 0.3697,
      "step": 363
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5314535016182607,
      "learning_rate": 1.9720960813128963e-06,
      "loss": 0.385,
      "step": 364
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5334837256809553,
      "learning_rate": 1.971880374119192e-06,
      "loss": 0.3436,
      "step": 365
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.838981459900644,
      "learning_rate": 1.971663848274615e-06,
      "loss": 0.3547,
      "step": 366
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.785342579119847,
      "learning_rate": 1.971446503961554e-06,
      "loss": 0.3829,
      "step": 367
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4462261571298303,
      "learning_rate": 1.9712283413630863e-06,
      "loss": 0.3461,
      "step": 368
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5548288894996096,
      "learning_rate": 1.9710093606629773e-06,
      "loss": 0.3542,
      "step": 369
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.2724105962356234,
      "learning_rate": 1.970789562045683e-06,
      "loss": 0.3621,
      "step": 370
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.593756143692595,
      "learning_rate": 1.970568945696348e-06,
      "loss": 0.347,
      "step": 371
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4998898782445944,
      "learning_rate": 1.970347511800806e-06,
      "loss": 0.3494,
      "step": 372
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.429214718940558,
      "learning_rate": 1.9701252605455783e-06,
      "loss": 0.3171,
      "step": 373
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.7538511396476717,
      "learning_rate": 1.969902192117876e-06,
      "loss": 0.3328,
      "step": 374
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6048648567873838,
      "learning_rate": 1.9696783067055978e-06,
      "loss": 0.3158,
      "step": 375
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.5474822093572587,
      "learning_rate": 1.96945360449733e-06,
      "loss": 0.332,
      "step": 376
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.5836149604108907,
      "learning_rate": 1.9692280856823484e-06,
      "loss": 0.3482,
      "step": 377
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.0864417183648216,
      "learning_rate": 1.9690017504506155e-06,
      "loss": 0.3451,
      "step": 378
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6700788713740815,
      "learning_rate": 1.9687745989927824e-06,
      "loss": 0.3512,
      "step": 379
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.763428118675348,
      "learning_rate": 1.968546631500186e-06,
      "loss": 0.3783,
      "step": 380
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6666814788251756,
      "learning_rate": 1.9683178481648527e-06,
      "loss": 0.3135,
      "step": 381
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.410152816700471,
      "learning_rate": 1.968088249179495e-06,
      "loss": 0.3304,
      "step": 382
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.514956754492286,
      "learning_rate": 1.967857834737513e-06,
      "loss": 0.3086,
      "step": 383
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.403693065158659,
      "learning_rate": 1.9676266050329922e-06,
      "loss": 0.3082,
      "step": 384
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.5751977044730494,
      "learning_rate": 1.967394560260707e-06,
      "loss": 0.3369,
      "step": 385
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.5583174802608997,
      "learning_rate": 1.967161700616117e-06,
      "loss": 0.3181,
      "step": 386
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6557837593729072,
      "learning_rate": 1.966928026295369e-06,
      "loss": 0.3771,
      "step": 387
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.5991375063276094,
      "learning_rate": 1.9666935374952944e-06,
      "loss": 0.3262,
      "step": 388
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6405353323745278,
      "learning_rate": 1.9664582344134127e-06,
      "loss": 0.3636,
      "step": 389
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4486516403012857,
      "learning_rate": 1.9662221172479282e-06,
      "loss": 0.3404,
      "step": 390
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.708565335228964,
      "learning_rate": 1.965985186197731e-06,
      "loss": 0.3737,
      "step": 391
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6111093488966133,
      "learning_rate": 1.965747441462397e-06,
      "loss": 0.3577,
      "step": 392
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4352883871602913,
      "learning_rate": 1.965508883242188e-06,
      "loss": 0.3583,
      "step": 393
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6020531043744404,
      "learning_rate": 1.965269511738049e-06,
      "loss": 0.3169,
      "step": 394
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.841460231837411,
      "learning_rate": 1.965029327151613e-06,
      "loss": 0.3649,
      "step": 395
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.25286900315995,
      "learning_rate": 1.9647883296851956e-06,
      "loss": 0.3119,
      "step": 396
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.522094707467322,
      "learning_rate": 1.964546519541798e-06,
      "loss": 0.3345,
      "step": 397
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.443044826914561,
      "learning_rate": 1.9643038969251062e-06,
      "loss": 0.3387,
      "step": 398
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4333924844267014,
      "learning_rate": 1.9640604620394894e-06,
      "loss": 0.3527,
      "step": 399
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6188283476819763,
      "learning_rate": 1.9638162150900025e-06,
      "loss": 0.3755,
      "step": 400
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.1294437997674893,
      "learning_rate": 1.963571156282384e-06,
      "loss": 0.3146,
      "step": 401
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.462574569192702,
      "learning_rate": 1.963325285823055e-06,
      "loss": 0.3475,
      "step": 402
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.424115414008482,
      "learning_rate": 1.9630786039191225e-06,
      "loss": 0.3551,
      "step": 403
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.7183638186252166,
      "learning_rate": 1.962831110778375e-06,
      "loss": 0.3235,
      "step": 404
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.707298913677284,
      "learning_rate": 1.9625828066092854e-06,
      "loss": 0.3626,
      "step": 405
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.051683309395247,
      "learning_rate": 1.9623336916210096e-06,
      "loss": 0.3174,
      "step": 406
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6084332006277027,
      "learning_rate": 1.962083766023386e-06,
      "loss": 0.3158,
      "step": 407
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.351302315368594,
      "learning_rate": 1.961833030026937e-06,
      "loss": 0.284,
      "step": 408
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3788504441528806,
      "learning_rate": 1.961581483842866e-06,
      "loss": 0.3278,
      "step": 409
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.8946442484890107,
      "learning_rate": 1.96132912768306e-06,
      "loss": 0.3163,
      "step": 410
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.728285381971823,
      "learning_rate": 1.961075961760088e-06,
      "loss": 0.3418,
      "step": 411
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.640098159654451,
      "learning_rate": 1.9608219862872008e-06,
      "loss": 0.3264,
      "step": 412
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5466803175986557,
      "learning_rate": 1.960567201478332e-06,
      "loss": 0.3122,
      "step": 413
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5571107605288512,
      "learning_rate": 1.9603116075480955e-06,
      "loss": 0.3582,
      "step": 414
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6335058487995013,
      "learning_rate": 1.960055204711788e-06,
      "loss": 0.3319,
      "step": 415
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.499711088348462,
      "learning_rate": 1.959797993185387e-06,
      "loss": 0.3553,
      "step": 416
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3778807191941955,
      "learning_rate": 1.959539973185551e-06,
      "loss": 0.332,
      "step": 417
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6344006456744054,
      "learning_rate": 1.9592811449296206e-06,
      "loss": 0.3572,
      "step": 418
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.4980884179301603,
      "learning_rate": 1.9590215086356155e-06,
      "loss": 0.2792,
      "step": 419
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.7675481866770877,
      "learning_rate": 1.9587610645222377e-06,
      "loss": 0.3385,
      "step": 420
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.4770423980229457,
      "learning_rate": 1.9584998128088683e-06,
      "loss": 0.341,
      "step": 421
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6038105597999173,
      "learning_rate": 1.9582377537155703e-06,
      "loss": 0.3269,
      "step": 422
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.618927817603957,
      "learning_rate": 1.9579748874630846e-06,
      "loss": 0.332,
      "step": 423
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5224312088303065,
      "learning_rate": 1.9577112142728337e-06,
      "loss": 0.3191,
      "step": 424
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.410236418801495,
      "learning_rate": 1.95744673436692e-06,
      "loss": 0.3188,
      "step": 425
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5211092188943076,
      "learning_rate": 1.9571814479681233e-06,
      "loss": 0.3545,
      "step": 426
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6366198905561125,
      "learning_rate": 1.9569153552999053e-06,
      "loss": 0.3413,
      "step": 427
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3273141341492978,
      "learning_rate": 1.9566484565864056e-06,
      "loss": 0.3178,
      "step": 428
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.464296948249515,
      "learning_rate": 1.9563807520524424e-06,
      "loss": 0.3227,
      "step": 429
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5770345250979836,
      "learning_rate": 1.9561122419235133e-06,
      "loss": 0.3275,
      "step": 430
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.806790500176456,
      "learning_rate": 1.9558429264257946e-06,
      "loss": 0.3654,
      "step": 431
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6954362891186325,
      "learning_rate": 1.955572805786141e-06,
      "loss": 0.2983,
      "step": 432
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5608755561769727,
      "learning_rate": 1.955301880232084e-06,
      "loss": 0.3729,
      "step": 433
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.578420783299757,
      "learning_rate": 1.9550301499918353e-06,
      "loss": 0.328,
      "step": 434
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6160850303665457,
      "learning_rate": 1.9547576152942825e-06,
      "loss": 0.3917,
      "step": 435
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.582252428734102,
      "learning_rate": 1.9544842763689928e-06,
      "loss": 0.3299,
      "step": 436
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.634871783199364,
      "learning_rate": 1.9542101334462086e-06,
      "loss": 0.3125,
      "step": 437
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6720251585718655,
      "learning_rate": 1.9539351867568515e-06,
      "loss": 0.3618,
      "step": 438
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.701084613749105,
      "learning_rate": 1.953659436532519e-06,
      "loss": 0.3332,
      "step": 439
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.586301761737129,
      "learning_rate": 1.953382883005485e-06,
      "loss": 0.3434,
      "step": 440
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5520563354383428,
      "learning_rate": 1.953105526408702e-06,
      "loss": 0.3136,
      "step": 441
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.7054210509581953,
      "learning_rate": 1.952827366975797e-06,
      "loss": 0.3614,
      "step": 442
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.377738480595085,
      "learning_rate": 1.9525484049410745e-06,
      "loss": 0.3406,
      "step": 443
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.393036741968658,
      "learning_rate": 1.952268640539514e-06,
      "loss": 0.3094,
      "step": 444
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.8081828257349892,
      "learning_rate": 1.951988074006772e-06,
      "loss": 0.313,
      "step": 445
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4443193318497127,
      "learning_rate": 1.951706705579179e-06,
      "loss": 0.3388,
      "step": 446
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6083418173169153,
      "learning_rate": 1.9514245354937434e-06,
      "loss": 0.3416,
      "step": 447
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.482240802319246,
      "learning_rate": 1.951141563988147e-06,
      "loss": 0.3184,
      "step": 448
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.573942965254348,
      "learning_rate": 1.9508577913007472e-06,
      "loss": 0.3288,
      "step": 449
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.520122116076561,
      "learning_rate": 1.9505732176705762e-06,
      "loss": 0.3395,
      "step": 450
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5535464035474194,
      "learning_rate": 1.9502878433373404e-06,
      "loss": 0.3257,
      "step": 451
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5785359969521116,
      "learning_rate": 1.9500016685414223e-06,
      "loss": 0.3444,
      "step": 452
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6932421940193616,
      "learning_rate": 1.9497146935238767e-06,
      "loss": 0.3103,
      "step": 453
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5976063721804574,
      "learning_rate": 1.949426918526434e-06,
      "loss": 0.3376,
      "step": 454
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.768636398677477,
      "learning_rate": 1.9491383437914964e-06,
      "loss": 0.3263,
      "step": 455
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4290109643427487,
      "learning_rate": 1.9488489695621427e-06,
      "loss": 0.3428,
      "step": 456
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.356160466466906,
      "learning_rate": 1.9485587960821227e-06,
      "loss": 0.2786,
      "step": 457
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.410697728515682,
      "learning_rate": 1.9482678235958605e-06,
      "loss": 0.3311,
      "step": 458
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.7601789133630934,
      "learning_rate": 1.9479760523484526e-06,
      "loss": 0.3668,
      "step": 459
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.2475066691008267,
      "learning_rate": 1.9476834825856695e-06,
      "loss": 0.3236,
      "step": 460
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.7004065801678983,
      "learning_rate": 1.947390114553953e-06,
      "loss": 0.3747,
      "step": 461
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4762707217582633,
      "learning_rate": 1.947095948500418e-06,
      "loss": 0.3298,
      "step": 462
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.359836990811463,
      "learning_rate": 1.946800984672851e-06,
      "loss": 0.3274,
      "step": 463
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.475164850692892,
      "learning_rate": 1.946505223319712e-06,
      "loss": 0.3076,
      "step": 464
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.497063237032677,
      "learning_rate": 1.946208664690131e-06,
      "loss": 0.3366,
      "step": 465
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5993339212468367,
      "learning_rate": 1.9459113090339107e-06,
      "loss": 0.3546,
      "step": 466
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.522352594749957,
      "learning_rate": 1.945613156601524e-06,
      "loss": 0.3454,
      "step": 467
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4833483392268674,
      "learning_rate": 1.945314207644117e-06,
      "loss": 0.3262,
      "step": 468
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.715300511368663,
      "learning_rate": 1.9450144624135047e-06,
      "loss": 0.3532,
      "step": 469
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.668453574347607,
      "learning_rate": 1.944713921162174e-06,
      "loss": 0.3488,
      "step": 470
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.401790874153332,
      "learning_rate": 1.9444125841432814e-06,
      "loss": 0.3355,
      "step": 471
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5611493361085746,
      "learning_rate": 1.944110451610655e-06,
      "loss": 0.3469,
      "step": 472
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.649852662518354,
      "learning_rate": 1.9438075238187914e-06,
      "loss": 0.3461,
      "step": 473
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5142573134371973,
      "learning_rate": 1.9435038010228583e-06,
      "loss": 0.3577,
      "step": 474
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6489862422940784,
      "learning_rate": 1.9431992834786925e-06,
      "loss": 0.3052,
      "step": 475
    },
    {
      "epoch": 0.13,
      "grad_norm": 16.540757693889148,
      "learning_rate": 1.9428939714428008e-06,
      "loss": 0.3543,
      "step": 476
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.630121182626526,
      "learning_rate": 1.9425878651723587e-06,
      "loss": 0.3327,
      "step": 477
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.6496095013669674,
      "learning_rate": 1.9422809649252107e-06,
      "loss": 0.3809,
      "step": 478
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5789313072631086,
      "learning_rate": 1.9419732709598705e-06,
      "loss": 0.3517,
      "step": 479
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5779662665291494,
      "learning_rate": 1.94166478353552e-06,
      "loss": 0.3393,
      "step": 480
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.463068866924456,
      "learning_rate": 1.9413555029120096e-06,
      "loss": 0.3126,
      "step": 481
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4759062085258847,
      "learning_rate": 1.9410454293498573e-06,
      "loss": 0.3286,
      "step": 482
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.376938714024481,
      "learning_rate": 1.9407345631102507e-06,
      "loss": 0.3082,
      "step": 483
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.480444918068485,
      "learning_rate": 1.940422904455043e-06,
      "loss": 0.3557,
      "step": 484
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.653839263450258,
      "learning_rate": 1.9401104536467562e-06,
      "loss": 0.3158,
      "step": 485
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.6730631024757936,
      "learning_rate": 1.93979721094858e-06,
      "loss": 0.3582,
      "step": 486
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.516049593333345,
      "learning_rate": 1.9394831766243685e-06,
      "loss": 0.3389,
      "step": 487
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.8083636392126374,
      "learning_rate": 1.9391683509386457e-06,
      "loss": 0.3247,
      "step": 488
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2678950733089147,
      "learning_rate": 1.9388527341566008e-06,
      "loss": 0.3186,
      "step": 489
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3716071271350714,
      "learning_rate": 1.9385363265440895e-06,
      "loss": 0.2982,
      "step": 490
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4167013747271455,
      "learning_rate": 1.9382191283676333e-06,
      "loss": 0.3442,
      "step": 491
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4025041454549023,
      "learning_rate": 1.9379011398944207e-06,
      "loss": 0.3188,
      "step": 492
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.515862732197645,
      "learning_rate": 1.9375823613923047e-06,
      "loss": 0.3273,
      "step": 493
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.613914145600926,
      "learning_rate": 1.937262793129804e-06,
      "loss": 0.3309,
      "step": 494
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.578527306014049,
      "learning_rate": 1.9369424353761033e-06,
      "loss": 0.3301,
      "step": 495
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.536979840648795,
      "learning_rate": 1.936621288401052e-06,
      "loss": 0.3524,
      "step": 496
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.537778567527244,
      "learning_rate": 1.9362993524751632e-06,
      "loss": 0.3478,
      "step": 497
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.767407350887436,
      "learning_rate": 1.9359766278696163e-06,
      "loss": 0.3498,
      "step": 498
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3407625140305086,
      "learning_rate": 1.9356531148562537e-06,
      "loss": 0.2906,
      "step": 499
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.21835283736995,
      "learning_rate": 1.9353288137075827e-06,
      "loss": 0.3053,
      "step": 500
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.6764139240001645,
      "learning_rate": 1.935003724696774e-06,
      "loss": 0.3378,
      "step": 501
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.848173934867263,
      "learning_rate": 1.9346778480976625e-06,
      "loss": 0.3188,
      "step": 502
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.721824420607853,
      "learning_rate": 1.9343511841847455e-06,
      "loss": 0.3694,
      "step": 503
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4137958474476844,
      "learning_rate": 1.9340237332331844e-06,
      "loss": 0.3081,
      "step": 504
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.304436601106214,
      "learning_rate": 1.933695495518804e-06,
      "loss": 0.3085,
      "step": 505
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.660920458664747,
      "learning_rate": 1.9333664713180897e-06,
      "loss": 0.3328,
      "step": 506
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.675836492265816,
      "learning_rate": 1.933036660908192e-06,
      "loss": 0.3514,
      "step": 507
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.592652582435975,
      "learning_rate": 1.932706064566922e-06,
      "loss": 0.3551,
      "step": 508
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.45395931857169,
      "learning_rate": 1.932374682572753e-06,
      "loss": 0.3036,
      "step": 509
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.495991500842149,
      "learning_rate": 1.9320425152048202e-06,
      "loss": 0.3299,
      "step": 510
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5722593523020048,
      "learning_rate": 1.9317095627429214e-06,
      "loss": 0.3039,
      "step": 511
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.699696114708531,
      "learning_rate": 1.931375825467514e-06,
      "loss": 0.3371,
      "step": 512
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.550768314398241,
      "learning_rate": 1.9310413036597178e-06,
      "loss": 0.336,
      "step": 513
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.415423337002907,
      "learning_rate": 1.9307059976013125e-06,
      "loss": 0.3084,
      "step": 514
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.3333411433452578,
      "learning_rate": 1.930369907574739e-06,
      "loss": 0.3311,
      "step": 515
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.6221986764230607,
      "learning_rate": 1.9300330338630982e-06,
      "loss": 0.3579,
      "step": 516
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.6611984074627193,
      "learning_rate": 1.929695376750152e-06,
      "loss": 0.3324,
      "step": 517
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3464416133083468,
      "learning_rate": 1.9293569365203202e-06,
      "loss": 0.3218,
      "step": 518
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.397852657010271,
      "learning_rate": 1.9290177134586847e-06,
      "loss": 0.3483,
      "step": 519
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.507353885884589,
      "learning_rate": 1.9286777078509856e-06,
      "loss": 0.3258,
      "step": 520
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5193974148260367,
      "learning_rate": 1.928336919983622e-06,
      "loss": 0.3549,
      "step": 521
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5488782265641943,
      "learning_rate": 1.9279953501436516e-06,
      "loss": 0.3011,
      "step": 522
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5085603852646186,
      "learning_rate": 1.927652998618792e-06,
      "loss": 0.322,
      "step": 523
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4263847131574203,
      "learning_rate": 1.927309865697419e-06,
      "loss": 0.3101,
      "step": 524
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3616115639087862,
      "learning_rate": 1.926965951668565e-06,
      "loss": 0.3095,
      "step": 525
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.594453166745246,
      "learning_rate": 1.926621256821922e-06,
      "loss": 0.3514,
      "step": 526
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.567525367017643,
      "learning_rate": 1.9262757814478397e-06,
      "loss": 0.3423,
      "step": 527
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4618364897492793,
      "learning_rate": 1.925929525837324e-06,
      "loss": 0.3388,
      "step": 528
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.622140958588784,
      "learning_rate": 1.92558249028204e-06,
      "loss": 0.3613,
      "step": 529
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.45449141356188,
      "learning_rate": 1.925234675074308e-06,
      "loss": 0.3137,
      "step": 530
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3719475881683727,
      "learning_rate": 1.9248860805071054e-06,
      "loss": 0.3193,
      "step": 531
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.489830142014237,
      "learning_rate": 1.924536706874066e-06,
      "loss": 0.3379,
      "step": 532
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4421392911819737,
      "learning_rate": 1.9241865544694814e-06,
      "loss": 0.3191,
      "step": 533
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.157619123600503,
      "learning_rate": 1.923835623588297e-06,
      "loss": 0.3257,
      "step": 534
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.385666421992085,
      "learning_rate": 1.9234839145261152e-06,
      "loss": 0.2854,
      "step": 535
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.9066780150364364,
      "learning_rate": 1.923131427579193e-06,
      "loss": 0.3441,
      "step": 536
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.573671704791437,
      "learning_rate": 1.9227781630444444e-06,
      "loss": 0.3418,
      "step": 537
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.395871838099826,
      "learning_rate": 1.9224241212194363e-06,
      "loss": 0.2987,
      "step": 538
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3230103292952684,
      "learning_rate": 1.9220693024023915e-06,
      "loss": 0.3357,
      "step": 539
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.486607024771495,
      "learning_rate": 1.921713706892187e-06,
      "loss": 0.352,
      "step": 540
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.395039371562957,
      "learning_rate": 1.9213573349883544e-06,
      "loss": 0.3222,
      "step": 541
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3744101515616243,
      "learning_rate": 1.9210001869910785e-06,
      "loss": 0.297,
      "step": 542
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.8587800610376006,
      "learning_rate": 1.9206422632011987e-06,
      "loss": 0.3287,
      "step": 543
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.3743691210117297,
      "learning_rate": 1.920283563920207e-06,
      "loss": 0.3331,
      "step": 544
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5527534687842115,
      "learning_rate": 1.9199240894502497e-06,
      "loss": 0.3361,
      "step": 545
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.6134344899024025,
      "learning_rate": 1.919563840094125e-06,
      "loss": 0.3363,
      "step": 546
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.321694668053294,
      "learning_rate": 1.9192028161552843e-06,
      "loss": 0.3191,
      "step": 547
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5342449599959207,
      "learning_rate": 1.918841017937832e-06,
      "loss": 0.3174,
      "step": 548
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.413833093027609,
      "learning_rate": 1.9184784457465236e-06,
      "loss": 0.3133,
      "step": 549
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6117314393101876,
      "learning_rate": 1.918115099886767e-06,
      "loss": 0.2876,
      "step": 550
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5391976887583554,
      "learning_rate": 1.9177509806646224e-06,
      "loss": 0.334,
      "step": 551
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4016170632334592,
      "learning_rate": 1.9173860883868005e-06,
      "loss": 0.334,
      "step": 552
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.6440124440068837,
      "learning_rate": 1.9170204233606638e-06,
      "loss": 0.3077,
      "step": 553
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.482547368266431,
      "learning_rate": 1.9166539858942254e-06,
      "loss": 0.3164,
      "step": 554
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4041117231577642,
      "learning_rate": 1.9162867762961495e-06,
      "loss": 0.328,
      "step": 555
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.564868045958623,
      "learning_rate": 1.91591879487575e-06,
      "loss": 0.3859,
      "step": 556
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4954454983835226,
      "learning_rate": 1.9155500419429915e-06,
      "loss": 0.3196,
      "step": 557
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2622655946584604,
      "learning_rate": 1.9151805178084877e-06,
      "loss": 0.2938,
      "step": 558
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5314647358932616,
      "learning_rate": 1.9148102227835032e-06,
      "loss": 0.3244,
      "step": 559
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.378307639689325,
      "learning_rate": 1.9144391571799508e-06,
      "loss": 0.3212,
      "step": 560
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.486662946691639,
      "learning_rate": 1.914067321310393e-06,
      "loss": 0.3252,
      "step": 561
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5296589870880304,
      "learning_rate": 1.9136947154880413e-06,
      "loss": 0.356,
      "step": 562
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.308279577860115,
      "learning_rate": 1.9133213400267547e-06,
      "loss": 0.2777,
      "step": 563
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5577799075870113,
      "learning_rate": 1.9129471952410416e-06,
      "loss": 0.316,
      "step": 564
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4022008859887922,
      "learning_rate": 1.912572281446058e-06,
      "loss": 0.293,
      "step": 565
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.7900327421429663,
      "learning_rate": 1.9121965989576074e-06,
      "loss": 0.3221,
      "step": 566
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.302655008983288,
      "learning_rate": 1.9118201480921414e-06,
      "loss": 0.2883,
      "step": 567
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.7137787292734403,
      "learning_rate": 1.911442929166758e-06,
      "loss": 0.3416,
      "step": 568
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4111683606362626,
      "learning_rate": 1.911064942499204e-06,
      "loss": 0.2932,
      "step": 569
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5828661156738106,
      "learning_rate": 1.91068618840787e-06,
      "loss": 0.341,
      "step": 570
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.589921775852251,
      "learning_rate": 1.9103066672117954e-06,
      "loss": 0.3259,
      "step": 571
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6417147099420015,
      "learning_rate": 1.909926379230665e-06,
      "loss": 0.376,
      "step": 572
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.388934604814071,
      "learning_rate": 1.9095453247848097e-06,
      "loss": 0.3073,
      "step": 573
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6524201982252116,
      "learning_rate": 1.909163504195205e-06,
      "loss": 0.3319,
      "step": 574
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4339126604553596,
      "learning_rate": 1.9087809177834733e-06,
      "loss": 0.3259,
      "step": 575
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5826745669652147,
      "learning_rate": 1.9083975658718804e-06,
      "loss": 0.362,
      "step": 576
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5029175314188823,
      "learning_rate": 1.908013448783339e-06,
      "loss": 0.3201,
      "step": 577
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3953466865243507,
      "learning_rate": 1.9076285668414042e-06,
      "loss": 0.3018,
      "step": 578
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.432260404365039,
      "learning_rate": 1.907242920370277e-06,
      "loss": 0.3351,
      "step": 579
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.421859217898423,
      "learning_rate": 1.9068565096948014e-06,
      "loss": 0.3264,
      "step": 580
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6404787924858732,
      "learning_rate": 1.9064693351404655e-06,
      "loss": 0.3268,
      "step": 581
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.8951196157160384,
      "learning_rate": 1.9060813970334006e-06,
      "loss": 0.3217,
      "step": 582
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5423693712583666,
      "learning_rate": 1.9056926957003818e-06,
      "loss": 0.325,
      "step": 583
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6348173425693346,
      "learning_rate": 1.9053032314688261e-06,
      "loss": 0.3266,
      "step": 584
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.335526260808841,
      "learning_rate": 1.904913004666794e-06,
      "loss": 0.3154,
      "step": 585
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.379364811568399,
      "learning_rate": 1.904522015622988e-06,
      "loss": 0.3142,
      "step": 586
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.378276249403308,
      "learning_rate": 1.9041302646667526e-06,
      "loss": 0.3054,
      "step": 587
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6371331096990946,
      "learning_rate": 1.903737752128074e-06,
      "loss": 0.3344,
      "step": 588
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.567548383563427,
      "learning_rate": 1.9033444783375804e-06,
      "loss": 0.313,
      "step": 589
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.666816454240892,
      "learning_rate": 1.9029504436265405e-06,
      "loss": 0.3282,
      "step": 590
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6100809481220226,
      "learning_rate": 1.9025556483268646e-06,
      "loss": 0.3456,
      "step": 591
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.465309639776073,
      "learning_rate": 1.9021600927711035e-06,
      "loss": 0.3375,
      "step": 592
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3866245619525572,
      "learning_rate": 1.901763777292448e-06,
      "loss": 0.3103,
      "step": 593
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.506109281722593,
      "learning_rate": 1.9013667022247295e-06,
      "loss": 0.3546,
      "step": 594
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4005233786171747,
      "learning_rate": 1.9009688679024189e-06,
      "loss": 0.3456,
      "step": 595
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.252931620591618,
      "learning_rate": 1.900570274660627e-06,
      "loss": 0.3204,
      "step": 596
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5630832843404354,
      "learning_rate": 1.900170922835104e-06,
      "loss": 0.3014,
      "step": 597
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.776046404382503,
      "learning_rate": 1.899770812762238e-06,
      "loss": 0.3166,
      "step": 598
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4769903180988444,
      "learning_rate": 1.8993699447790573e-06,
      "loss": 0.3285,
      "step": 599
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6638868801905664,
      "learning_rate": 1.8989683192232274e-06,
      "loss": 0.3336,
      "step": 600
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.553206602922454,
      "learning_rate": 1.898565936433052e-06,
      "loss": 0.3442,
      "step": 601
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5268981785339535,
      "learning_rate": 1.8981627967474738e-06,
      "loss": 0.3122,
      "step": 602
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5220239590274005,
      "learning_rate": 1.8977589005060722e-06,
      "loss": 0.3039,
      "step": 603
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.31394471539144,
      "learning_rate": 1.8973542480490634e-06,
      "loss": 0.3123,
      "step": 604
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.391150021660033,
      "learning_rate": 1.8969488397173018e-06,
      "loss": 0.3363,
      "step": 605
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.472462582484426,
      "learning_rate": 1.8965426758522779e-06,
      "loss": 0.306,
      "step": 606
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.587624273773328,
      "learning_rate": 1.8961357567961178e-06,
      "loss": 0.3533,
      "step": 607
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.347674821776924,
      "learning_rate": 1.8957280828915853e-06,
      "loss": 0.3416,
      "step": 608
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.36594618155158,
      "learning_rate": 1.8953196544820789e-06,
      "loss": 0.3117,
      "step": 609
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3850526329845225,
      "learning_rate": 1.894910471911633e-06,
      "loss": 0.3458,
      "step": 610
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.435275666201602,
      "learning_rate": 1.8945005355249175e-06,
      "loss": 0.3243,
      "step": 611
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4942240390673143,
      "learning_rate": 1.8940898456672368e-06,
      "loss": 0.3712,
      "step": 612
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6755213116047805,
      "learning_rate": 1.89367840268453e-06,
      "loss": 0.3011,
      "step": 613
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5560943374584237,
      "learning_rate": 1.8932662069233713e-06,
      "loss": 0.3243,
      "step": 614
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3265956737861715,
      "learning_rate": 1.892853258730968e-06,
      "loss": 0.3042,
      "step": 615
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.387187177414009,
      "learning_rate": 1.892439558455162e-06,
      "loss": 0.3214,
      "step": 616
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.592245862879042,
      "learning_rate": 1.892025106444428e-06,
      "loss": 0.3066,
      "step": 617
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.334983691084106,
      "learning_rate": 1.8916099030478746e-06,
      "loss": 0.322,
      "step": 618
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4200119260792414,
      "learning_rate": 1.891193948615243e-06,
      "loss": 0.3414,
      "step": 619
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.341872649046259,
      "learning_rate": 1.890777243496907e-06,
      "loss": 0.3028,
      "step": 620
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.699959166733786,
      "learning_rate": 1.8903597880438727e-06,
      "loss": 0.3372,
      "step": 621
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.6037992641562195,
      "learning_rate": 1.8899415826077783e-06,
      "loss": 0.3287,
      "step": 622
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5600828901372816,
      "learning_rate": 1.8895226275408937e-06,
      "loss": 0.3081,
      "step": 623
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.47503393467837,
      "learning_rate": 1.8891029231961207e-06,
      "loss": 0.3134,
      "step": 624
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.475124180370412,
      "learning_rate": 1.8886824699269912e-06,
      "loss": 0.3327,
      "step": 625
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4558851933431,
      "learning_rate": 1.8882612680876689e-06,
      "loss": 0.3099,
      "step": 626
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4377750490408023,
      "learning_rate": 1.887839318032948e-06,
      "loss": 0.3065,
      "step": 627
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4888520087900017,
      "learning_rate": 1.8874166201182522e-06,
      "loss": 0.2961,
      "step": 628
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.429511339842882,
      "learning_rate": 1.8869931746996358e-06,
      "loss": 0.2766,
      "step": 629
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.517207986809363,
      "learning_rate": 1.8865689821337825e-06,
      "loss": 0.3325,
      "step": 630
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5414615608781967,
      "learning_rate": 1.8861440427780058e-06,
      "loss": 0.3545,
      "step": 631
    },
    {
      "epoch": 0.18,
      "grad_norm": 6.389922704628554,
      "learning_rate": 1.8857183569902473e-06,
      "loss": 0.3134,
      "step": 632
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.469158788196905,
      "learning_rate": 1.8852919251290783e-06,
      "loss": 0.3327,
      "step": 633
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4225156287436476,
      "learning_rate": 1.884864747553698e-06,
      "loss": 0.3232,
      "step": 634
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.541582435994063,
      "learning_rate": 1.884436824623934e-06,
      "loss": 0.3067,
      "step": 635
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.324430056321435,
      "learning_rate": 1.8840081567002417e-06,
      "loss": 0.3344,
      "step": 636
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4445507644563325,
      "learning_rate": 1.883578744143704e-06,
      "loss": 0.322,
      "step": 637
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.398784855142958,
      "learning_rate": 1.8831485873160312e-06,
      "loss": 0.2996,
      "step": 638
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.490567840851795,
      "learning_rate": 1.8827176865795596e-06,
      "loss": 0.3261,
      "step": 639
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.736732509195614,
      "learning_rate": 1.8822860422972534e-06,
      "loss": 0.3633,
      "step": 640
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.298233280198889,
      "learning_rate": 1.8818536548327026e-06,
      "loss": 0.3252,
      "step": 641
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.381279510885023,
      "learning_rate": 1.8814205245501234e-06,
      "loss": 0.3223,
      "step": 642
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.6425786131254654,
      "learning_rate": 1.880986651814357e-06,
      "loss": 0.2965,
      "step": 643
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.608150395709567,
      "learning_rate": 1.8805520369908705e-06,
      "loss": 0.3117,
      "step": 644
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.528769452573672,
      "learning_rate": 1.8801166804457568e-06,
      "loss": 0.3365,
      "step": 645
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.593082547173156,
      "learning_rate": 1.879680582545732e-06,
      "loss": 0.3417,
      "step": 646
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.235518751649165,
      "learning_rate": 1.879243743658138e-06,
      "loss": 0.2968,
      "step": 647
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.512840027266576,
      "learning_rate": 1.8788061641509398e-06,
      "loss": 0.331,
      "step": 648
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.623875669173926,
      "learning_rate": 1.878367844392728e-06,
      "loss": 0.3278,
      "step": 649
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.613013341049209,
      "learning_rate": 1.8779287847527146e-06,
      "loss": 0.3081,
      "step": 650
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.3967165560552526,
      "learning_rate": 1.877488985600736e-06,
      "loss": 0.2915,
      "step": 651
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.461584837861662,
      "learning_rate": 1.8770484473072517e-06,
      "loss": 0.3174,
      "step": 652
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5909734569213563,
      "learning_rate": 1.8766071702433427e-06,
      "loss": 0.3462,
      "step": 653
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3003987637657093,
      "learning_rate": 1.8761651547807142e-06,
      "loss": 0.2864,
      "step": 654
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3914632891284517,
      "learning_rate": 1.875722401291691e-06,
      "loss": 0.3229,
      "step": 655
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.745394637477216,
      "learning_rate": 1.8752789101492214e-06,
      "loss": 0.3379,
      "step": 656
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.637442893764372,
      "learning_rate": 1.8748346817268745e-06,
      "loss": 0.2811,
      "step": 657
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.7011526479877643,
      "learning_rate": 1.87438971639884e-06,
      "loss": 0.3486,
      "step": 658
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.37855781182186,
      "learning_rate": 1.8739440145399293e-06,
      "loss": 0.3502,
      "step": 659
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4520047039317117,
      "learning_rate": 1.873497576525573e-06,
      "loss": 0.3201,
      "step": 660
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.634743516274746,
      "learning_rate": 1.873050402731822e-06,
      "loss": 0.32,
      "step": 661
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5434914673155924,
      "learning_rate": 1.8726024935353487e-06,
      "loss": 0.3073,
      "step": 662
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.405470557161396,
      "learning_rate": 1.8721538493134425e-06,
      "loss": 0.3465,
      "step": 663
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5890725900604377,
      "learning_rate": 1.8717044704440137e-06,
      "loss": 0.3216,
      "step": 664
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6171547730436733,
      "learning_rate": 1.8712543573055903e-06,
      "loss": 0.3311,
      "step": 665
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6274679971113475,
      "learning_rate": 1.8708035102773196e-06,
      "loss": 0.3092,
      "step": 666
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.0832936101998927,
      "learning_rate": 1.8703519297389667e-06,
      "loss": 0.3331,
      "step": 667
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.370456160821706,
      "learning_rate": 1.8698996160709146e-06,
      "loss": 0.2874,
      "step": 668
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4773772224277013,
      "learning_rate": 1.8694465696541639e-06,
      "loss": 0.32,
      "step": 669
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4624072608294227,
      "learning_rate": 1.8689927908703322e-06,
      "loss": 0.3001,
      "step": 670
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4253560050409084,
      "learning_rate": 1.8685382801016547e-06,
      "loss": 0.3164,
      "step": 671
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5349779292713572,
      "learning_rate": 1.868083037730982e-06,
      "loss": 0.3408,
      "step": 672
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.98022002253498,
      "learning_rate": 1.8676270641417821e-06,
      "loss": 0.3569,
      "step": 673
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4999640201078237,
      "learning_rate": 1.8671703597181383e-06,
      "loss": 0.326,
      "step": 674
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.602274939593711,
      "learning_rate": 1.8667129248447497e-06,
      "loss": 0.3487,
      "step": 675
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.650254061550966,
      "learning_rate": 1.8662547599069308e-06,
      "loss": 0.3179,
      "step": 676
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6539404492903316,
      "learning_rate": 1.8657958652906106e-06,
      "loss": 0.3066,
      "step": 677
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4099554187480887,
      "learning_rate": 1.8653362413823331e-06,
      "loss": 0.3084,
      "step": 678
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.611577650482648,
      "learning_rate": 1.8648758885692569e-06,
      "loss": 0.3539,
      "step": 679
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2722703116941463,
      "learning_rate": 1.8644148072391537e-06,
      "loss": 0.3013,
      "step": 680
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.641577905268663,
      "learning_rate": 1.86395299778041e-06,
      "loss": 0.3231,
      "step": 681
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.2759503960375165,
      "learning_rate": 1.8634904605820244e-06,
      "loss": 0.3255,
      "step": 682
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4768927319494694,
      "learning_rate": 1.8630271960336096e-06,
      "loss": 0.3297,
      "step": 683
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5489379391148748,
      "learning_rate": 1.8625632045253905e-06,
      "loss": 0.3336,
      "step": 684
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.67838618655016,
      "learning_rate": 1.8620984864482042e-06,
      "loss": 0.3079,
      "step": 685
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.586620003188253,
      "learning_rate": 1.8616330421935001e-06,
      "loss": 0.3386,
      "step": 686
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.339499160262079,
      "learning_rate": 1.861166872153339e-06,
      "loss": 0.3196,
      "step": 687
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3721624029710053,
      "learning_rate": 1.860699976720393e-06,
      "loss": 0.3084,
      "step": 688
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4086625407569775,
      "learning_rate": 1.8602323562879461e-06,
      "loss": 0.3253,
      "step": 689
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.599196154030309,
      "learning_rate": 1.8597640112498914e-06,
      "loss": 0.3298,
      "step": 690
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.439632928413326,
      "learning_rate": 1.859294942000734e-06,
      "loss": 0.3522,
      "step": 691
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2742388509467504,
      "learning_rate": 1.8588251489355882e-06,
      "loss": 0.3085,
      "step": 692
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.6673271769516163,
      "learning_rate": 1.8583546324501781e-06,
      "loss": 0.2883,
      "step": 693
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.517797219208855,
      "learning_rate": 1.857883392940837e-06,
      "loss": 0.3105,
      "step": 694
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.386077222115691,
      "learning_rate": 1.8574114308045074e-06,
      "loss": 0.3316,
      "step": 695
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.628302002056542,
      "learning_rate": 1.856938746438741e-06,
      "loss": 0.328,
      "step": 696
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.368441850241122,
      "learning_rate": 1.8564653402416968e-06,
      "loss": 0.2803,
      "step": 697
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4692437770109708,
      "learning_rate": 1.8559912126121424e-06,
      "loss": 0.3359,
      "step": 698
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5386494618133373,
      "learning_rate": 1.8555163639494534e-06,
      "loss": 0.3291,
      "step": 699
    },
    {
      "epoch": 0.2,
      "grad_norm": 9.033454716840346,
      "learning_rate": 1.8550407946536123e-06,
      "loss": 0.3158,
      "step": 700
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5159179694463285,
      "learning_rate": 1.854564505125209e-06,
      "loss": 0.3187,
      "step": 701
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5230000058408466,
      "learning_rate": 1.8540874957654396e-06,
      "loss": 0.3,
      "step": 702
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.7927177428869077,
      "learning_rate": 1.8536097669761064e-06,
      "loss": 0.3026,
      "step": 703
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4839005856698795,
      "learning_rate": 1.8531313191596186e-06,
      "loss": 0.3017,
      "step": 704
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.800891099137517,
      "learning_rate": 1.8526521527189903e-06,
      "loss": 0.3359,
      "step": 705
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2846064092871186,
      "learning_rate": 1.8521722680578411e-06,
      "loss": 0.3122,
      "step": 706
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.233088715040393,
      "learning_rate": 1.851691665580396e-06,
      "loss": 0.2861,
      "step": 707
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.538036700824122,
      "learning_rate": 1.851210345691484e-06,
      "loss": 0.3084,
      "step": 708
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.45913218863696,
      "learning_rate": 1.8507283087965387e-06,
      "loss": 0.3205,
      "step": 709
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5787057825261046,
      "learning_rate": 1.8502455553015976e-06,
      "loss": 0.3406,
      "step": 710
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.8389237764621162,
      "learning_rate": 1.8497620856133019e-06,
      "loss": 0.352,
      "step": 711
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4819341553311967,
      "learning_rate": 1.8492779001388964e-06,
      "loss": 0.3211,
      "step": 712
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.489319817396486,
      "learning_rate": 1.848792999286228e-06,
      "loss": 0.3089,
      "step": 713
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5388004718059367,
      "learning_rate": 1.8483073834637467e-06,
      "loss": 0.3115,
      "step": 714
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.7326112989715554,
      "learning_rate": 1.847821053080505e-06,
      "loss": 0.3376,
      "step": 715
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5047349342201994,
      "learning_rate": 1.8473340085461567e-06,
      "loss": 0.3138,
      "step": 716
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.804498503318447,
      "learning_rate": 1.8468462502709577e-06,
      "loss": 0.35,
      "step": 717
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.439841721380218,
      "learning_rate": 1.8463577786657649e-06,
      "loss": 0.3395,
      "step": 718
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.3401956581086893,
      "learning_rate": 1.8458685941420358e-06,
      "loss": 0.2785,
      "step": 719
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.394895809355412,
      "learning_rate": 1.8453786971118287e-06,
      "loss": 0.3223,
      "step": 720
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4660553191758,
      "learning_rate": 1.8448880879878024e-06,
      "loss": 0.3619,
      "step": 721
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.301475320635097,
      "learning_rate": 1.8443967671832148e-06,
      "loss": 0.2969,
      "step": 722
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.0721556769159895,
      "learning_rate": 1.843904735111924e-06,
      "loss": 0.3455,
      "step": 723
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1798953175310274,
      "learning_rate": 1.8434119921883861e-06,
      "loss": 0.2835,
      "step": 724
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3614911044919316,
      "learning_rate": 1.8429185388276576e-06,
      "loss": 0.3089,
      "step": 725
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6367517629764587,
      "learning_rate": 1.8424243754453919e-06,
      "loss": 0.3234,
      "step": 726
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4323439863930796,
      "learning_rate": 1.8419295024578416e-06,
      "loss": 0.3071,
      "step": 727
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.636030318990423,
      "learning_rate": 1.8414339202818562e-06,
      "loss": 0.3645,
      "step": 728
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5239856793906656,
      "learning_rate": 1.8409376293348834e-06,
      "loss": 0.299,
      "step": 729
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3291366949117034,
      "learning_rate": 1.840440630034967e-06,
      "loss": 0.3238,
      "step": 730
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3344355021750274,
      "learning_rate": 1.8399429228007483e-06,
      "loss": 0.2983,
      "step": 731
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5558353060456525,
      "learning_rate": 1.8394445080514642e-06,
      "loss": 0.2869,
      "step": 732
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4247971738192935,
      "learning_rate": 1.838945386206948e-06,
      "loss": 0.292,
      "step": 733
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5825090055477484,
      "learning_rate": 1.8384455576876288e-06,
      "loss": 0.3063,
      "step": 734
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.296180432216192,
      "learning_rate": 1.8379450229145305e-06,
      "loss": 0.3052,
      "step": 735
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.358017059445579,
      "learning_rate": 1.8374437823092722e-06,
      "loss": 0.259,
      "step": 736
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3676231371995446,
      "learning_rate": 1.8369418362940673e-06,
      "loss": 0.3186,
      "step": 737
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.220845042724821,
      "learning_rate": 1.8364391852917235e-06,
      "loss": 0.3032,
      "step": 738
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.726016115467376,
      "learning_rate": 1.8359358297256427e-06,
      "loss": 0.3386,
      "step": 739
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5838509942685897,
      "learning_rate": 1.8354317700198196e-06,
      "loss": 0.327,
      "step": 740
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6091104337763684,
      "learning_rate": 1.8349270065988427e-06,
      "loss": 0.3253,
      "step": 741
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.714238298218788,
      "learning_rate": 1.8344215398878924e-06,
      "loss": 0.2946,
      "step": 742
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.380436878366774,
      "learning_rate": 1.8339153703127428e-06,
      "loss": 0.3225,
      "step": 743
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2474161169587887,
      "learning_rate": 1.8334084982997586e-06,
      "loss": 0.2879,
      "step": 744
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4824008936563606,
      "learning_rate": 1.8329009242758975e-06,
      "loss": 0.3321,
      "step": 745
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.387839320021625,
      "learning_rate": 1.8323926486687073e-06,
      "loss": 0.3115,
      "step": 746
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4975073563611065,
      "learning_rate": 1.8318836719063277e-06,
      "loss": 0.2931,
      "step": 747
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6069296054765014,
      "learning_rate": 1.831373994417489e-06,
      "loss": 0.3373,
      "step": 748
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6304583634758507,
      "learning_rate": 1.830863616631511e-06,
      "loss": 0.3244,
      "step": 749
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4945219891074273,
      "learning_rate": 1.830352538978304e-06,
      "loss": 0.3045,
      "step": 750
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5943190229679503,
      "learning_rate": 1.8298407618883677e-06,
      "loss": 0.3132,
      "step": 751
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.451462512381113,
      "learning_rate": 1.8293282857927909e-06,
      "loss": 0.3306,
      "step": 752
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.802381941279302,
      "learning_rate": 1.828815111123251e-06,
      "loss": 0.3325,
      "step": 753
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2958989566865715,
      "learning_rate": 1.8283012383120145e-06,
      "loss": 0.2997,
      "step": 754
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4742741886127257,
      "learning_rate": 1.827786667791935e-06,
      "loss": 0.3466,
      "step": 755
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3330203880683333,
      "learning_rate": 1.8272713999964546e-06,
      "loss": 0.2964,
      "step": 756
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.372071762014812,
      "learning_rate": 1.8267554353596024e-06,
      "loss": 0.3035,
      "step": 757
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.54065702879972,
      "learning_rate": 1.8262387743159948e-06,
      "loss": 0.3268,
      "step": 758
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3382390623791394,
      "learning_rate": 1.8257214173008344e-06,
      "loss": 0.3051,
      "step": 759
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4031055066754603,
      "learning_rate": 1.8252033647499099e-06,
      "loss": 0.3317,
      "step": 760
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4032306983074845,
      "learning_rate": 1.8246846170995961e-06,
      "loss": 0.3015,
      "step": 761
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.1664023828151944,
      "learning_rate": 1.8241651747868541e-06,
      "loss": 0.3408,
      "step": 762
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4245747041798946,
      "learning_rate": 1.823645038249229e-06,
      "loss": 0.3301,
      "step": 763
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.307622301322068,
      "learning_rate": 1.823124207924851e-06,
      "loss": 0.3273,
      "step": 764
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.300710489645234,
      "learning_rate": 1.822602684252435e-06,
      "loss": 0.2982,
      "step": 765
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.548236181983721,
      "learning_rate": 1.8220804676712794e-06,
      "loss": 0.3127,
      "step": 766
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.405523364038282,
      "learning_rate": 1.8215575586212667e-06,
      "loss": 0.3216,
      "step": 767
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.290131171904868,
      "learning_rate": 1.821033957542863e-06,
      "loss": 0.2858,
      "step": 768
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.408680649010481,
      "learning_rate": 1.8205096648771163e-06,
      "loss": 0.3249,
      "step": 769
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.194918371382472,
      "learning_rate": 1.8199846810656583e-06,
      "loss": 0.2824,
      "step": 770
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.2665129154156993,
      "learning_rate": 1.819459006550702e-06,
      "loss": 0.3252,
      "step": 771
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4877798522357626,
      "learning_rate": 1.8189326417750426e-06,
      "loss": 0.2929,
      "step": 772
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4438405001800634,
      "learning_rate": 1.8184055871820565e-06,
      "loss": 0.3092,
      "step": 773
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.471072912113562,
      "learning_rate": 1.8178778432157014e-06,
      "loss": 0.3262,
      "step": 774
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.520311374184934,
      "learning_rate": 1.8173494103205158e-06,
      "loss": 0.2878,
      "step": 775
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4873941242668485,
      "learning_rate": 1.8168202889416182e-06,
      "loss": 0.2937,
      "step": 776
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4169244067397506,
      "learning_rate": 1.8162904795247074e-06,
      "loss": 0.3231,
      "step": 777
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4212965608339387,
      "learning_rate": 1.8157599825160607e-06,
      "loss": 0.3153,
      "step": 778
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.58524729253949,
      "learning_rate": 1.8152287983625365e-06,
      "loss": 0.3141,
      "step": 779
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.648579898819257,
      "learning_rate": 1.8146969275115701e-06,
      "loss": 0.3006,
      "step": 780
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4588512176786788,
      "learning_rate": 1.8141643704111767e-06,
      "loss": 0.3369,
      "step": 781
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.388867011332001,
      "learning_rate": 1.8136311275099484e-06,
      "loss": 0.3207,
      "step": 782
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3763493849662827,
      "learning_rate": 1.8130971992570552e-06,
      "loss": 0.3433,
      "step": 783
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3814908591670236,
      "learning_rate": 1.8125625861022454e-06,
      "loss": 0.3218,
      "step": 784
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2169228847674773,
      "learning_rate": 1.812027288495843e-06,
      "loss": 0.2853,
      "step": 785
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5600068690152775,
      "learning_rate": 1.8114913068887493e-06,
      "loss": 0.3017,
      "step": 786
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3564897528452327,
      "learning_rate": 1.810954641732441e-06,
      "loss": 0.3025,
      "step": 787
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.7185301584909722,
      "learning_rate": 1.8104172934789715e-06,
      "loss": 0.3002,
      "step": 788
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.394601217070245,
      "learning_rate": 1.8098792625809689e-06,
      "loss": 0.3127,
      "step": 789
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.919708059528672,
      "learning_rate": 1.8093405494916372e-06,
      "loss": 0.3342,
      "step": 790
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5018439752505226,
      "learning_rate": 1.8088011546647533e-06,
      "loss": 0.3036,
      "step": 791
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.0270280866919808,
      "learning_rate": 1.8082610785546706e-06,
      "loss": 0.3237,
      "step": 792
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5871694707198345,
      "learning_rate": 1.8077203216163143e-06,
      "loss": 0.3229,
      "step": 793
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.229119769181344,
      "learning_rate": 1.8071788843051848e-06,
      "loss": 0.2927,
      "step": 794
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2796518406154402,
      "learning_rate": 1.806636767077354e-06,
      "loss": 0.3144,
      "step": 795
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4914832529542728,
      "learning_rate": 1.8060939703894682e-06,
      "loss": 0.3326,
      "step": 796
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3781369431977692,
      "learning_rate": 1.8055504946987447e-06,
      "loss": 0.3312,
      "step": 797
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.6910642279410624,
      "learning_rate": 1.8050063404629732e-06,
      "loss": 0.3322,
      "step": 798
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5232510110504314,
      "learning_rate": 1.8044615081405151e-06,
      "loss": 0.2967,
      "step": 799
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.377745599667623,
      "learning_rate": 1.8039159981903027e-06,
      "loss": 0.289,
      "step": 800
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.354831110869895,
      "learning_rate": 1.8033698110718394e-06,
      "loss": 0.3094,
      "step": 801
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5266561213394017,
      "learning_rate": 1.802822947245199e-06,
      "loss": 0.3095,
      "step": 802
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5283457627776005,
      "learning_rate": 1.8022754071710252e-06,
      "loss": 0.3077,
      "step": 803
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.507549698347303,
      "learning_rate": 1.8017271913105306e-06,
      "loss": 0.3138,
      "step": 804
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.625393318911374,
      "learning_rate": 1.8011783001254988e-06,
      "loss": 0.3556,
      "step": 805
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5556149198490132,
      "learning_rate": 1.8006287340782805e-06,
      "loss": 0.2912,
      "step": 806
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.698644031831514,
      "learning_rate": 1.8000784936317957e-06,
      "loss": 0.2907,
      "step": 807
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4656465189485064,
      "learning_rate": 1.7995275792495324e-06,
      "loss": 0.2793,
      "step": 808
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.411006777212944,
      "learning_rate": 1.7989759913955463e-06,
      "loss": 0.3178,
      "step": 809
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4414409226078426,
      "learning_rate": 1.7984237305344601e-06,
      "loss": 0.3266,
      "step": 810
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4708127483607822,
      "learning_rate": 1.7978707971314636e-06,
      "loss": 0.3158,
      "step": 811
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.843055534073164,
      "learning_rate": 1.7973171916523131e-06,
      "loss": 0.2984,
      "step": 812
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4782911871163895,
      "learning_rate": 1.796762914563331e-06,
      "loss": 0.3261,
      "step": 813
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.559804468307838,
      "learning_rate": 1.7962079663314058e-06,
      "loss": 0.3027,
      "step": 814
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3474605814667004,
      "learning_rate": 1.7956523474239907e-06,
      "loss": 0.2762,
      "step": 815
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3212862035195525,
      "learning_rate": 1.7950960583091042e-06,
      "loss": 0.3334,
      "step": 816
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5219516348949926,
      "learning_rate": 1.794539099455329e-06,
      "loss": 0.3364,
      "step": 817
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2878948873749803,
      "learning_rate": 1.7939814713318122e-06,
      "loss": 0.3063,
      "step": 818
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.7420790975532734,
      "learning_rate": 1.7934231744082649e-06,
      "loss": 0.3206,
      "step": 819
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3798967487705602,
      "learning_rate": 1.7928642091549612e-06,
      "loss": 0.2839,
      "step": 820
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.593899829352958,
      "learning_rate": 1.7923045760427384e-06,
      "loss": 0.3206,
      "step": 821
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4143042994636197,
      "learning_rate": 1.791744275542996e-06,
      "loss": 0.3312,
      "step": 822
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.7810821685273153,
      "learning_rate": 1.7911833081276958e-06,
      "loss": 0.335,
      "step": 823
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4949557139361316,
      "learning_rate": 1.7906216742693619e-06,
      "loss": 0.3108,
      "step": 824
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.392727307987833,
      "learning_rate": 1.7900593744410789e-06,
      "loss": 0.2839,
      "step": 825
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.454105906481232,
      "learning_rate": 1.7894964091164928e-06,
      "loss": 0.311,
      "step": 826
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3359712297438464,
      "learning_rate": 1.7889327787698103e-06,
      "loss": 0.3052,
      "step": 827
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3578175704575868,
      "learning_rate": 1.7883684838757982e-06,
      "loss": 0.2843,
      "step": 828
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3546703645487783,
      "learning_rate": 1.787803524909783e-06,
      "loss": 0.3116,
      "step": 829
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3540766735466523,
      "learning_rate": 1.7872379023476506e-06,
      "loss": 0.3029,
      "step": 830
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4561967275434085,
      "learning_rate": 1.7866716166658455e-06,
      "loss": 0.3164,
      "step": 831
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3804728957034103,
      "learning_rate": 1.7861046683413714e-06,
      "loss": 0.3422,
      "step": 832
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5253167982397517,
      "learning_rate": 1.78553705785179e-06,
      "loss": 0.3434,
      "step": 833
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4647339440108627,
      "learning_rate": 1.7849687856752206e-06,
      "loss": 0.3414,
      "step": 834
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3099561921610197,
      "learning_rate": 1.7843998522903401e-06,
      "loss": 0.287,
      "step": 835
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.169187560233085,
      "learning_rate": 1.7838302581763815e-06,
      "loss": 0.3013,
      "step": 836
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4697580845690608,
      "learning_rate": 1.7832600038131358e-06,
      "loss": 0.3017,
      "step": 837
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.270074862967853,
      "learning_rate": 1.782689089680949e-06,
      "loss": 0.312,
      "step": 838
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.834517930537921,
      "learning_rate": 1.7821175162607234e-06,
      "loss": 0.3683,
      "step": 839
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3820423992048503,
      "learning_rate": 1.7815452840339162e-06,
      "loss": 0.3103,
      "step": 840
    },
    {
      "epoch": 0.24,
      "grad_norm": 4.134181679241243,
      "learning_rate": 1.7809723934825402e-06,
      "loss": 0.3091,
      "step": 841
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.512480635786526,
      "learning_rate": 1.7803988450891626e-06,
      "loss": 0.3315,
      "step": 842
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3133473585428055,
      "learning_rate": 1.7798246393369033e-06,
      "loss": 0.3219,
      "step": 843
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.337263848790207,
      "learning_rate": 1.7792497767094381e-06,
      "loss": 0.3013,
      "step": 844
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5037976885693185,
      "learning_rate": 1.7786742576909952e-06,
      "loss": 0.3077,
      "step": 845
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4742609810228995,
      "learning_rate": 1.778098082766355e-06,
      "loss": 0.3193,
      "step": 846
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4435964981750127,
      "learning_rate": 1.7775212524208512e-06,
      "loss": 0.3154,
      "step": 847
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.350815158082994,
      "learning_rate": 1.7769437671403696e-06,
      "loss": 0.3046,
      "step": 848
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.443996984457143,
      "learning_rate": 1.7763656274113473e-06,
      "loss": 0.3378,
      "step": 849
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4900482903481054,
      "learning_rate": 1.775786833720773e-06,
      "loss": 0.3177,
      "step": 850
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3204585475536352,
      "learning_rate": 1.7752073865561856e-06,
      "loss": 0.3452,
      "step": 851
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3326934912673125,
      "learning_rate": 1.7746272864056752e-06,
      "loss": 0.3015,
      "step": 852
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.170865654693707,
      "learning_rate": 1.774046533757882e-06,
      "loss": 0.2929,
      "step": 853
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.353397028015931,
      "learning_rate": 1.7734651291019953e-06,
      "loss": 0.2848,
      "step": 854
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3162206156297156,
      "learning_rate": 1.7728830729277537e-06,
      "loss": 0.2981,
      "step": 855
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.440390255244275,
      "learning_rate": 1.7723003657254444e-06,
      "loss": 0.3082,
      "step": 856
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.484031656364762,
      "learning_rate": 1.7717170079859039e-06,
      "loss": 0.2997,
      "step": 857
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4625882477489385,
      "learning_rate": 1.7711330002005155e-06,
      "loss": 0.3039,
      "step": 858
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2641087407974614,
      "learning_rate": 1.770548342861211e-06,
      "loss": 0.2964,
      "step": 859
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4624242786779154,
      "learning_rate": 1.7699630364604687e-06,
      "loss": 0.3124,
      "step": 860
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4068039255630387,
      "learning_rate": 1.769377081491314e-06,
      "loss": 0.3017,
      "step": 861
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5898207729162475,
      "learning_rate": 1.7687904784473186e-06,
      "loss": 0.3304,
      "step": 862
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4943816806666663,
      "learning_rate": 1.7682032278226e-06,
      "loss": 0.301,
      "step": 863
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2735550324194502,
      "learning_rate": 1.7676153301118206e-06,
      "loss": 0.2942,
      "step": 864
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.542751669795743,
      "learning_rate": 1.7670267858101892e-06,
      "loss": 0.3213,
      "step": 865
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.6233292164559674,
      "learning_rate": 1.7664375954134582e-06,
      "loss": 0.3246,
      "step": 866
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.406407165535525,
      "learning_rate": 1.7658477594179244e-06,
      "loss": 0.2849,
      "step": 867
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2742014416952006,
      "learning_rate": 1.7652572783204284e-06,
      "loss": 0.3338,
      "step": 868
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3357411639986996,
      "learning_rate": 1.7646661526183549e-06,
      "loss": 0.304,
      "step": 869
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3725262940496337,
      "learning_rate": 1.7640743828096305e-06,
      "loss": 0.2939,
      "step": 870
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2632492249675016,
      "learning_rate": 1.7634819693927252e-06,
      "loss": 0.3038,
      "step": 871
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.367959700282048,
      "learning_rate": 1.7628889128666501e-06,
      "loss": 0.313,
      "step": 872
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.6123170953069517,
      "learning_rate": 1.7622952137309595e-06,
      "loss": 0.3111,
      "step": 873
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.419626125941722,
      "learning_rate": 1.7617008724857478e-06,
      "loss": 0.2916,
      "step": 874
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.38908803713079,
      "learning_rate": 1.7611058896316507e-06,
      "loss": 0.3049,
      "step": 875
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5552448508204053,
      "learning_rate": 1.7605102656698442e-06,
      "loss": 0.2781,
      "step": 876
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4428995873799524,
      "learning_rate": 1.7599140011020448e-06,
      "loss": 0.3321,
      "step": 877
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4239545273760847,
      "learning_rate": 1.7593170964305077e-06,
      "loss": 0.3077,
      "step": 878
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1915135477858714,
      "learning_rate": 1.7587195521580288e-06,
      "loss": 0.2939,
      "step": 879
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4431326002218743,
      "learning_rate": 1.7581213687879405e-06,
      "loss": 0.3382,
      "step": 880
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.586725605255059,
      "learning_rate": 1.757522546824116e-06,
      "loss": 0.3251,
      "step": 881
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5557207299582285,
      "learning_rate": 1.7569230867709645e-06,
      "loss": 0.33,
      "step": 882
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5666833510025726,
      "learning_rate": 1.7563229891334336e-06,
      "loss": 0.3349,
      "step": 883
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2615897690980273,
      "learning_rate": 1.7557222544170081e-06,
      "loss": 0.3446,
      "step": 884
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.192243862822083,
      "learning_rate": 1.7551208831277088e-06,
      "loss": 0.3127,
      "step": 885
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5533807430869837,
      "learning_rate": 1.754518875772093e-06,
      "loss": 0.2985,
      "step": 886
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.6127071592959825,
      "learning_rate": 1.7539162328572542e-06,
      "loss": 0.3152,
      "step": 887
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.24436359208922,
      "learning_rate": 1.7533129548908203e-06,
      "loss": 0.2826,
      "step": 888
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.21650724397911,
      "learning_rate": 1.752709042380955e-06,
      "loss": 0.3139,
      "step": 889
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.240179325579028,
      "learning_rate": 1.7521044958363565e-06,
      "loss": 0.3201,
      "step": 890
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.6497870152313414,
      "learning_rate": 1.7514993157662561e-06,
      "loss": 0.3042,
      "step": 891
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4528906426154102,
      "learning_rate": 1.75089350268042e-06,
      "loss": 0.2924,
      "step": 892
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3758599539597336,
      "learning_rate": 1.7502870570891468e-06,
      "loss": 0.3152,
      "step": 893
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3761524990008973,
      "learning_rate": 1.749679979503268e-06,
      "loss": 0.3026,
      "step": 894
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.438579519539714,
      "learning_rate": 1.749072270434148e-06,
      "loss": 0.3014,
      "step": 895
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2771364984262332,
      "learning_rate": 1.7484639303936822e-06,
      "loss": 0.2879,
      "step": 896
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.699643333300088,
      "learning_rate": 1.747854959894298e-06,
      "loss": 0.3181,
      "step": 897
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3392193675944903,
      "learning_rate": 1.7472453594489538e-06,
      "loss": 0.307,
      "step": 898
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.370792659232408,
      "learning_rate": 1.746635129571139e-06,
      "loss": 0.2849,
      "step": 899
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.4045887373466757,
      "learning_rate": 1.7460242707748728e-06,
      "loss": 0.3112,
      "step": 900
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1885494563042704,
      "learning_rate": 1.7454127835747037e-06,
      "loss": 0.3049,
      "step": 901
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.719256164872196,
      "learning_rate": 1.7448006684857106e-06,
      "loss": 0.2924,
      "step": 902
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3275588349254432,
      "learning_rate": 1.7441879260235007e-06,
      "loss": 0.2916,
      "step": 903
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.6433671842254447,
      "learning_rate": 1.7435745567042094e-06,
      "loss": 0.3004,
      "step": 904
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3788139054652286,
      "learning_rate": 1.7429605610445004e-06,
      "loss": 0.2855,
      "step": 905
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3669128953610983,
      "learning_rate": 1.7423459395615652e-06,
      "loss": 0.3005,
      "step": 906
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3977847313289256,
      "learning_rate": 1.7417306927731223e-06,
      "loss": 0.3056,
      "step": 907
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.634258954737695,
      "learning_rate": 1.7411148211974167e-06,
      "loss": 0.2984,
      "step": 908
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.549418667244922,
      "learning_rate": 1.7404983253532202e-06,
      "loss": 0.3313,
      "step": 909
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.229251686531007,
      "learning_rate": 1.7398812057598298e-06,
      "loss": 0.2907,
      "step": 910
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.363913384681548,
      "learning_rate": 1.7392634629370681e-06,
      "loss": 0.3069,
      "step": 911
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.635330690255088,
      "learning_rate": 1.7386450974052832e-06,
      "loss": 0.3306,
      "step": 912
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.454322989683554,
      "learning_rate": 1.738026109685347e-06,
      "loss": 0.3071,
      "step": 913
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.559993279354754,
      "learning_rate": 1.7374065002986557e-06,
      "loss": 0.3025,
      "step": 914
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3497500860175626,
      "learning_rate": 1.7367862697671299e-06,
      "loss": 0.3064,
      "step": 915
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5312837946915137,
      "learning_rate": 1.7361654186132117e-06,
      "loss": 0.3307,
      "step": 916
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3373983973812016,
      "learning_rate": 1.735543947359868e-06,
      "loss": 0.2739,
      "step": 917
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4244408158663995,
      "learning_rate": 1.7349218565305867e-06,
      "loss": 0.2939,
      "step": 918
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4878585942575686,
      "learning_rate": 1.7342991466493784e-06,
      "loss": 0.2913,
      "step": 919
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.288423305700923,
      "learning_rate": 1.7336758182407737e-06,
      "loss": 0.3096,
      "step": 920
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.502154794481118,
      "learning_rate": 1.733051871829826e-06,
      "loss": 0.3025,
      "step": 921
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.478412947406026,
      "learning_rate": 1.7324273079421088e-06,
      "loss": 0.3002,
      "step": 922
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.6007090817241703,
      "learning_rate": 1.7318021271037146e-06,
      "loss": 0.3246,
      "step": 923
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1730136205293205,
      "learning_rate": 1.7311763298412569e-06,
      "loss": 0.309,
      "step": 924
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.328540172131703,
      "learning_rate": 1.7305499166818679e-06,
      "loss": 0.3198,
      "step": 925
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3787220615358797,
      "learning_rate": 1.7299228881531982e-06,
      "loss": 0.2649,
      "step": 926
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.522766022357985,
      "learning_rate": 1.729295244783418e-06,
      "loss": 0.3404,
      "step": 927
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.235899600554452,
      "learning_rate": 1.7286669871012135e-06,
      "loss": 0.3235,
      "step": 928
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.0892728394383817,
      "learning_rate": 1.7280381156357904e-06,
      "loss": 0.2768,
      "step": 929
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5467713290166145,
      "learning_rate": 1.7274086309168701e-06,
      "loss": 0.2989,
      "step": 930
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2375743713758394,
      "learning_rate": 1.7267785334746907e-06,
      "loss": 0.2926,
      "step": 931
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.392561762822056,
      "learning_rate": 1.7261478238400068e-06,
      "loss": 0.2985,
      "step": 932
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4450577681450616,
      "learning_rate": 1.725516502544089e-06,
      "loss": 0.3386,
      "step": 933
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2772591477186124,
      "learning_rate": 1.7248845701187218e-06,
      "loss": 0.2864,
      "step": 934
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.616420026072073,
      "learning_rate": 1.7242520270962057e-06,
      "loss": 0.333,
      "step": 935
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5407530283716717,
      "learning_rate": 1.7236188740093554e-06,
      "loss": 0.3247,
      "step": 936
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.837835871101234,
      "learning_rate": 1.7229851113914986e-06,
      "loss": 0.3547,
      "step": 937
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.438027917952889,
      "learning_rate": 1.7223507397764778e-06,
      "loss": 0.3063,
      "step": 938
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.2202556858343114,
      "learning_rate": 1.721715759698647e-06,
      "loss": 0.2684,
      "step": 939
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4880759861493327,
      "learning_rate": 1.721080171692874e-06,
      "loss": 0.3131,
      "step": 940
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4325603890985956,
      "learning_rate": 1.720443976294538e-06,
      "loss": 0.3066,
      "step": 941
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.598743028566477,
      "learning_rate": 1.7198071740395298e-06,
      "loss": 0.3196,
      "step": 942
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.447618969814215,
      "learning_rate": 1.7191697654642515e-06,
      "loss": 0.3208,
      "step": 943
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.398233275246693,
      "learning_rate": 1.7185317511056163e-06,
      "loss": 0.2895,
      "step": 944
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.455998669650308,
      "learning_rate": 1.717893131501047e-06,
      "loss": 0.3088,
      "step": 945
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.55553833781246,
      "learning_rate": 1.717253907188477e-06,
      "loss": 0.311,
      "step": 946
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.331976956659491,
      "learning_rate": 1.7166140787063484e-06,
      "loss": 0.2975,
      "step": 947
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5443708751804843,
      "learning_rate": 1.7159736465936122e-06,
      "loss": 0.3312,
      "step": 948
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4646620518131765,
      "learning_rate": 1.7153326113897285e-06,
      "loss": 0.2983,
      "step": 949
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.700520256461987,
      "learning_rate": 1.7146909736346649e-06,
      "loss": 0.3422,
      "step": 950
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.365208695404597,
      "learning_rate": 1.7140487338688964e-06,
      "loss": 0.3125,
      "step": 951
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.342799021500254,
      "learning_rate": 1.7134058926334061e-06,
      "loss": 0.2851,
      "step": 952
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.472473882462489,
      "learning_rate": 1.712762450469682e-06,
      "loss": 0.2833,
      "step": 953
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.924949206264284,
      "learning_rate": 1.7121184079197199e-06,
      "loss": 0.2867,
      "step": 954
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.446605549606646,
      "learning_rate": 1.7114737655260209e-06,
      "loss": 0.3125,
      "step": 955
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4578380201775114,
      "learning_rate": 1.710828523831591e-06,
      "loss": 0.304,
      "step": 956
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.278217100685124,
      "learning_rate": 1.7101826833799408e-06,
      "loss": 0.3097,
      "step": 957
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.634826805932555,
      "learning_rate": 1.7095362447150863e-06,
      "loss": 0.2935,
      "step": 958
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.378656134708543,
      "learning_rate": 1.708889208381546e-06,
      "loss": 0.2915,
      "step": 959
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4000239757418482,
      "learning_rate": 1.7082415749243434e-06,
      "loss": 0.2999,
      "step": 960
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.2573289490776585,
      "learning_rate": 1.7075933448890036e-06,
      "loss": 0.277,
      "step": 961
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.346166395763324,
      "learning_rate": 1.706944518821555e-06,
      "loss": 0.3021,
      "step": 962
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4124728615991455,
      "learning_rate": 1.7062950972685276e-06,
      "loss": 0.3128,
      "step": 963
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.826711761487159,
      "learning_rate": 1.705645080776954e-06,
      "loss": 0.313,
      "step": 964
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4047963495047906,
      "learning_rate": 1.7049944698943666e-06,
      "loss": 0.3371,
      "step": 965
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.389614674773777,
      "learning_rate": 1.7043432651687985e-06,
      "loss": 0.3249,
      "step": 966
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.436635418792066,
      "learning_rate": 1.7036914671487849e-06,
      "loss": 0.2986,
      "step": 967
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3717874245845163,
      "learning_rate": 1.7030390763833586e-06,
      "loss": 0.3382,
      "step": 968
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.497520195958812,
      "learning_rate": 1.7023860934220529e-06,
      "loss": 0.3302,
      "step": 969
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4602529075200508,
      "learning_rate": 1.701732518814899e-06,
      "loss": 0.2964,
      "step": 970
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.496811408865411,
      "learning_rate": 1.7010783531124276e-06,
      "loss": 0.2837,
      "step": 971
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2217169162660713,
      "learning_rate": 1.7004235968656663e-06,
      "loss": 0.3015,
      "step": 972
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.499163380974902,
      "learning_rate": 1.6997682506261408e-06,
      "loss": 0.3124,
      "step": 973
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4447537512361546,
      "learning_rate": 1.6991123149458738e-06,
      "loss": 0.2945,
      "step": 974
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.362267004446206,
      "learning_rate": 1.698455790377384e-06,
      "loss": 0.2863,
      "step": 975
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2546056397623135,
      "learning_rate": 1.6977986774736856e-06,
      "loss": 0.2822,
      "step": 976
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2691350851857135,
      "learning_rate": 1.6971409767882908e-06,
      "loss": 0.2974,
      "step": 977
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.445416620705216,
      "learning_rate": 1.6964826888752036e-06,
      "loss": 0.2852,
      "step": 978
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.384942731710928,
      "learning_rate": 1.6958238142889256e-06,
      "loss": 0.2925,
      "step": 979
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2714288291286295,
      "learning_rate": 1.6951643535844507e-06,
      "loss": 0.2746,
      "step": 980
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2375520872504486,
      "learning_rate": 1.6945043073172669e-06,
      "loss": 0.3002,
      "step": 981
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4784951066376597,
      "learning_rate": 1.6938436760433563e-06,
      "loss": 0.3209,
      "step": 982
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.340283512510198,
      "learning_rate": 1.6931824603191924e-06,
      "loss": 0.3039,
      "step": 983
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.30737661460916,
      "learning_rate": 1.692520660701742e-06,
      "loss": 0.2923,
      "step": 984
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.437715351903717,
      "learning_rate": 1.691858277748464e-06,
      "loss": 0.3014,
      "step": 985
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.5154337603104495,
      "learning_rate": 1.6911953120173072e-06,
      "loss": 0.2792,
      "step": 986
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4797671932366554,
      "learning_rate": 1.690531764066713e-06,
      "loss": 0.2949,
      "step": 987
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3547640609593965,
      "learning_rate": 1.6898676344556116e-06,
      "loss": 0.2787,
      "step": 988
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3656668511347525,
      "learning_rate": 1.6892029237434247e-06,
      "loss": 0.3285,
      "step": 989
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4444266223198388,
      "learning_rate": 1.6885376324900627e-06,
      "loss": 0.314,
      "step": 990
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3855107129962514,
      "learning_rate": 1.6878717612559248e-06,
      "loss": 0.3095,
      "step": 991
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.5693108001100673,
      "learning_rate": 1.6872053106018994e-06,
      "loss": 0.2997,
      "step": 992
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1970928719836182,
      "learning_rate": 1.686538281089362e-06,
      "loss": 0.3192,
      "step": 993
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3599486714531084,
      "learning_rate": 1.6858706732801765e-06,
      "loss": 0.2893,
      "step": 994
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.297010635268803,
      "learning_rate": 1.6852024877366944e-06,
      "loss": 0.2971,
      "step": 995
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3034348135405156,
      "learning_rate": 1.6845337250217524e-06,
      "loss": 0.3109,
      "step": 996
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.967206227562683,
      "learning_rate": 1.6838643856986742e-06,
      "loss": 0.3199,
      "step": 997
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.623339369359266,
      "learning_rate": 1.6831944703312692e-06,
      "loss": 0.2834,
      "step": 998
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.53117931729609,
      "learning_rate": 1.6825239794838325e-06,
      "loss": 0.3006,
      "step": 999
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3882720935542516,
      "learning_rate": 1.6818529137211426e-06,
      "loss": 0.3082,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.55440787423396,
      "learning_rate": 1.6811812736084634e-06,
      "loss": 0.3204,
      "step": 1001
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.321158053976048,
      "learning_rate": 1.6805090597115424e-06,
      "loss": 0.319,
      "step": 1002
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4872668248685,
      "learning_rate": 1.67983627259661e-06,
      "loss": 0.2799,
      "step": 1003
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4245140920050603,
      "learning_rate": 1.67916291283038e-06,
      "loss": 0.3063,
      "step": 1004
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.6435579373630245,
      "learning_rate": 1.678488980980048e-06,
      "loss": 0.3093,
      "step": 1005
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.258406979497761,
      "learning_rate": 1.6778144776132922e-06,
      "loss": 0.268,
      "step": 1006
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.9135073664027664,
      "learning_rate": 1.6771394032982715e-06,
      "loss": 0.3402,
      "step": 1007
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3278350848972984,
      "learning_rate": 1.6764637586036262e-06,
      "loss": 0.2839,
      "step": 1008
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3248056744204986,
      "learning_rate": 1.6757875440984765e-06,
      "loss": 0.2983,
      "step": 1009
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.439826013803833,
      "learning_rate": 1.6751107603524234e-06,
      "loss": 0.3344,
      "step": 1010
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2802148297417255,
      "learning_rate": 1.6744334079355468e-06,
      "loss": 0.2897,
      "step": 1011
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.6263572428855366,
      "learning_rate": 1.6737554874184054e-06,
      "loss": 0.2902,
      "step": 1012
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2664433077616817,
      "learning_rate": 1.6730769993720374e-06,
      "loss": 0.2768,
      "step": 1013
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.429423006143885,
      "learning_rate": 1.6723979443679581e-06,
      "loss": 0.3299,
      "step": 1014
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.416722381378333,
      "learning_rate": 1.6717183229781608e-06,
      "loss": 0.295,
      "step": 1015
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.8299828280374224,
      "learning_rate": 1.6710381357751153e-06,
      "loss": 0.3108,
      "step": 1016
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.0380148513953085,
      "learning_rate": 1.6703573833317695e-06,
      "loss": 0.3334,
      "step": 1017
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.315449694497384,
      "learning_rate": 1.6696760662215454e-06,
      "loss": 0.3075,
      "step": 1018
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.493817939261923,
      "learning_rate": 1.6689941850183423e-06,
      "loss": 0.3229,
      "step": 1019
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.501749294264062,
      "learning_rate": 1.668311740296534e-06,
      "loss": 0.3106,
      "step": 1020
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.289914719495963,
      "learning_rate": 1.6676287326309684e-06,
      "loss": 0.309,
      "step": 1021
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.718396677832358,
      "learning_rate": 1.666945162596969e-06,
      "loss": 0.3284,
      "step": 1022
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3457700683897236,
      "learning_rate": 1.6662610307703315e-06,
      "loss": 0.3157,
      "step": 1023
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.5448441504576618,
      "learning_rate": 1.6655763377273258e-06,
      "loss": 0.3019,
      "step": 1024
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4890666738130007,
      "learning_rate": 1.6648910840446945e-06,
      "loss": 0.3275,
      "step": 1025
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4894253493778624,
      "learning_rate": 1.6642052702996518e-06,
      "loss": 0.3149,
      "step": 1026
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.353041250698328,
      "learning_rate": 1.663518897069884e-06,
      "loss": 0.2886,
      "step": 1027
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4392572033533066,
      "learning_rate": 1.662831964933549e-06,
      "loss": 0.3412,
      "step": 1028
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3188088866561403,
      "learning_rate": 1.662144474469275e-06,
      "loss": 0.2947,
      "step": 1029
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.341100743164467,
      "learning_rate": 1.6614564262561608e-06,
      "loss": 0.2728,
      "step": 1030
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2101531499352434,
      "learning_rate": 1.660767820873775e-06,
      "loss": 0.3123,
      "step": 1031
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.475747069138636,
      "learning_rate": 1.6600786589021552e-06,
      "loss": 0.3403,
      "step": 1032
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.459042145691644,
      "learning_rate": 1.6593889409218082e-06,
      "loss": 0.312,
      "step": 1033
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.5295519497711707,
      "learning_rate": 1.6586986675137092e-06,
      "loss": 0.2986,
      "step": 1034
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.450928118563158,
      "learning_rate": 1.658007839259301e-06,
      "loss": 0.3285,
      "step": 1035
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1898774628428628,
      "learning_rate": 1.6573164567404935e-06,
      "loss": 0.2827,
      "step": 1036
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.427219719465487,
      "learning_rate": 1.6566245205396645e-06,
      "loss": 0.299,
      "step": 1037
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3570225217372567,
      "learning_rate": 1.655932031239657e-06,
      "loss": 0.2951,
      "step": 1038
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3402537457095547,
      "learning_rate": 1.6552389894237805e-06,
      "loss": 0.3171,
      "step": 1039
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.354873029829454,
      "learning_rate": 1.6545453956758095e-06,
      "loss": 0.2935,
      "step": 1040
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.25090458483495,
      "learning_rate": 1.6538512505799846e-06,
      "loss": 0.3108,
      "step": 1041
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.55047815912428,
      "learning_rate": 1.6531565547210091e-06,
      "loss": 0.3013,
      "step": 1042
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4440385164546696,
      "learning_rate": 1.6524613086840518e-06,
      "loss": 0.3105,
      "step": 1043
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.391941906752125,
      "learning_rate": 1.6517655130547435e-06,
      "loss": 0.2957,
      "step": 1044
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.435031979636581,
      "learning_rate": 1.6510691684191792e-06,
      "loss": 0.3012,
      "step": 1045
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.289453395649587,
      "learning_rate": 1.6503722753639152e-06,
      "loss": 0.2879,
      "step": 1046
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.7232938391584423,
      "learning_rate": 1.6496748344759711e-06,
      "loss": 0.2899,
      "step": 1047
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3480597213292884,
      "learning_rate": 1.6489768463428271e-06,
      "loss": 0.2844,
      "step": 1048
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2274801914857827,
      "learning_rate": 1.6482783115524236e-06,
      "loss": 0.3026,
      "step": 1049
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.350786936769416,
      "learning_rate": 1.6475792306931634e-06,
      "loss": 0.3025,
      "step": 1050
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.455094703349108,
      "learning_rate": 1.646879604353908e-06,
      "loss": 0.279,
      "step": 1051
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.749072216613041,
      "learning_rate": 1.6461794331239784e-06,
      "loss": 0.2868,
      "step": 1052
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3961592307243933,
      "learning_rate": 1.6454787175931545e-06,
      "loss": 0.3241,
      "step": 1053
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3802912636459324,
      "learning_rate": 1.6447774583516757e-06,
      "loss": 0.3184,
      "step": 1054
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3578994334145595,
      "learning_rate": 1.6440756559902378e-06,
      "loss": 0.2992,
      "step": 1055
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.669831279613677,
      "learning_rate": 1.6433733110999955e-06,
      "loss": 0.3343,
      "step": 1056
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3566063200933205,
      "learning_rate": 1.64267042427256e-06,
      "loss": 0.2962,
      "step": 1057
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.9095379633908793,
      "learning_rate": 1.6419669960999988e-06,
      "loss": 0.3049,
      "step": 1058
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4085390520917045,
      "learning_rate": 1.6412630271748353e-06,
      "loss": 0.3121,
      "step": 1059
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3603280046033523,
      "learning_rate": 1.640558518090049e-06,
      "loss": 0.2943,
      "step": 1060
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.486479238370381,
      "learning_rate": 1.6398534694390738e-06,
      "loss": 0.3191,
      "step": 1061
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3336082601675407,
      "learning_rate": 1.6391478818157984e-06,
      "loss": 0.3227,
      "step": 1062
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.40928700934904,
      "learning_rate": 1.6384417558145653e-06,
      "loss": 0.2902,
      "step": 1063
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.320113452541132,
      "learning_rate": 1.637735092030171e-06,
      "loss": 0.294,
      "step": 1064
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.1722153868801457,
      "learning_rate": 1.637027891057864e-06,
      "loss": 0.2874,
      "step": 1065
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4440111925983836,
      "learning_rate": 1.6363201534933461e-06,
      "loss": 0.3141,
      "step": 1066
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3338478949662225,
      "learning_rate": 1.6356118799327714e-06,
      "loss": 0.3092,
      "step": 1067
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5121442311511917,
      "learning_rate": 1.634903070972744e-06,
      "loss": 0.3397,
      "step": 1068
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5781463798701902,
      "learning_rate": 1.634193727210321e-06,
      "loss": 0.3104,
      "step": 1069
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5961627840525114,
      "learning_rate": 1.6334838492430083e-06,
      "loss": 0.3101,
      "step": 1070
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.267179058947479,
      "learning_rate": 1.6327734376687627e-06,
      "loss": 0.283,
      "step": 1071
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3444605330977066,
      "learning_rate": 1.6320624930859904e-06,
      "loss": 0.3122,
      "step": 1072
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.993127129778634,
      "learning_rate": 1.6313510160935456e-06,
      "loss": 0.3471,
      "step": 1073
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2872650945211332,
      "learning_rate": 1.6306390072907325e-06,
      "loss": 0.3169,
      "step": 1074
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.724604471623292,
      "learning_rate": 1.6299264672773023e-06,
      "loss": 0.3031,
      "step": 1075
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.913751858855762,
      "learning_rate": 1.6292133966534538e-06,
      "loss": 0.284,
      "step": 1076
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.286247266050023,
      "learning_rate": 1.6284997960198327e-06,
      "loss": 0.3257,
      "step": 1077
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3860053312723952,
      "learning_rate": 1.6277856659775318e-06,
      "loss": 0.315,
      "step": 1078
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.453199624252134,
      "learning_rate": 1.6270710071280886e-06,
      "loss": 0.3262,
      "step": 1079
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.622679958245868,
      "learning_rate": 1.6263558200734874e-06,
      "loss": 0.2872,
      "step": 1080
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3486987427194266,
      "learning_rate": 1.6256401054161564e-06,
      "loss": 0.3164,
      "step": 1081
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.217339421310054,
      "learning_rate": 1.6249238637589686e-06,
      "loss": 0.2927,
      "step": 1082
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.350362880482745,
      "learning_rate": 1.6242070957052408e-06,
      "loss": 0.327,
      "step": 1083
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.291927833619107,
      "learning_rate": 1.6234898018587336e-06,
      "loss": 0.281,
      "step": 1084
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3219095991433525,
      "learning_rate": 1.62277198282365e-06,
      "loss": 0.292,
      "step": 1085
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3916455188615386,
      "learning_rate": 1.6220536392046355e-06,
      "loss": 0.2984,
      "step": 1086
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.858586614338182,
      "learning_rate": 1.621334771606778e-06,
      "loss": 0.3256,
      "step": 1087
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.563926714028576,
      "learning_rate": 1.620615380635606e-06,
      "loss": 0.298,
      "step": 1088
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3780229371631334,
      "learning_rate": 1.6198954668970892e-06,
      "loss": 0.3113,
      "step": 1089
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3717718934908736,
      "learning_rate": 1.6191750309976375e-06,
      "loss": 0.3097,
      "step": 1090
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.26879452699283,
      "learning_rate": 1.6184540735441011e-06,
      "loss": 0.2758,
      "step": 1091
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3545499465443673,
      "learning_rate": 1.617732595143769e-06,
      "loss": 0.2951,
      "step": 1092
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.244857733670842,
      "learning_rate": 1.6170105964043693e-06,
      "loss": 0.2951,
      "step": 1093
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3294945365100497,
      "learning_rate": 1.6162880779340685e-06,
      "loss": 0.2943,
      "step": 1094
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3426596108235653,
      "learning_rate": 1.61556504034147e-06,
      "loss": 0.3315,
      "step": 1095
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4575241805037504,
      "learning_rate": 1.6148414842356157e-06,
      "loss": 0.3015,
      "step": 1096
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4324516901036253,
      "learning_rate": 1.6141174102259835e-06,
      "loss": 0.2792,
      "step": 1097
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4290612852471347,
      "learning_rate": 1.6133928189224886e-06,
      "loss": 0.3176,
      "step": 1098
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.370996130318332,
      "learning_rate": 1.61266771093548e-06,
      "loss": 0.2853,
      "step": 1099
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.770906603532933,
      "learning_rate": 1.6119420868757429e-06,
      "loss": 0.3126,
      "step": 1100
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3587875090211288,
      "learning_rate": 1.6112159473544988e-06,
      "loss": 0.2996,
      "step": 1101
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.213960910839405,
      "learning_rate": 1.6104892929834006e-06,
      "loss": 0.2793,
      "step": 1102
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.453113592013504,
      "learning_rate": 1.6097621243745369e-06,
      "loss": 0.3057,
      "step": 1103
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.458016611313274,
      "learning_rate": 1.6090344421404285e-06,
      "loss": 0.2673,
      "step": 1104
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.473355007432182,
      "learning_rate": 1.6083062468940294e-06,
      "loss": 0.3012,
      "step": 1105
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.336731158238361,
      "learning_rate": 1.607577539248725e-06,
      "loss": 0.2982,
      "step": 1106
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.431007377897008,
      "learning_rate": 1.606848319818333e-06,
      "loss": 0.3029,
      "step": 1107
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.484485654028409,
      "learning_rate": 1.6061185892171021e-06,
      "loss": 0.3235,
      "step": 1108
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1469506553648987,
      "learning_rate": 1.6053883480597112e-06,
      "loss": 0.2604,
      "step": 1109
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3229347997347713,
      "learning_rate": 1.60465759696127e-06,
      "loss": 0.3256,
      "step": 1110
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.802832295421606,
      "learning_rate": 1.6039263365373167e-06,
      "loss": 0.2955,
      "step": 1111
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2623099141043825,
      "learning_rate": 1.6031945674038188e-06,
      "loss": 0.2703,
      "step": 1112
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3600259797934546,
      "learning_rate": 1.6024622901771734e-06,
      "loss": 0.2909,
      "step": 1113
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4897006659817715,
      "learning_rate": 1.6017295054742044e-06,
      "loss": 0.2968,
      "step": 1114
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.684689300603362,
      "learning_rate": 1.6009962139121634e-06,
      "loss": 0.2989,
      "step": 1115
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4044760003269716,
      "learning_rate": 1.600262416108729e-06,
      "loss": 0.2997,
      "step": 1116
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3201320285410962,
      "learning_rate": 1.5995281126820066e-06,
      "loss": 0.3071,
      "step": 1117
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6468821662036515,
      "learning_rate": 1.598793304250527e-06,
      "loss": 0.286,
      "step": 1118
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3639503488566613,
      "learning_rate": 1.5980579914332465e-06,
      "loss": 0.2914,
      "step": 1119
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6193329887287207,
      "learning_rate": 1.5973221748495468e-06,
      "loss": 0.2952,
      "step": 1120
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.679692415018323,
      "learning_rate": 1.5965858551192327e-06,
      "loss": 0.2953,
      "step": 1121
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3437150532341953,
      "learning_rate": 1.5958490328625347e-06,
      "loss": 0.3032,
      "step": 1122
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3239952811715567,
      "learning_rate": 1.5951117087001046e-06,
      "loss": 0.2854,
      "step": 1123
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.385086510499303,
      "learning_rate": 1.5943738832530182e-06,
      "loss": 0.2979,
      "step": 1124
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4806154461787604,
      "learning_rate": 1.5936355571427733e-06,
      "loss": 0.2966,
      "step": 1125
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.216112548739291,
      "learning_rate": 1.5928967309912888e-06,
      "loss": 0.3029,
      "step": 1126
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.572934206919917,
      "learning_rate": 1.5921574054209063e-06,
      "loss": 0.3056,
      "step": 1127
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5508527353207016,
      "learning_rate": 1.5914175810543866e-06,
      "loss": 0.2833,
      "step": 1128
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6384165447425425,
      "learning_rate": 1.590677258514911e-06,
      "loss": 0.3049,
      "step": 1129
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.509989832211171,
      "learning_rate": 1.5899364384260811e-06,
      "loss": 0.2929,
      "step": 1130
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5231713344758324,
      "learning_rate": 1.5891951214119165e-06,
      "loss": 0.294,
      "step": 1131
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.281755230252254,
      "learning_rate": 1.5884533080968569e-06,
      "loss": 0.2919,
      "step": 1132
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3669782426311925,
      "learning_rate": 1.5877109991057587e-06,
      "loss": 0.3073,
      "step": 1133
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.362565174713835,
      "learning_rate": 1.5869681950638959e-06,
      "loss": 0.2966,
      "step": 1134
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.438987608826221,
      "learning_rate": 1.5862248965969603e-06,
      "loss": 0.2823,
      "step": 1135
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3746503313743537,
      "learning_rate": 1.5854811043310596e-06,
      "loss": 0.2849,
      "step": 1136
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5131572164621208,
      "learning_rate": 1.5847368188927179e-06,
      "loss": 0.2863,
      "step": 1137
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5408742994134594,
      "learning_rate": 1.5839920409088743e-06,
      "loss": 0.2736,
      "step": 1138
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3560444580090807,
      "learning_rate": 1.5832467710068824e-06,
      "loss": 0.2994,
      "step": 1139
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4639874108624364,
      "learning_rate": 1.5825010098145116e-06,
      "loss": 0.3127,
      "step": 1140
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1704217818052713,
      "learning_rate": 1.5817547579599432e-06,
      "loss": 0.2887,
      "step": 1141
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4916333471595937,
      "learning_rate": 1.5810080160717734e-06,
      "loss": 0.2873,
      "step": 1142
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.240153940362432,
      "learning_rate": 1.5802607847790107e-06,
      "loss": 0.278,
      "step": 1143
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.379603346081565,
      "learning_rate": 1.5795130647110753e-06,
      "loss": 0.3027,
      "step": 1144
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.356341932955021,
      "learning_rate": 1.5787648564977998e-06,
      "loss": 0.3085,
      "step": 1145
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4227890292727765,
      "learning_rate": 1.5780161607694275e-06,
      "loss": 0.3099,
      "step": 1146
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6094467312618272,
      "learning_rate": 1.577266978156613e-06,
      "loss": 0.3307,
      "step": 1147
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4273403176635058,
      "learning_rate": 1.5765173092904201e-06,
      "loss": 0.2923,
      "step": 1148
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4037099389004934,
      "learning_rate": 1.5757671548023228e-06,
      "loss": 0.3346,
      "step": 1149
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1568447136717457,
      "learning_rate": 1.5750165153242044e-06,
      "loss": 0.2583,
      "step": 1150
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.28801464145824,
      "learning_rate": 1.5742653914883558e-06,
      "loss": 0.2924,
      "step": 1151
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.529364561744506,
      "learning_rate": 1.5735137839274773e-06,
      "loss": 0.2886,
      "step": 1152
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.327560334688552,
      "learning_rate": 1.5727616932746746e-06,
      "loss": 0.285,
      "step": 1153
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4283275210499173,
      "learning_rate": 1.5720091201634627e-06,
      "loss": 0.299,
      "step": 1154
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5399568436656264,
      "learning_rate": 1.5712560652277609e-06,
      "loss": 0.2833,
      "step": 1155
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3646366116477813,
      "learning_rate": 1.570502529101896e-06,
      "loss": 0.3011,
      "step": 1156
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.94102514169586,
      "learning_rate": 1.5697485124205989e-06,
      "loss": 0.3032,
      "step": 1157
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.36875518092041,
      "learning_rate": 1.568994015819006e-06,
      "loss": 0.2749,
      "step": 1158
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.534169488455715,
      "learning_rate": 1.5682390399326582e-06,
      "loss": 0.3512,
      "step": 1159
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3107725895023434,
      "learning_rate": 1.567483585397499e-06,
      "loss": 0.2657,
      "step": 1160
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.6748111833306107,
      "learning_rate": 1.5667276528498763e-06,
      "loss": 0.3012,
      "step": 1161
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.2766830635131297,
      "learning_rate": 1.56597124292654e-06,
      "loss": 0.284,
      "step": 1162
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.542009323160178,
      "learning_rate": 1.5652143562646413e-06,
      "loss": 0.2992,
      "step": 1163
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.316537583857316,
      "learning_rate": 1.5644569935017355e-06,
      "loss": 0.2771,
      "step": 1164
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5038538788727873,
      "learning_rate": 1.563699155275776e-06,
      "loss": 0.3007,
      "step": 1165
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4159983582420788,
      "learning_rate": 1.5629408422251192e-06,
      "loss": 0.3034,
      "step": 1166
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4341610746771356,
      "learning_rate": 1.562182054988519e-06,
      "loss": 0.3088,
      "step": 1167
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.357812160161354,
      "learning_rate": 1.5614227942051307e-06,
      "loss": 0.2786,
      "step": 1168
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3163132363746737,
      "learning_rate": 1.5606630605145081e-06,
      "loss": 0.3147,
      "step": 1169
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.2351859561010263,
      "learning_rate": 1.5599028545566026e-06,
      "loss": 0.3012,
      "step": 1170
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4339611497994285,
      "learning_rate": 1.5591421769717642e-06,
      "loss": 0.2826,
      "step": 1171
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.624244763187848,
      "learning_rate": 1.5583810284007393e-06,
      "loss": 0.3324,
      "step": 1172
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.547134698048815,
      "learning_rate": 1.5576194094846722e-06,
      "loss": 0.3079,
      "step": 1173
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.527816531870315,
      "learning_rate": 1.5568573208651023e-06,
      "loss": 0.2957,
      "step": 1174
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.2182054977613723,
      "learning_rate": 1.5560947631839652e-06,
      "loss": 0.2692,
      "step": 1175
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.265694858532665,
      "learning_rate": 1.5553317370835913e-06,
      "loss": 0.2751,
      "step": 1176
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.443214358088767,
      "learning_rate": 1.5545682432067063e-06,
      "loss": 0.3132,
      "step": 1177
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5864134329362356,
      "learning_rate": 1.5538042821964292e-06,
      "loss": 0.275,
      "step": 1178
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.492520363058183,
      "learning_rate": 1.5530398546962729e-06,
      "loss": 0.3168,
      "step": 1179
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.6570124876738745,
      "learning_rate": 1.5522749613501423e-06,
      "loss": 0.2994,
      "step": 1180
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4265104474117507,
      "learning_rate": 1.5515096028023359e-06,
      "loss": 0.3041,
      "step": 1181
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.807562758250271,
      "learning_rate": 1.5507437796975434e-06,
      "loss": 0.2973,
      "step": 1182
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3246387128523778,
      "learning_rate": 1.5499774926808464e-06,
      "loss": 0.2884,
      "step": 1183
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2932871149447402,
      "learning_rate": 1.5492107423977166e-06,
      "loss": 0.2855,
      "step": 1184
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.435228543816678,
      "learning_rate": 1.548443529494016e-06,
      "loss": 0.3097,
      "step": 1185
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3510824680139604,
      "learning_rate": 1.5476758546159966e-06,
      "loss": 0.2742,
      "step": 1186
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.392760312472469,
      "learning_rate": 1.5469077184102996e-06,
      "loss": 0.3302,
      "step": 1187
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.343553702909353,
      "learning_rate": 1.5461391215239545e-06,
      "loss": 0.2465,
      "step": 1188
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.248803321374722,
      "learning_rate": 1.545370064604379e-06,
      "loss": 0.2789,
      "step": 1189
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2905096127758915,
      "learning_rate": 1.544600548299378e-06,
      "loss": 0.2953,
      "step": 1190
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.158849950842058,
      "learning_rate": 1.5438305732571442e-06,
      "loss": 0.2784,
      "step": 1191
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.7014072777834843,
      "learning_rate": 1.543060140126255e-06,
      "loss": 0.2989,
      "step": 1192
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4406118157452497,
      "learning_rate": 1.5422892495556764e-06,
      "loss": 0.2828,
      "step": 1193
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.309427539691521,
      "learning_rate": 1.5415179021947565e-06,
      "loss": 0.2881,
      "step": 1194
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3087869140253527,
      "learning_rate": 1.5407460986932309e-06,
      "loss": 0.3104,
      "step": 1195
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4365948516037017,
      "learning_rate": 1.5399738397012176e-06,
      "loss": 0.3117,
      "step": 1196
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.472925420715825,
      "learning_rate": 1.5392011258692197e-06,
      "loss": 0.297,
      "step": 1197
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4954071083244327,
      "learning_rate": 1.538427957848122e-06,
      "loss": 0.2885,
      "step": 1198
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.370015598392829,
      "learning_rate": 1.5376543362891932e-06,
      "loss": 0.3013,
      "step": 1199
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.352657234394544,
      "learning_rate": 1.5368802618440829e-06,
      "loss": 0.2781,
      "step": 1200
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.413009305045956,
      "learning_rate": 1.5361057351648228e-06,
      "loss": 0.294,
      "step": 1201
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4136284994593225,
      "learning_rate": 1.5353307569038254e-06,
      "loss": 0.3016,
      "step": 1202
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.5447870948805607,
      "learning_rate": 1.5345553277138846e-06,
      "loss": 0.3485,
      "step": 1203
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.230921132142122,
      "learning_rate": 1.5337794482481714e-06,
      "loss": 0.2858,
      "step": 1204
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.38145420246431,
      "learning_rate": 1.5330031191602393e-06,
      "loss": 0.2674,
      "step": 1205
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.275552437256254,
      "learning_rate": 1.5322263411040185e-06,
      "loss": 0.2731,
      "step": 1206
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.5017524293312565,
      "learning_rate": 1.5314491147338178e-06,
      "loss": 0.276,
      "step": 1207
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3438144876797264,
      "learning_rate": 1.530671440704324e-06,
      "loss": 0.2789,
      "step": 1208
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2085904618308208,
      "learning_rate": 1.5298933196706008e-06,
      "loss": 0.2342,
      "step": 1209
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4759768263587394,
      "learning_rate": 1.5291147522880884e-06,
      "loss": 0.2941,
      "step": 1210
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4202423096646886,
      "learning_rate": 1.528335739212603e-06,
      "loss": 0.3279,
      "step": 1211
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3966984192951535,
      "learning_rate": 1.5275562811003363e-06,
      "loss": 0.2772,
      "step": 1212
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.199944532742987,
      "learning_rate": 1.5267763786078541e-06,
      "loss": 0.2842,
      "step": 1213
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.508334908719426,
      "learning_rate": 1.525996032392098e-06,
      "loss": 0.2911,
      "step": 1214
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2262275947096417,
      "learning_rate": 1.525215243110382e-06,
      "loss": 0.292,
      "step": 1215
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2895553897355208,
      "learning_rate": 1.5244340114203943e-06,
      "loss": 0.2798,
      "step": 1216
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2505915029751153,
      "learning_rate": 1.5236523379801951e-06,
      "loss": 0.2813,
      "step": 1217
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5301472784695105,
      "learning_rate": 1.522870223448217e-06,
      "loss": 0.3055,
      "step": 1218
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.467104811055958,
      "learning_rate": 1.5220876684832638e-06,
      "loss": 0.3138,
      "step": 1219
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4211238944654565,
      "learning_rate": 1.5213046737445105e-06,
      "loss": 0.2889,
      "step": 1220
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4038218910709306,
      "learning_rate": 1.5205212398915032e-06,
      "loss": 0.2955,
      "step": 1221
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.233445129687268,
      "learning_rate": 1.5197373675841569e-06,
      "loss": 0.2641,
      "step": 1222
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6361531058162115,
      "learning_rate": 1.5189530574827565e-06,
      "loss": 0.2966,
      "step": 1223
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4076912906486867,
      "learning_rate": 1.518168310247955e-06,
      "loss": 0.3003,
      "step": 1224
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4118387221922744,
      "learning_rate": 1.5173831265407747e-06,
      "loss": 0.311,
      "step": 1225
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.8245527076546093,
      "learning_rate": 1.5165975070226043e-06,
      "loss": 0.3364,
      "step": 1226
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.602810109791664,
      "learning_rate": 1.515811452355201e-06,
      "loss": 0.3041,
      "step": 1227
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.347826721773315,
      "learning_rate": 1.5150249632006868e-06,
      "loss": 0.275,
      "step": 1228
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4794242356386733,
      "learning_rate": 1.5142380402215518e-06,
      "loss": 0.3054,
      "step": 1229
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.577687203706355,
      "learning_rate": 1.5134506840806496e-06,
      "loss": 0.3037,
      "step": 1230
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3642776332377244,
      "learning_rate": 1.5126628954411999e-06,
      "loss": 0.3193,
      "step": 1231
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.8048821645424717,
      "learning_rate": 1.5118746749667862e-06,
      "loss": 0.3046,
      "step": 1232
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.344113545456005,
      "learning_rate": 1.5110860233213554e-06,
      "loss": 0.2815,
      "step": 1233
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.281481302849905,
      "learning_rate": 1.5102969411692183e-06,
      "loss": 0.3236,
      "step": 1234
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.276485093800136,
      "learning_rate": 1.5095074291750485e-06,
      "loss": 0.2774,
      "step": 1235
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.338175236841784,
      "learning_rate": 1.5087174880038806e-06,
      "loss": 0.2859,
      "step": 1236
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3029685841528056,
      "learning_rate": 1.5079271183211116e-06,
      "loss": 0.2911,
      "step": 1237
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.407994836006956,
      "learning_rate": 1.5071363207924992e-06,
      "loss": 0.3106,
      "step": 1238
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5374727320190633,
      "learning_rate": 1.5063450960841614e-06,
      "loss": 0.2836,
      "step": 1239
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3499467670805023,
      "learning_rate": 1.5055534448625764e-06,
      "loss": 0.2795,
      "step": 1240
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4266429590953495,
      "learning_rate": 1.5047613677945808e-06,
      "loss": 0.2749,
      "step": 1241
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4657170841490976,
      "learning_rate": 1.503968865547371e-06,
      "loss": 0.2837,
      "step": 1242
    },
    {
      "epoch": 0.35,
      "grad_norm": 5.0976536509730455,
      "learning_rate": 1.5031759387885007e-06,
      "loss": 0.3269,
      "step": 1243
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.628231359188434,
      "learning_rate": 1.5023825881858818e-06,
      "loss": 0.3152,
      "step": 1244
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4756832958452573,
      "learning_rate": 1.5015888144077824e-06,
      "loss": 0.3073,
      "step": 1245
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.298512467341184,
      "learning_rate": 1.5007946181228283e-06,
      "loss": 0.3061,
      "step": 1246
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.52945706170183,
      "learning_rate": 1.5e-06,
      "loss": 0.3178,
      "step": 1247
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5724615855504926,
      "learning_rate": 1.4992049607086339e-06,
      "loss": 0.3417,
      "step": 1248
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.727288725787042,
      "learning_rate": 1.4984095009184212e-06,
      "loss": 0.3349,
      "step": 1249
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.350197638440752,
      "learning_rate": 1.497613621299407e-06,
      "loss": 0.3041,
      "step": 1250
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.7132496007296156,
      "learning_rate": 1.4968173225219901e-06,
      "loss": 0.313,
      "step": 1251
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5199034855021836,
      "learning_rate": 1.496020605256923e-06,
      "loss": 0.3085,
      "step": 1252
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2147023860745105,
      "learning_rate": 1.4952234701753095e-06,
      "loss": 0.2737,
      "step": 1253
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.297883065371963,
      "learning_rate": 1.4944259179486065e-06,
      "loss": 0.2783,
      "step": 1254
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3449804782716774,
      "learning_rate": 1.493627949248622e-06,
      "loss": 0.314,
      "step": 1255
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4267078408027443,
      "learning_rate": 1.492829564747514e-06,
      "loss": 0.3169,
      "step": 1256
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.6432791420563784,
      "learning_rate": 1.492030765117792e-06,
      "loss": 0.3107,
      "step": 1257
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.7012532465585966,
      "learning_rate": 1.4912315510323137e-06,
      "loss": 0.329,
      "step": 1258
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.486228397791336,
      "learning_rate": 1.4904319231642876e-06,
      "loss": 0.3027,
      "step": 1259
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.41871838980173,
      "learning_rate": 1.4896318821872696e-06,
      "loss": 0.3126,
      "step": 1260
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5420371517098084,
      "learning_rate": 1.4888314287751638e-06,
      "loss": 0.3158,
      "step": 1261
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3839764981648215,
      "learning_rate": 1.488030563602222e-06,
      "loss": 0.3023,
      "step": 1262
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4532950147848407,
      "learning_rate": 1.4872292873430424e-06,
      "loss": 0.2901,
      "step": 1263
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5054399027116503,
      "learning_rate": 1.4864276006725698e-06,
      "loss": 0.3057,
      "step": 1264
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.477920531834612,
      "learning_rate": 1.4856255042660943e-06,
      "loss": 0.2954,
      "step": 1265
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.935540643162236,
      "learning_rate": 1.484822998799252e-06,
      "loss": 0.2844,
      "step": 1266
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.314201674178338,
      "learning_rate": 1.4840200849480225e-06,
      "loss": 0.2863,
      "step": 1267
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3731413875042193,
      "learning_rate": 1.4832167633887305e-06,
      "loss": 0.3051,
      "step": 1268
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3892817055908777,
      "learning_rate": 1.482413034798043e-06,
      "loss": 0.3129,
      "step": 1269
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.302263982367759,
      "learning_rate": 1.4816088998529706e-06,
      "loss": 0.2664,
      "step": 1270
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.6512705403947354,
      "learning_rate": 1.480804359230866e-06,
      "loss": 0.2965,
      "step": 1271
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.347288821858566,
      "learning_rate": 1.4799994136094232e-06,
      "loss": 0.301,
      "step": 1272
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5333151896115873,
      "learning_rate": 1.4791940636666782e-06,
      "loss": 0.3012,
      "step": 1273
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5264247786210148,
      "learning_rate": 1.4783883100810073e-06,
      "loss": 0.3196,
      "step": 1274
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.462515547861249,
      "learning_rate": 1.4775821535311259e-06,
      "loss": 0.3057,
      "step": 1275
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.447574287258765,
      "learning_rate": 1.47677559469609e-06,
      "loss": 0.3101,
      "step": 1276
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4266565567789664,
      "learning_rate": 1.4759686342552943e-06,
      "loss": 0.271,
      "step": 1277
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2897497408615908,
      "learning_rate": 1.475161272888471e-06,
      "loss": 0.2681,
      "step": 1278
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0860403283083073,
      "learning_rate": 1.4743535112756908e-06,
      "loss": 0.2544,
      "step": 1279
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.392018732079389,
      "learning_rate": 1.4735453500973609e-06,
      "loss": 0.2836,
      "step": 1280
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.23012471953278,
      "learning_rate": 1.4727367900342258e-06,
      "loss": 0.2767,
      "step": 1281
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.272102233160078,
      "learning_rate": 1.4719278317673654e-06,
      "loss": 0.3001,
      "step": 1282
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.39901218259605,
      "learning_rate": 1.4711184759781953e-06,
      "loss": 0.2574,
      "step": 1283
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5600352664145354,
      "learning_rate": 1.4703087233484659e-06,
      "loss": 0.3206,
      "step": 1284
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.9198388613999184,
      "learning_rate": 1.469498574560262e-06,
      "loss": 0.3024,
      "step": 1285
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4103965434314567,
      "learning_rate": 1.4686880302960018e-06,
      "loss": 0.3014,
      "step": 1286
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.209789038709257,
      "learning_rate": 1.4678770912384368e-06,
      "loss": 0.249,
      "step": 1287
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.3618412034454384,
      "learning_rate": 1.467065758070651e-06,
      "loss": 0.2966,
      "step": 1288
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3019549434185347,
      "learning_rate": 1.4662540314760605e-06,
      "loss": 0.2788,
      "step": 1289
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4662661882754224,
      "learning_rate": 1.4654419121384126e-06,
      "loss": 0.2831,
      "step": 1290
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.454442520748841,
      "learning_rate": 1.4646294007417856e-06,
      "loss": 0.302,
      "step": 1291
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.340806467057121,
      "learning_rate": 1.463816497970588e-06,
      "loss": 0.329,
      "step": 1292
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.449099525994602,
      "learning_rate": 1.4630032045095579e-06,
      "loss": 0.3047,
      "step": 1293
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6306339717083382,
      "learning_rate": 1.4621895210437625e-06,
      "loss": 0.3269,
      "step": 1294
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2548608028685004,
      "learning_rate": 1.4613754482585977e-06,
      "loss": 0.2985,
      "step": 1295
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.37377254194116,
      "learning_rate": 1.4605609868397872e-06,
      "loss": 0.2989,
      "step": 1296
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2530252554578647,
      "learning_rate": 1.4597461374733815e-06,
      "loss": 0.3155,
      "step": 1297
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6798879482553373,
      "learning_rate": 1.4589309008457594e-06,
      "loss": 0.2738,
      "step": 1298
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4502889632471945,
      "learning_rate": 1.4581152776436238e-06,
      "loss": 0.3192,
      "step": 1299
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6298976641211786,
      "learning_rate": 1.4572992685540056e-06,
      "loss": 0.3167,
      "step": 1300
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3641713607691983,
      "learning_rate": 1.4564828742642583e-06,
      "loss": 0.289,
      "step": 1301
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.367238462783568,
      "learning_rate": 1.455666095462062e-06,
      "loss": 0.2841,
      "step": 1302
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.494597823178741,
      "learning_rate": 1.4548489328354194e-06,
      "loss": 0.3052,
      "step": 1303
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.549571518755505,
      "learning_rate": 1.4540313870726568e-06,
      "loss": 0.3206,
      "step": 1304
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.34529117759076,
      "learning_rate": 1.4532134588624233e-06,
      "loss": 0.2947,
      "step": 1305
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3384051528527823,
      "learning_rate": 1.4523951488936903e-06,
      "loss": 0.3108,
      "step": 1306
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.223749125445581,
      "learning_rate": 1.451576457855751e-06,
      "loss": 0.2693,
      "step": 1307
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.466587040514915,
      "learning_rate": 1.4507573864382186e-06,
      "loss": 0.2897,
      "step": 1308
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4013650330596032,
      "learning_rate": 1.4499379353310272e-06,
      "loss": 0.3095,
      "step": 1309
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.329526517251253,
      "learning_rate": 1.4491181052244315e-06,
      "loss": 0.3053,
      "step": 1310
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2325386851804963,
      "learning_rate": 1.4482978968090043e-06,
      "loss": 0.2799,
      "step": 1311
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.739730141975504,
      "learning_rate": 1.4474773107756378e-06,
      "loss": 0.2819,
      "step": 1312
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4059311840397424,
      "learning_rate": 1.446656347815542e-06,
      "loss": 0.3184,
      "step": 1313
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5043427586314624,
      "learning_rate": 1.4458350086202442e-06,
      "loss": 0.3358,
      "step": 1314
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5523646241136073,
      "learning_rate": 1.4450132938815893e-06,
      "loss": 0.324,
      "step": 1315
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6696043592655574,
      "learning_rate": 1.4441912042917378e-06,
      "loss": 0.3198,
      "step": 1316
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4038993670802613,
      "learning_rate": 1.4433687405431661e-06,
      "loss": 0.3094,
      "step": 1317
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2708458215823537,
      "learning_rate": 1.4425459033286663e-06,
      "loss": 0.2929,
      "step": 1318
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.228885189442795,
      "learning_rate": 1.4417226933413445e-06,
      "loss": 0.2684,
      "step": 1319
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.3560738300099113,
      "learning_rate": 1.4408991112746209e-06,
      "loss": 0.3066,
      "step": 1320
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3347511327984707,
      "learning_rate": 1.4400751578222293e-06,
      "loss": 0.2655,
      "step": 1321
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4636897945354117,
      "learning_rate": 1.4392508336782165e-06,
      "loss": 0.2594,
      "step": 1322
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3437567986546166,
      "learning_rate": 1.4384261395369405e-06,
      "loss": 0.2944,
      "step": 1323
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.430515865063001,
      "learning_rate": 1.4376010760930727e-06,
      "loss": 0.3145,
      "step": 1324
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.648183336932284,
      "learning_rate": 1.436775644041594e-06,
      "loss": 0.3171,
      "step": 1325
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1948854220827525,
      "learning_rate": 1.4359498440777969e-06,
      "loss": 0.2954,
      "step": 1326
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.424356777798155,
      "learning_rate": 1.4351236768972827e-06,
      "loss": 0.295,
      "step": 1327
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3561806922810296,
      "learning_rate": 1.4342971431959633e-06,
      "loss": 0.2942,
      "step": 1328
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2325216270315376,
      "learning_rate": 1.4334702436700582e-06,
      "loss": 0.2743,
      "step": 1329
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.388209633531802,
      "learning_rate": 1.4326429790160957e-06,
      "loss": 0.3036,
      "step": 1330
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.518564073310508,
      "learning_rate": 1.4318153499309115e-06,
      "loss": 0.2809,
      "step": 1331
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3524718365011186,
      "learning_rate": 1.4309873571116484e-06,
      "loss": 0.2934,
      "step": 1332
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4008119110054373,
      "learning_rate": 1.4301590012557552e-06,
      "loss": 0.2948,
      "step": 1333
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.657182854931203,
      "learning_rate": 1.4293302830609869e-06,
      "loss": 0.2982,
      "step": 1334
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4741787245997418,
      "learning_rate": 1.4285012032254033e-06,
      "loss": 0.3052,
      "step": 1335
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.266535481706884,
      "learning_rate": 1.4276717624473695e-06,
      "loss": 0.2751,
      "step": 1336
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.499984914973482,
      "learning_rate": 1.4268419614255543e-06,
      "loss": 0.3218,
      "step": 1337
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.167267266773434,
      "learning_rate": 1.4260118008589293e-06,
      "loss": 0.2849,
      "step": 1338
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3809186742123614,
      "learning_rate": 1.42518128144677e-06,
      "loss": 0.3186,
      "step": 1339
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.276436866987727,
      "learning_rate": 1.4243504038886528e-06,
      "loss": 0.2769,
      "step": 1340
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.7771729871088713,
      "learning_rate": 1.4235191688844583e-06,
      "loss": 0.3143,
      "step": 1341
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5976626344842813,
      "learning_rate": 1.4226875771343654e-06,
      "loss": 0.3003,
      "step": 1342
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3154780781114837,
      "learning_rate": 1.4218556293388547e-06,
      "loss": 0.2912,
      "step": 1343
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3516866583648466,
      "learning_rate": 1.4210233261987069e-06,
      "loss": 0.2951,
      "step": 1344
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6133293177904156,
      "learning_rate": 1.4201906684150019e-06,
      "loss": 0.32,
      "step": 1345
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.349221085671353,
      "learning_rate": 1.4193576566891179e-06,
      "loss": 0.2934,
      "step": 1346
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3372859553928187,
      "learning_rate": 1.418524291722732e-06,
      "loss": 0.2875,
      "step": 1347
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.9969867158207175,
      "learning_rate": 1.4176905742178178e-06,
      "loss": 0.2858,
      "step": 1348
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.469367670387917,
      "learning_rate": 1.4168565048766473e-06,
      "loss": 0.3216,
      "step": 1349
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.433107326687296,
      "learning_rate": 1.4160220844017873e-06,
      "loss": 0.3011,
      "step": 1350
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.44869944836565,
      "learning_rate": 1.4151873134961011e-06,
      "loss": 0.2823,
      "step": 1351
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3356654813249977,
      "learning_rate": 1.4143521928627477e-06,
      "loss": 0.2743,
      "step": 1352
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4618631831211477,
      "learning_rate": 1.41351672320518e-06,
      "loss": 0.3103,
      "step": 1353
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3728132538728643,
      "learning_rate": 1.4126809052271451e-06,
      "loss": 0.2894,
      "step": 1354
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1921637488839747,
      "learning_rate": 1.411844739632683e-06,
      "loss": 0.2991,
      "step": 1355
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2401465864522305,
      "learning_rate": 1.4110082271261277e-06,
      "loss": 0.2916,
      "step": 1356
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1374788781683236,
      "learning_rate": 1.410171368412104e-06,
      "loss": 0.27,
      "step": 1357
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2226597845776324,
      "learning_rate": 1.4093341641955296e-06,
      "loss": 0.313,
      "step": 1358
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2209502544829482,
      "learning_rate": 1.4084966151816122e-06,
      "loss": 0.3006,
      "step": 1359
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2293956956621597,
      "learning_rate": 1.4076587220758508e-06,
      "loss": 0.2945,
      "step": 1360
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.469630659844368,
      "learning_rate": 1.4068204855840336e-06,
      "loss": 0.2862,
      "step": 1361
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.7961374088455777,
      "learning_rate": 1.405981906412238e-06,
      "loss": 0.3437,
      "step": 1362
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.422097199933655,
      "learning_rate": 1.4051429852668311e-06,
      "loss": 0.2894,
      "step": 1363
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.6518743875649915,
      "learning_rate": 1.4043037228544665e-06,
      "loss": 0.3017,
      "step": 1364
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5012891328415634,
      "learning_rate": 1.4034641198820865e-06,
      "loss": 0.3169,
      "step": 1365
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3793187249527525,
      "learning_rate": 1.4026241770569196e-06,
      "loss": 0.2922,
      "step": 1366
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4635007290354376,
      "learning_rate": 1.4017838950864806e-06,
      "loss": 0.3042,
      "step": 1367
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4776118195788914,
      "learning_rate": 1.4009432746785709e-06,
      "loss": 0.2756,
      "step": 1368
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3652753400904305,
      "learning_rate": 1.4001023165412753e-06,
      "loss": 0.3089,
      "step": 1369
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4321634002156793,
      "learning_rate": 1.3992610213829648e-06,
      "loss": 0.3024,
      "step": 1370
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3618306833159903,
      "learning_rate": 1.3984193899122932e-06,
      "loss": 0.2753,
      "step": 1371
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.462533985172125,
      "learning_rate": 1.3975774228381974e-06,
      "loss": 0.3393,
      "step": 1372
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3130478385308115,
      "learning_rate": 1.3967351208698982e-06,
      "loss": 0.2861,
      "step": 1373
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4529100901465632,
      "learning_rate": 1.3958924847168977e-06,
      "loss": 0.3029,
      "step": 1374
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.424126592675069,
      "learning_rate": 1.3950495150889793e-06,
      "loss": 0.3359,
      "step": 1375
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3408333339200533,
      "learning_rate": 1.3942062126962075e-06,
      "loss": 0.2858,
      "step": 1376
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5154163908242535,
      "learning_rate": 1.3933625782489274e-06,
      "loss": 0.3013,
      "step": 1377
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1942197379970936,
      "learning_rate": 1.3925186124577637e-06,
      "loss": 0.2779,
      "step": 1378
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.573288722683661,
      "learning_rate": 1.3916743160336195e-06,
      "loss": 0.2878,
      "step": 1379
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3597427022892417,
      "learning_rate": 1.3908296896876776e-06,
      "loss": 0.2953,
      "step": 1380
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.6721873029691863,
      "learning_rate": 1.389984734131398e-06,
      "loss": 0.2873,
      "step": 1381
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.1997325497798492,
      "learning_rate": 1.389139450076518e-06,
      "loss": 0.2886,
      "step": 1382
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.829582003955937,
      "learning_rate": 1.388293838235051e-06,
      "loss": 0.2966,
      "step": 1383
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.407857097243499,
      "learning_rate": 1.3874478993192885e-06,
      "loss": 0.2855,
      "step": 1384
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3886683252466736,
      "learning_rate": 1.3866016340417951e-06,
      "loss": 0.2789,
      "step": 1385
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5464389702961645,
      "learning_rate": 1.385755043115412e-06,
      "loss": 0.3101,
      "step": 1386
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.316650930929904,
      "learning_rate": 1.3849081272532544e-06,
      "loss": 0.3025,
      "step": 1387
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4580772632167855,
      "learning_rate": 1.3840608871687102e-06,
      "loss": 0.2894,
      "step": 1388
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5338692072902522,
      "learning_rate": 1.3832133235754415e-06,
      "loss": 0.2948,
      "step": 1389
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.417527611985879,
      "learning_rate": 1.3823654371873825e-06,
      "loss": 0.2873,
      "step": 1390
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4989649967624388,
      "learning_rate": 1.3815172287187393e-06,
      "loss": 0.2881,
      "step": 1391
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5528812441155635,
      "learning_rate": 1.3806686988839896e-06,
      "loss": 0.2971,
      "step": 1392
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5574316457528656,
      "learning_rate": 1.3798198483978813e-06,
      "loss": 0.2848,
      "step": 1393
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.50493766439196,
      "learning_rate": 1.3789706779754324e-06,
      "loss": 0.308,
      "step": 1394
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2752842924157215,
      "learning_rate": 1.3781211883319312e-06,
      "loss": 0.2848,
      "step": 1395
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.381552220394106,
      "learning_rate": 1.3772713801829336e-06,
      "loss": 0.2649,
      "step": 1396
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.319700989497983,
      "learning_rate": 1.3764212542442655e-06,
      "loss": 0.2681,
      "step": 1397
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3685340252808373,
      "learning_rate": 1.3755708112320185e-06,
      "loss": 0.3195,
      "step": 1398
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.356275959928643,
      "learning_rate": 1.3747200518625529e-06,
      "loss": 0.3112,
      "step": 1399
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4613648289880636,
      "learning_rate": 1.3738689768524944e-06,
      "loss": 0.2868,
      "step": 1400
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.636826673880524,
      "learning_rate": 1.3730175869187356e-06,
      "loss": 0.3307,
      "step": 1401
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.185141789325747,
      "learning_rate": 1.3721658827784333e-06,
      "loss": 0.277,
      "step": 1402
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.284884901630232,
      "learning_rate": 1.37131386514901e-06,
      "loss": 0.2662,
      "step": 1403
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5056304950275905,
      "learning_rate": 1.370461534748151e-06,
      "loss": 0.2736,
      "step": 1404
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6112929606416824,
      "learning_rate": 1.3696088922938063e-06,
      "loss": 0.3041,
      "step": 1405
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.518370343635079,
      "learning_rate": 1.3687559385041883e-06,
      "loss": 0.2924,
      "step": 1406
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.452505799257892,
      "learning_rate": 1.3679026740977716e-06,
      "loss": 0.3219,
      "step": 1407
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3448228996764517,
      "learning_rate": 1.367049099793292e-06,
      "loss": 0.2965,
      "step": 1408
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5390555374689074,
      "learning_rate": 1.3661952163097472e-06,
      "loss": 0.2962,
      "step": 1409
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.481322721652436,
      "learning_rate": 1.3653410243663951e-06,
      "loss": 0.2905,
      "step": 1410
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.686838626482608,
      "learning_rate": 1.3644865246827527e-06,
      "loss": 0.3123,
      "step": 1411
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.384974185059098,
      "learning_rate": 1.363631717978597e-06,
      "loss": 0.2874,
      "step": 1412
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5990639729189606,
      "learning_rate": 1.3627766049739633e-06,
      "loss": 0.3079,
      "step": 1413
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3712178342874344,
      "learning_rate": 1.3619211863891456e-06,
      "loss": 0.3038,
      "step": 1414
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.524361510330734,
      "learning_rate": 1.3610654629446936e-06,
      "loss": 0.336,
      "step": 1415
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0728242696264614,
      "learning_rate": 1.3602094353614158e-06,
      "loss": 0.2532,
      "step": 1416
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3929540560511,
      "learning_rate": 1.3593531043603755e-06,
      "loss": 0.2928,
      "step": 1417
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2288066754224736,
      "learning_rate": 1.3584964706628921e-06,
      "loss": 0.2806,
      "step": 1418
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3635592025311642,
      "learning_rate": 1.35763953499054e-06,
      "loss": 0.2875,
      "step": 1419
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3988417976914302,
      "learning_rate": 1.356782298065148e-06,
      "loss": 0.2665,
      "step": 1420
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3297542920898744,
      "learning_rate": 1.3559247606087984e-06,
      "loss": 0.2878,
      "step": 1421
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.326689143317108,
      "learning_rate": 1.355066923343827e-06,
      "loss": 0.2892,
      "step": 1422
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3010274507614974,
      "learning_rate": 1.3542087869928213e-06,
      "loss": 0.2607,
      "step": 1423
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6613557163218498,
      "learning_rate": 1.3533503522786223e-06,
      "loss": 0.3039,
      "step": 1424
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4466499629846106,
      "learning_rate": 1.3524916199243208e-06,
      "loss": 0.3095,
      "step": 1425
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.547045510727559,
      "learning_rate": 1.351632590653259e-06,
      "loss": 0.28,
      "step": 1426
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2667498039894647,
      "learning_rate": 1.3507732651890291e-06,
      "loss": 0.2992,
      "step": 1427
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.49008724136304,
      "learning_rate": 1.349913644255473e-06,
      "loss": 0.279,
      "step": 1428
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.719503341700533,
      "learning_rate": 1.3490537285766808e-06,
      "loss": 0.2948,
      "step": 1429
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4092554173724507,
      "learning_rate": 1.3481935188769917e-06,
      "loss": 0.2996,
      "step": 1430
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4216625872037585,
      "learning_rate": 1.3473330158809924e-06,
      "loss": 0.282,
      "step": 1431
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1808475970040537,
      "learning_rate": 1.346472220313516e-06,
      "loss": 0.2887,
      "step": 1432
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3738790822075817,
      "learning_rate": 1.3456111328996428e-06,
      "loss": 0.2629,
      "step": 1433
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.461742161075914,
      "learning_rate": 1.344749754364699e-06,
      "loss": 0.3071,
      "step": 1434
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.458297651337914,
      "learning_rate": 1.343888085434255e-06,
      "loss": 0.3058,
      "step": 1435
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3528781827973866,
      "learning_rate": 1.343026126834127e-06,
      "loss": 0.2796,
      "step": 1436
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4443019851876113,
      "learning_rate": 1.3421638792903743e-06,
      "loss": 0.3051,
      "step": 1437
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4157361534282247,
      "learning_rate": 1.3413013435293002e-06,
      "loss": 0.3129,
      "step": 1438
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.6256696001134663,
      "learning_rate": 1.3404385202774504e-06,
      "loss": 0.3422,
      "step": 1439
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.494653539752748,
      "learning_rate": 1.3395754102616133e-06,
      "loss": 0.3017,
      "step": 1440
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.533547772461133,
      "learning_rate": 1.338712014208818e-06,
      "loss": 0.3079,
      "step": 1441
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2545374483944847,
      "learning_rate": 1.3378483328463351e-06,
      "loss": 0.2881,
      "step": 1442
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2384615081575245,
      "learning_rate": 1.3369843669016756e-06,
      "loss": 0.2581,
      "step": 1443
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3973511771959712,
      "learning_rate": 1.33612011710259e-06,
      "loss": 0.2767,
      "step": 1444
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.6139147945528984,
      "learning_rate": 1.335255584177068e-06,
      "loss": 0.3135,
      "step": 1445
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.5360027379877477,
      "learning_rate": 1.3343907688533375e-06,
      "loss": 0.2835,
      "step": 1446
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.338124638027161,
      "learning_rate": 1.333525671859865e-06,
      "loss": 0.2852,
      "step": 1447
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4812362246871094,
      "learning_rate": 1.332660293925353e-06,
      "loss": 0.3102,
      "step": 1448
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.444894602358509,
      "learning_rate": 1.3317946357787424e-06,
      "loss": 0.302,
      "step": 1449
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3811549830187424,
      "learning_rate": 1.3309286981492082e-06,
      "loss": 0.2827,
      "step": 1450
    },
    {
      "epoch": 0.41,
      "grad_norm": 4.15688641411048,
      "learning_rate": 1.3300624817661626e-06,
      "loss": 0.2674,
      "step": 1451
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.365390398206374,
      "learning_rate": 1.3291959873592507e-06,
      "loss": 0.2953,
      "step": 1452
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3143132163507927,
      "learning_rate": 1.328329215658354e-06,
      "loss": 0.2969,
      "step": 1453
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3680841714427667,
      "learning_rate": 1.327462167393586e-06,
      "loss": 0.2858,
      "step": 1454
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4333096082055556,
      "learning_rate": 1.3265948432952934e-06,
      "loss": 0.3119,
      "step": 1455
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3979577441652404,
      "learning_rate": 1.3257272440940556e-06,
      "loss": 0.2716,
      "step": 1456
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.780283482227355,
      "learning_rate": 1.3248593705206837e-06,
      "loss": 0.3097,
      "step": 1457
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4727707069996874,
      "learning_rate": 1.3239912233062195e-06,
      "loss": 0.2949,
      "step": 1458
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.605144018890558,
      "learning_rate": 1.3231228031819358e-06,
      "loss": 0.3038,
      "step": 1459
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.30781291279654,
      "learning_rate": 1.322254110879335e-06,
      "loss": 0.3085,
      "step": 1460
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4557382603995013,
      "learning_rate": 1.321385147130149e-06,
      "loss": 0.3176,
      "step": 1461
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.480506129867659,
      "learning_rate": 1.320515912666338e-06,
      "loss": 0.3305,
      "step": 1462
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.5727163800612325,
      "learning_rate": 1.3196464082200901e-06,
      "loss": 0.2969,
      "step": 1463
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1255081785167396,
      "learning_rate": 1.318776634523822e-06,
      "loss": 0.2373,
      "step": 1464
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5116755100727595,
      "learning_rate": 1.3179065923101757e-06,
      "loss": 0.2783,
      "step": 1465
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2406852442419862,
      "learning_rate": 1.3170362823120202e-06,
      "loss": 0.2865,
      "step": 1466
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.357655391327485,
      "learning_rate": 1.3161657052624496e-06,
      "loss": 0.2699,
      "step": 1467
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4125721643155313,
      "learning_rate": 1.3152948618947836e-06,
      "loss": 0.315,
      "step": 1468
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5164883344248845,
      "learning_rate": 1.3144237529425652e-06,
      "loss": 0.3061,
      "step": 1469
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3188123708680113,
      "learning_rate": 1.313552379139563e-06,
      "loss": 0.2862,
      "step": 1470
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3745493691138244,
      "learning_rate": 1.3126807412197664e-06,
      "loss": 0.3067,
      "step": 1471
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.50456059069696,
      "learning_rate": 1.3118088399173886e-06,
      "loss": 0.2483,
      "step": 1472
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4560490015824437,
      "learning_rate": 1.3109366759668646e-06,
      "loss": 0.2723,
      "step": 1473
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5134191311678173,
      "learning_rate": 1.31006425010285e-06,
      "loss": 0.3037,
      "step": 1474
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3293536029879425,
      "learning_rate": 1.3091915630602222e-06,
      "loss": 0.2851,
      "step": 1475
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3190242087963773,
      "learning_rate": 1.308318615574077e-06,
      "loss": 0.2658,
      "step": 1476
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3274324770049466,
      "learning_rate": 1.3074454083797307e-06,
      "loss": 0.2773,
      "step": 1477
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4771135962414292,
      "learning_rate": 1.3065719422127185e-06,
      "loss": 0.302,
      "step": 1478
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3441652217311866,
      "learning_rate": 1.3056982178087933e-06,
      "loss": 0.291,
      "step": 1479
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5380690988427403,
      "learning_rate": 1.3048242359039247e-06,
      "loss": 0.3169,
      "step": 1480
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2385674340764274,
      "learning_rate": 1.303949997234301e-06,
      "loss": 0.2914,
      "step": 1481
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5364022580181365,
      "learning_rate": 1.3030755025363255e-06,
      "loss": 0.306,
      "step": 1482
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.446531539254504,
      "learning_rate": 1.3022007525466179e-06,
      "loss": 0.336,
      "step": 1483
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4545408603051793,
      "learning_rate": 1.3013257480020114e-06,
      "loss": 0.2919,
      "step": 1484
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3489981204127623,
      "learning_rate": 1.3004504896395562e-06,
      "loss": 0.2909,
      "step": 1485
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.3814888503667264,
      "learning_rate": 1.2995749781965136e-06,
      "loss": 0.2893,
      "step": 1486
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.419910649136688,
      "learning_rate": 1.2986992144103606e-06,
      "loss": 0.2879,
      "step": 1487
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.619557910339679,
      "learning_rate": 1.2978231990187847e-06,
      "loss": 0.2802,
      "step": 1488
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2204889549475744,
      "learning_rate": 1.2969469327596859e-06,
      "loss": 0.2897,
      "step": 1489
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.38645753292471,
      "learning_rate": 1.2960704163711766e-06,
      "loss": 0.2963,
      "step": 1490
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.47586048779542,
      "learning_rate": 1.2951936505915781e-06,
      "loss": 0.2775,
      "step": 1491
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.406803415941365,
      "learning_rate": 1.294316636159424e-06,
      "loss": 0.2906,
      "step": 1492
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3808072486060525,
      "learning_rate": 1.2934393738134546e-06,
      "loss": 0.299,
      "step": 1493
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5512356401014133,
      "learning_rate": 1.2925618642926218e-06,
      "loss": 0.3172,
      "step": 1494
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.160246048496873,
      "learning_rate": 1.2916841083360834e-06,
      "loss": 0.268,
      "step": 1495
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.537746401420283,
      "learning_rate": 1.2908061066832063e-06,
      "loss": 0.2998,
      "step": 1496
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.522608636766397,
      "learning_rate": 1.289927860073564e-06,
      "loss": 0.2946,
      "step": 1497
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.371012661692452,
      "learning_rate": 1.2890493692469356e-06,
      "loss": 0.3024,
      "step": 1498
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.450688539105884,
      "learning_rate": 1.2881706349433067e-06,
      "loss": 0.2836,
      "step": 1499
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4682318267739607,
      "learning_rate": 1.2872916579028684e-06,
      "loss": 0.3102,
      "step": 1500
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.43757520685359,
      "learning_rate": 1.2864124388660146e-06,
      "loss": 0.2905,
      "step": 1501
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.790709881138362,
      "learning_rate": 1.2855329785733452e-06,
      "loss": 0.3042,
      "step": 1502
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4479829710987713,
      "learning_rate": 1.2846532777656612e-06,
      "loss": 0.2789,
      "step": 1503
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.375342138833745,
      "learning_rate": 1.2837733371839678e-06,
      "loss": 0.2797,
      "step": 1504
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2951167449970327,
      "learning_rate": 1.2828931575694718e-06,
      "loss": 0.2895,
      "step": 1505
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4220238351531425,
      "learning_rate": 1.2820127396635801e-06,
      "loss": 0.2767,
      "step": 1506
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.390590180143157,
      "learning_rate": 1.2811320842079026e-06,
      "loss": 0.2845,
      "step": 1507
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.227092280010057,
      "learning_rate": 1.2802511919442468e-06,
      "loss": 0.2727,
      "step": 1508
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.9350175829138374,
      "learning_rate": 1.279370063614622e-06,
      "loss": 0.3165,
      "step": 1509
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3556655543461025,
      "learning_rate": 1.2784886999612347e-06,
      "loss": 0.2972,
      "step": 1510
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4145727803902792,
      "learning_rate": 1.2776071017264907e-06,
      "loss": 0.3229,
      "step": 1511
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2701438676974655,
      "learning_rate": 1.276725269652992e-06,
      "loss": 0.2972,
      "step": 1512
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.279322336462779,
      "learning_rate": 1.275843204483539e-06,
      "loss": 0.2831,
      "step": 1513
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2549254803317544,
      "learning_rate": 1.274960906961128e-06,
      "loss": 0.2961,
      "step": 1514
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.410217984272276,
      "learning_rate": 1.2740783778289505e-06,
      "loss": 0.3046,
      "step": 1515
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.5502149712168185,
      "learning_rate": 1.273195617830394e-06,
      "loss": 0.2885,
      "step": 1516
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1287324402745083,
      "learning_rate": 1.2723126277090395e-06,
      "loss": 0.2714,
      "step": 1517
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.5024724957081674,
      "learning_rate": 1.2714294082086627e-06,
      "loss": 0.3027,
      "step": 1518
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.403173741361718,
      "learning_rate": 1.2705459600732317e-06,
      "loss": 0.3082,
      "step": 1519
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.338355201173731,
      "learning_rate": 1.2696622840469081e-06,
      "loss": 0.2848,
      "step": 1520
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3525510876983664,
      "learning_rate": 1.2687783808740448e-06,
      "loss": 0.2715,
      "step": 1521
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.280452801928687,
      "learning_rate": 1.2678942512991864e-06,
      "loss": 0.272,
      "step": 1522
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2859179747537994,
      "learning_rate": 1.2670098960670674e-06,
      "loss": 0.2747,
      "step": 1523
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.303075929644119,
      "learning_rate": 1.2661253159226138e-06,
      "loss": 0.2929,
      "step": 1524
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3018200914697458,
      "learning_rate": 1.2652405116109393e-06,
      "loss": 0.3046,
      "step": 1525
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.8220611835937754,
      "learning_rate": 1.2643554838773486e-06,
      "loss": 0.3014,
      "step": 1526
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3777860887932234,
      "learning_rate": 1.263470233467332e-06,
      "loss": 0.3024,
      "step": 1527
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.397804821536525,
      "learning_rate": 1.26258476112657e-06,
      "loss": 0.3062,
      "step": 1528
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.218905959145441,
      "learning_rate": 1.261699067600928e-06,
      "loss": 0.2794,
      "step": 1529
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.463662208750161,
      "learning_rate": 1.2608131536364589e-06,
      "loss": 0.285,
      "step": 1530
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.336920618973469,
      "learning_rate": 1.2599270199794006e-06,
      "loss": 0.2871,
      "step": 1531
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4511676946774426,
      "learning_rate": 1.259040667376176e-06,
      "loss": 0.2744,
      "step": 1532
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.445524281717222,
      "learning_rate": 1.2581540965733939e-06,
      "loss": 0.3044,
      "step": 1533
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.481605303885608,
      "learning_rate": 1.2572673083178447e-06,
      "loss": 0.2677,
      "step": 1534
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3038645377681823,
      "learning_rate": 1.2563803033565032e-06,
      "loss": 0.2978,
      "step": 1535
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.521409954147198,
      "learning_rate": 1.255493082436527e-06,
      "loss": 0.3202,
      "step": 1536
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3247669770557176,
      "learning_rate": 1.2546056463052548e-06,
      "loss": 0.2797,
      "step": 1537
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.284580868757689,
      "learning_rate": 1.2537179957102074e-06,
      "loss": 0.2746,
      "step": 1538
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.3220609927468523,
      "learning_rate": 1.2528301313990853e-06,
      "loss": 0.2953,
      "step": 1539
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3514220186854273,
      "learning_rate": 1.2519420541197693e-06,
      "loss": 0.2994,
      "step": 1540
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.6177756144523845,
      "learning_rate": 1.2510537646203207e-06,
      "loss": 0.2957,
      "step": 1541
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3045538164229953,
      "learning_rate": 1.2501652636489778e-06,
      "loss": 0.2742,
      "step": 1542
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.5292210544986977,
      "learning_rate": 1.249276551954159e-06,
      "loss": 0.2926,
      "step": 1543
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.397955383256873,
      "learning_rate": 1.2483876302844578e-06,
      "loss": 0.31,
      "step": 1544
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4427886972425847,
      "learning_rate": 1.2474984993886465e-06,
      "loss": 0.3102,
      "step": 1545
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.494204157300474,
      "learning_rate": 1.2466091600156733e-06,
      "loss": 0.2729,
      "step": 1546
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.332220076619459,
      "learning_rate": 1.2457196129146615e-06,
      "loss": 0.2446,
      "step": 1547
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2296539060987866,
      "learning_rate": 1.2448298588349096e-06,
      "loss": 0.2756,
      "step": 1548
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4852006453223505,
      "learning_rate": 1.2439398985258897e-06,
      "loss": 0.3134,
      "step": 1549
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4450105242836138,
      "learning_rate": 1.24304973273725e-06,
      "loss": 0.2894,
      "step": 1550
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1789569148926184,
      "learning_rate": 1.2421593622188086e-06,
      "loss": 0.2926,
      "step": 1551
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.125580836250774,
      "learning_rate": 1.2412687877205585e-06,
      "loss": 0.2825,
      "step": 1552
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.419826962755061,
      "learning_rate": 1.2403780099926633e-06,
      "loss": 0.2975,
      "step": 1553
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.335424495673667,
      "learning_rate": 1.2394870297854581e-06,
      "loss": 0.2841,
      "step": 1554
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1971134981537985,
      "learning_rate": 1.2385958478494484e-06,
      "loss": 0.3257,
      "step": 1555
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.6420619435757744,
      "learning_rate": 1.2377044649353102e-06,
      "loss": 0.3272,
      "step": 1556
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.6924042663801266,
      "learning_rate": 1.2368128817938882e-06,
      "loss": 0.2752,
      "step": 1557
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3389639420764436,
      "learning_rate": 1.2359210991761956e-06,
      "loss": 0.2989,
      "step": 1558
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.466397462305249,
      "learning_rate": 1.2350291178334144e-06,
      "loss": 0.2997,
      "step": 1559
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.448220145119163,
      "learning_rate": 1.2341369385168935e-06,
      "loss": 0.2712,
      "step": 1560
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.45491872341095,
      "learning_rate": 1.2332445619781489e-06,
      "loss": 0.2982,
      "step": 1561
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.397589962456345,
      "learning_rate": 1.2323519889688614e-06,
      "loss": 0.2792,
      "step": 1562
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3142801661017898,
      "learning_rate": 1.2314592202408795e-06,
      "loss": 0.2755,
      "step": 1563
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.527326221140043,
      "learning_rate": 1.2305662565462144e-06,
      "loss": 0.2994,
      "step": 1564
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3697314137556154,
      "learning_rate": 1.2296730986370436e-06,
      "loss": 0.2737,
      "step": 1565
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.8244358950556676,
      "learning_rate": 1.2287797472657063e-06,
      "loss": 0.2652,
      "step": 1566
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.5992271577632464,
      "learning_rate": 1.2278862031847059e-06,
      "loss": 0.3089,
      "step": 1567
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2422228745894803,
      "learning_rate": 1.2269924671467073e-06,
      "loss": 0.2793,
      "step": 1568
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.6203714518020855,
      "learning_rate": 1.226098539904538e-06,
      "loss": 0.317,
      "step": 1569
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.616453062567964,
      "learning_rate": 1.2252044222111857e-06,
      "loss": 0.2953,
      "step": 1570
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.160665725191548,
      "learning_rate": 1.2243101148197989e-06,
      "loss": 0.2658,
      "step": 1571
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2669549608244024,
      "learning_rate": 1.223415618483686e-06,
      "loss": 0.2762,
      "step": 1572
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1309593576379817,
      "learning_rate": 1.2225209339563143e-06,
      "loss": 0.2691,
      "step": 1573
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.502580046773122,
      "learning_rate": 1.22162606199131e-06,
      "loss": 0.3064,
      "step": 1574
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.183455320755513,
      "learning_rate": 1.2207310033424566e-06,
      "loss": 0.2644,
      "step": 1575
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0960195827035477,
      "learning_rate": 1.2198357587636956e-06,
      "loss": 0.2653,
      "step": 1576
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.8250176006360355,
      "learning_rate": 1.2189403290091244e-06,
      "loss": 0.3265,
      "step": 1577
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3678674723497974,
      "learning_rate": 1.218044714832997e-06,
      "loss": 0.2833,
      "step": 1578
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2569556247463827,
      "learning_rate": 1.2171489169897215e-06,
      "loss": 0.2895,
      "step": 1579
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.812494247020307,
      "learning_rate": 1.2162529362338631e-06,
      "loss": 0.3053,
      "step": 1580
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2127372881951244,
      "learning_rate": 1.2153567733201383e-06,
      "loss": 0.2867,
      "step": 1581
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.426303976237553,
      "learning_rate": 1.214460429003419e-06,
      "loss": 0.2647,
      "step": 1582
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.39029073034326,
      "learning_rate": 1.213563904038729e-06,
      "loss": 0.3349,
      "step": 1583
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3123384990964833,
      "learning_rate": 1.2126671991812447e-06,
      "loss": 0.2903,
      "step": 1584
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3166156240565745,
      "learning_rate": 1.2117703151862939e-06,
      "loss": 0.2993,
      "step": 1585
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4677299405381614,
      "learning_rate": 1.2108732528093549e-06,
      "loss": 0.3073,
      "step": 1586
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4192821491062335,
      "learning_rate": 1.209976012806057e-06,
      "loss": 0.299,
      "step": 1587
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4357059330636432,
      "learning_rate": 1.2090785959321781e-06,
      "loss": 0.2646,
      "step": 1588
    },
    {
      "epoch": 0.45,
      "grad_norm": 5.522395881502279,
      "learning_rate": 1.2081810029436468e-06,
      "loss": 0.2937,
      "step": 1589
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.321652531861596,
      "learning_rate": 1.207283234596538e-06,
      "loss": 0.3025,
      "step": 1590
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.446193398181088,
      "learning_rate": 1.2063852916470753e-06,
      "loss": 0.3041,
      "step": 1591
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.551171821649562,
      "learning_rate": 1.20548717485163e-06,
      "loss": 0.3013,
      "step": 1592
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2392588623142875,
      "learning_rate": 1.2045888849667185e-06,
      "loss": 0.2993,
      "step": 1593
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.377514804296935,
      "learning_rate": 1.2036904227490041e-06,
      "loss": 0.3237,
      "step": 1594
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4836421658846763,
      "learning_rate": 1.202791788955295e-06,
      "loss": 0.2805,
      "step": 1595
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4812149257453022,
      "learning_rate": 1.2018929843425427e-06,
      "loss": 0.29,
      "step": 1596
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.7616563865201336,
      "learning_rate": 1.200994009667845e-06,
      "loss": 0.2956,
      "step": 1597
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.75907227360528,
      "learning_rate": 1.2000948656884407e-06,
      "loss": 0.3059,
      "step": 1598
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4106964339523826,
      "learning_rate": 1.1991955531617123e-06,
      "loss": 0.304,
      "step": 1599
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4572321494745086,
      "learning_rate": 1.1982960728451845e-06,
      "loss": 0.3002,
      "step": 1600
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3147931678781486,
      "learning_rate": 1.1973964254965223e-06,
      "loss": 0.2984,
      "step": 1601
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.162842212080869,
      "learning_rate": 1.196496611873533e-06,
      "loss": 0.2708,
      "step": 1602
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.439223299123657,
      "learning_rate": 1.1955966327341613e-06,
      "loss": 0.2816,
      "step": 1603
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.270140418104315,
      "learning_rate": 1.1946964888364947e-06,
      "loss": 0.2834,
      "step": 1604
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2872796726584594,
      "learning_rate": 1.1937961809387567e-06,
      "loss": 0.2927,
      "step": 1605
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.430768331384572,
      "learning_rate": 1.192895709799311e-06,
      "loss": 0.2615,
      "step": 1606
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5802386722827864,
      "learning_rate": 1.1919950761766567e-06,
      "loss": 0.2601,
      "step": 1607
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.303315690496668,
      "learning_rate": 1.1910942808294313e-06,
      "loss": 0.3003,
      "step": 1608
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.401745013844011,
      "learning_rate": 1.1901933245164084e-06,
      "loss": 0.2738,
      "step": 1609
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2269512971196703,
      "learning_rate": 1.189292207996497e-06,
      "loss": 0.2859,
      "step": 1610
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5530381476933846,
      "learning_rate": 1.1883909320287403e-06,
      "loss": 0.3276,
      "step": 1611
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.552181480306632,
      "learning_rate": 1.1874894973723171e-06,
      "loss": 0.2945,
      "step": 1612
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4277394972003403,
      "learning_rate": 1.1865879047865389e-06,
      "loss": 0.3273,
      "step": 1613
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.304486641989715,
      "learning_rate": 1.1856861550308506e-06,
      "loss": 0.2731,
      "step": 1614
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.6788843226324546,
      "learning_rate": 1.1847842488648294e-06,
      "loss": 0.3032,
      "step": 1615
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3709010265119104,
      "learning_rate": 1.1838821870481846e-06,
      "loss": 0.2952,
      "step": 1616
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3812997004982766,
      "learning_rate": 1.1829799703407562e-06,
      "loss": 0.2854,
      "step": 1617
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4542133115233544,
      "learning_rate": 1.1820775995025146e-06,
      "loss": 0.3001,
      "step": 1618
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2291860735070586,
      "learning_rate": 1.1811750752935604e-06,
      "loss": 0.2829,
      "step": 1619
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3315720405872233,
      "learning_rate": 1.1802723984741227e-06,
      "loss": 0.2916,
      "step": 1620
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.284866993118778,
      "learning_rate": 1.1793695698045605e-06,
      "loss": 0.2921,
      "step": 1621
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.38603967727056,
      "learning_rate": 1.1784665900453592e-06,
      "loss": 0.3035,
      "step": 1622
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3086528982281633,
      "learning_rate": 1.1775634599571325e-06,
      "loss": 0.2809,
      "step": 1623
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.355332925064751,
      "learning_rate": 1.1766601803006201e-06,
      "loss": 0.2996,
      "step": 1624
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.897899160265012,
      "learning_rate": 1.1757567518366883e-06,
      "loss": 0.2976,
      "step": 1625
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.366508867197264,
      "learning_rate": 1.174853175326328e-06,
      "loss": 0.2954,
      "step": 1626
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4458650264482884,
      "learning_rate": 1.1739494515306552e-06,
      "loss": 0.2743,
      "step": 1627
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.878447090985886,
      "learning_rate": 1.17304558121091e-06,
      "loss": 0.2948,
      "step": 1628
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3919404467561116,
      "learning_rate": 1.1721415651284564e-06,
      "loss": 0.296,
      "step": 1629
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4843121301414333,
      "learning_rate": 1.1712374040447801e-06,
      "loss": 0.2939,
      "step": 1630
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.443680664040836,
      "learning_rate": 1.1703330987214896e-06,
      "loss": 0.2894,
      "step": 1631
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4771653111875067,
      "learning_rate": 1.1694286499203147e-06,
      "loss": 0.3024,
      "step": 1632
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.492661747284556,
      "learning_rate": 1.1685240584031067e-06,
      "loss": 0.2502,
      "step": 1633
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.422281617887777,
      "learning_rate": 1.1676193249318358e-06,
      "loss": 0.2729,
      "step": 1634
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2196613564768586,
      "learning_rate": 1.166714450268593e-06,
      "loss": 0.261,
      "step": 1635
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.410642664529523,
      "learning_rate": 1.165809435175588e-06,
      "loss": 0.2686,
      "step": 1636
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4144309428272397,
      "learning_rate": 1.164904280415148e-06,
      "loss": 0.2752,
      "step": 1637
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.6694333385746716,
      "learning_rate": 1.163998986749719e-06,
      "loss": 0.2774,
      "step": 1638
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.304174111118199,
      "learning_rate": 1.1630935549418626e-06,
      "loss": 0.2836,
      "step": 1639
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4381870026370938,
      "learning_rate": 1.1621879857542585e-06,
      "loss": 0.281,
      "step": 1640
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.435408423357395,
      "learning_rate": 1.1612822799497005e-06,
      "loss": 0.2797,
      "step": 1641
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1859105113726507,
      "learning_rate": 1.1603764382910988e-06,
      "loss": 0.2905,
      "step": 1642
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.478183699921075,
      "learning_rate": 1.1594704615414768e-06,
      "loss": 0.2683,
      "step": 1643
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3890470350734123,
      "learning_rate": 1.1585643504639725e-06,
      "loss": 0.2967,
      "step": 1644
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.5253388824450242,
      "learning_rate": 1.1576581058218372e-06,
      "loss": 0.3048,
      "step": 1645
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.295525160535059,
      "learning_rate": 1.1567517283784343e-06,
      "loss": 0.2689,
      "step": 1646
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4418315746970265,
      "learning_rate": 1.1558452188972384e-06,
      "loss": 0.2702,
      "step": 1647
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4257726301731095,
      "learning_rate": 1.154938578141837e-06,
      "loss": 0.2566,
      "step": 1648
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4880540461286063,
      "learning_rate": 1.1540318068759268e-06,
      "loss": 0.2707,
      "step": 1649
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.180860676840754,
      "learning_rate": 1.1531249058633147e-06,
      "loss": 0.2975,
      "step": 1650
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3279730202534044,
      "learning_rate": 1.152217875867917e-06,
      "loss": 0.3109,
      "step": 1651
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2238195956771283,
      "learning_rate": 1.151310717653759e-06,
      "loss": 0.2528,
      "step": 1652
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.443841308625575,
      "learning_rate": 1.150403431984974e-06,
      "loss": 0.3063,
      "step": 1653
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.6189577221458014,
      "learning_rate": 1.1494960196258015e-06,
      "loss": 0.2925,
      "step": 1654
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.382589203135882,
      "learning_rate": 1.1485884813405891e-06,
      "loss": 0.3003,
      "step": 1655
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4163619522350466,
      "learning_rate": 1.1476808178937898e-06,
      "loss": 0.3021,
      "step": 1656
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.801855906017032,
      "learning_rate": 1.1467730300499624e-06,
      "loss": 0.2966,
      "step": 1657
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.072560421336879,
      "learning_rate": 1.1458651185737702e-06,
      "loss": 0.2517,
      "step": 1658
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3776984976754605,
      "learning_rate": 1.1449570842299803e-06,
      "loss": 0.2902,
      "step": 1659
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.272720150972345,
      "learning_rate": 1.1440489277834645e-06,
      "loss": 0.2967,
      "step": 1660
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.1113527639819614,
      "learning_rate": 1.1431406499991953e-06,
      "loss": 0.2833,
      "step": 1661
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.124494330550425,
      "learning_rate": 1.1422322516422505e-06,
      "loss": 0.2549,
      "step": 1662
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2187918477352206,
      "learning_rate": 1.1413237334778064e-06,
      "loss": 0.2724,
      "step": 1663
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.338783727753721,
      "learning_rate": 1.1404150962711416e-06,
      "loss": 0.2768,
      "step": 1664
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.268969273911241,
      "learning_rate": 1.1395063407876358e-06,
      "loss": 0.307,
      "step": 1665
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3939662990886927,
      "learning_rate": 1.1385974677927665e-06,
      "loss": 0.2903,
      "step": 1666
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3441718507444174,
      "learning_rate": 1.1376884780521116e-06,
      "loss": 0.2631,
      "step": 1667
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.5386815434886256,
      "learning_rate": 1.1367793723313468e-06,
      "loss": 0.3002,
      "step": 1668
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.9721204904382397,
      "learning_rate": 1.1358701513962454e-06,
      "loss": 0.2851,
      "step": 1669
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.961021970694507,
      "learning_rate": 1.1349608160126783e-06,
      "loss": 0.3089,
      "step": 1670
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.281055601400845,
      "learning_rate": 1.1340513669466119e-06,
      "loss": 0.2629,
      "step": 1671
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3781198642027723,
      "learning_rate": 1.133141804964109e-06,
      "loss": 0.3255,
      "step": 1672
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2183111482419444,
      "learning_rate": 1.1322321308313277e-06,
      "loss": 0.2706,
      "step": 1673
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.251887744888779,
      "learning_rate": 1.13132234531452e-06,
      "loss": 0.2522,
      "step": 1674
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3499624185539907,
      "learning_rate": 1.130412449180032e-06,
      "loss": 0.306,
      "step": 1675
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.428096073028077,
      "learning_rate": 1.1295024431943028e-06,
      "loss": 0.2778,
      "step": 1676
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2110536927365874,
      "learning_rate": 1.1285923281238646e-06,
      "loss": 0.2743,
      "step": 1677
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.634455891084864,
      "learning_rate": 1.1276821047353401e-06,
      "loss": 0.3085,
      "step": 1678
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.8355465814347784,
      "learning_rate": 1.1267717737954458e-06,
      "loss": 0.2992,
      "step": 1679
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3347658200547374,
      "learning_rate": 1.1258613360709858e-06,
      "loss": 0.2813,
      "step": 1680
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.491899736342909,
      "learning_rate": 1.1249507923288561e-06,
      "loss": 0.2773,
      "step": 1681
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4424959895263525,
      "learning_rate": 1.1240401433360417e-06,
      "loss": 0.2641,
      "step": 1682
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.425874752306347,
      "learning_rate": 1.1231293898596153e-06,
      "loss": 0.2927,
      "step": 1683
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.373837340184451,
      "learning_rate": 1.1222185326667387e-06,
      "loss": 0.2625,
      "step": 1684
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.329312562974794,
      "learning_rate": 1.121307572524661e-06,
      "loss": 0.2582,
      "step": 1685
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3640722972664494,
      "learning_rate": 1.1203965102007173e-06,
      "loss": 0.2603,
      "step": 1686
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.396425754577803,
      "learning_rate": 1.1194853464623293e-06,
      "loss": 0.2633,
      "step": 1687
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.381954406906367,
      "learning_rate": 1.118574082077004e-06,
      "loss": 0.2834,
      "step": 1688
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3566730696052085,
      "learning_rate": 1.117662717812333e-06,
      "loss": 0.2863,
      "step": 1689
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.569134886147644,
      "learning_rate": 1.1167512544359927e-06,
      "loss": 0.3116,
      "step": 1690
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2502650418462724,
      "learning_rate": 1.115839692715742e-06,
      "loss": 0.2557,
      "step": 1691
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.124319603820594,
      "learning_rate": 1.1149280334194235e-06,
      "loss": 0.2509,
      "step": 1692
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3197689998035793,
      "learning_rate": 1.114016277314961e-06,
      "loss": 0.2843,
      "step": 1693
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3081780964531617,
      "learning_rate": 1.1131044251703615e-06,
      "loss": 0.2712,
      "step": 1694
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.6681397012058037,
      "learning_rate": 1.1121924777537107e-06,
      "loss": 0.2887,
      "step": 1695
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.570603638430538,
      "learning_rate": 1.1112804358331765e-06,
      "loss": 0.2973,
      "step": 1696
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3482739754496023,
      "learning_rate": 1.1103683001770055e-06,
      "loss": 0.3073,
      "step": 1697
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5409530486963754,
      "learning_rate": 1.109456071553523e-06,
      "loss": 0.2721,
      "step": 1698
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.515926218025391,
      "learning_rate": 1.1085437507311338e-06,
      "loss": 0.3055,
      "step": 1699
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.297386792787777,
      "learning_rate": 1.1076313384783182e-06,
      "loss": 0.2715,
      "step": 1700
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2796020266024217,
      "learning_rate": 1.1067188355636366e-06,
      "loss": 0.2703,
      "step": 1701
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.324970919155172,
      "learning_rate": 1.1058062427557228e-06,
      "loss": 0.2629,
      "step": 1702
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.832132949007624,
      "learning_rate": 1.1048935608232878e-06,
      "loss": 0.3345,
      "step": 1703
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5880321537200777,
      "learning_rate": 1.1039807905351176e-06,
      "loss": 0.2845,
      "step": 1704
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.444606820574314,
      "learning_rate": 1.1030679326600725e-06,
      "loss": 0.2943,
      "step": 1705
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.216027659045018,
      "learning_rate": 1.1021549879670864e-06,
      "loss": 0.277,
      "step": 1706
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.36616902049424,
      "learning_rate": 1.1012419572251663e-06,
      "loss": 0.2683,
      "step": 1707
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.520596375978376,
      "learning_rate": 1.1003288412033923e-06,
      "loss": 0.2901,
      "step": 1708
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3720873886683127,
      "learning_rate": 1.0994156406709153e-06,
      "loss": 0.2916,
      "step": 1709
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.550717392273682,
      "learning_rate": 1.0985023563969584e-06,
      "loss": 0.2813,
      "step": 1710
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2948039992926934,
      "learning_rate": 1.0975889891508147e-06,
      "loss": 0.2802,
      "step": 1711
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.302247130619214,
      "learning_rate": 1.0966755397018472e-06,
      "loss": 0.2835,
      "step": 1712
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1751534784301647,
      "learning_rate": 1.0957620088194883e-06,
      "loss": 0.2757,
      "step": 1713
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4138394233404137,
      "learning_rate": 1.0948483972732395e-06,
      "loss": 0.2824,
      "step": 1714
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.203928367452892,
      "learning_rate": 1.0939347058326681e-06,
      "loss": 0.2812,
      "step": 1715
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.388362231374219,
      "learning_rate": 1.0930209352674123e-06,
      "loss": 0.3166,
      "step": 1716
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.3369407790264196,
      "learning_rate": 1.0921070863471732e-06,
      "loss": 0.2883,
      "step": 1717
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.463710226139802,
      "learning_rate": 1.0911931598417209e-06,
      "loss": 0.2929,
      "step": 1718
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5950571987671425,
      "learning_rate": 1.0902791565208886e-06,
      "loss": 0.2898,
      "step": 1719
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4638825288289783,
      "learning_rate": 1.0893650771545756e-06,
      "loss": 0.2853,
      "step": 1720
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4506770789310464,
      "learning_rate": 1.0884509225127451e-06,
      "loss": 0.3009,
      "step": 1721
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2545658493991563,
      "learning_rate": 1.0875366933654231e-06,
      "loss": 0.2552,
      "step": 1722
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.568482399258855,
      "learning_rate": 1.0866223904826989e-06,
      "loss": 0.256,
      "step": 1723
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.3860221317855297,
      "learning_rate": 1.0857080146347236e-06,
      "loss": 0.2599,
      "step": 1724
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.324595881561982,
      "learning_rate": 1.0847935665917098e-06,
      "loss": 0.2739,
      "step": 1725
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.8118642447032007,
      "learning_rate": 1.0838790471239311e-06,
      "loss": 0.3026,
      "step": 1726
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.384500504570064,
      "learning_rate": 1.0829644570017211e-06,
      "loss": 0.2669,
      "step": 1727
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.523962612237143,
      "learning_rate": 1.0820497969954731e-06,
      "loss": 0.295,
      "step": 1728
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.281525461610133,
      "learning_rate": 1.0811350678756391e-06,
      "loss": 0.2788,
      "step": 1729
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2065386887892777,
      "learning_rate": 1.0802202704127292e-06,
      "loss": 0.2514,
      "step": 1730
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.335004256597714,
      "learning_rate": 1.0793054053773117e-06,
      "loss": 0.2886,
      "step": 1731
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.422474441053778,
      "learning_rate": 1.0783904735400102e-06,
      "loss": 0.2989,
      "step": 1732
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.3667128250142393,
      "learning_rate": 1.0774754756715071e-06,
      "loss": 0.2674,
      "step": 1733
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4721285965550446,
      "learning_rate": 1.0765604125425381e-06,
      "loss": 0.275,
      "step": 1734
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.6296653925230213,
      "learning_rate": 1.0756452849238953e-06,
      "loss": 0.2941,
      "step": 1735
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.3439950039178763,
      "learning_rate": 1.0747300935864243e-06,
      "loss": 0.2858,
      "step": 1736
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.417870316163827,
      "learning_rate": 1.0738148393010249e-06,
      "loss": 0.2902,
      "step": 1737
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.3262702827912864,
      "learning_rate": 1.0728995228386495e-06,
      "loss": 0.275,
      "step": 1738
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.58841745004525,
      "learning_rate": 1.0719841449703033e-06,
      "loss": 0.2811,
      "step": 1739
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.380777299893438,
      "learning_rate": 1.071068706467043e-06,
      "loss": 0.2861,
      "step": 1740
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4556940889456893,
      "learning_rate": 1.070153208099976e-06,
      "loss": 0.3134,
      "step": 1741
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.6412055647062096,
      "learning_rate": 1.0692376506402613e-06,
      "loss": 0.3369,
      "step": 1742
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.560785153064596,
      "learning_rate": 1.068322034859106e-06,
      "loss": 0.29,
      "step": 1743
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.531656496883173,
      "learning_rate": 1.067406361527768e-06,
      "loss": 0.2713,
      "step": 1744
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5381548769556077,
      "learning_rate": 1.0664906314175524e-06,
      "loss": 0.305,
      "step": 1745
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5206232944748304,
      "learning_rate": 1.0655748452998127e-06,
      "loss": 0.2925,
      "step": 1746
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.592877659877174,
      "learning_rate": 1.0646590039459499e-06,
      "loss": 0.3254,
      "step": 1747
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.415535301875715,
      "learning_rate": 1.0637431081274107e-06,
      "loss": 0.2762,
      "step": 1748
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.450107923002262,
      "learning_rate": 1.0628271586156878e-06,
      "loss": 0.2704,
      "step": 1749
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.414014331879564,
      "learning_rate": 1.0619111561823206e-06,
      "loss": 0.2876,
      "step": 1750
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3011402092648536,
      "learning_rate": 1.0609951015988904e-06,
      "loss": 0.2916,
      "step": 1751
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.296789517012798,
      "learning_rate": 1.0600789956370253e-06,
      "loss": 0.285,
      "step": 1752
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1437783622474975,
      "learning_rate": 1.0591628390683945e-06,
      "loss": 0.259,
      "step": 1753
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.396711139395835,
      "learning_rate": 1.0582466326647109e-06,
      "loss": 0.2865,
      "step": 1754
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.411830911114145,
      "learning_rate": 1.0573303771977288e-06,
      "loss": 0.2833,
      "step": 1755
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.40099593001122,
      "learning_rate": 1.0564140734392445e-06,
      "loss": 0.2596,
      "step": 1756
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3296913537453428,
      "learning_rate": 1.0554977221610948e-06,
      "loss": 0.2708,
      "step": 1757
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.536138383027427,
      "learning_rate": 1.0545813241351558e-06,
      "loss": 0.2789,
      "step": 1758
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3747634534579163,
      "learning_rate": 1.053664880133344e-06,
      "loss": 0.2982,
      "step": 1759
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5618665119525854,
      "learning_rate": 1.0527483909276142e-06,
      "loss": 0.317,
      "step": 1760
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4399883429176024,
      "learning_rate": 1.051831857289959e-06,
      "loss": 0.2937,
      "step": 1761
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.466100915953789,
      "learning_rate": 1.0509152799924084e-06,
      "loss": 0.3105,
      "step": 1762
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2869814441583154,
      "learning_rate": 1.0499986598070301e-06,
      "loss": 0.2699,
      "step": 1763
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3649098477306936,
      "learning_rate": 1.0490819975059267e-06,
      "loss": 0.2624,
      "step": 1764
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2203716949643275,
      "learning_rate": 1.0481652938612372e-06,
      "loss": 0.2749,
      "step": 1765
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.218764342181374,
      "learning_rate": 1.0472485496451347e-06,
      "loss": 0.2587,
      "step": 1766
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1979106434585445,
      "learning_rate": 1.0463317656298272e-06,
      "loss": 0.2564,
      "step": 1767
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.598649617483703,
      "learning_rate": 1.0454149425875558e-06,
      "loss": 0.307,
      "step": 1768
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4256848758871676,
      "learning_rate": 1.0444980812905944e-06,
      "loss": 0.2598,
      "step": 1769
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.799918173511259,
      "learning_rate": 1.0435811825112496e-06,
      "loss": 0.296,
      "step": 1770
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.825152276633744,
      "learning_rate": 1.0426642470218585e-06,
      "loss": 0.2572,
      "step": 1771
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2875491695406134,
      "learning_rate": 1.0417472755947908e-06,
      "loss": 0.2697,
      "step": 1772
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.338928267426616,
      "learning_rate": 1.0408302690024446e-06,
      "loss": 0.2905,
      "step": 1773
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.377571283990241,
      "learning_rate": 1.0399132280172493e-06,
      "loss": 0.2888,
      "step": 1774
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.494627386822755,
      "learning_rate": 1.038996153411662e-06,
      "loss": 0.3092,
      "step": 1775
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4803657304664886,
      "learning_rate": 1.0380790459581694e-06,
      "loss": 0.2933,
      "step": 1776
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.599391249742488,
      "learning_rate": 1.0371619064292842e-06,
      "loss": 0.2987,
      "step": 1777
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4867542348412894,
      "learning_rate": 1.0362447355975475e-06,
      "loss": 0.2618,
      "step": 1778
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.345277800331825,
      "learning_rate": 1.0353275342355262e-06,
      "loss": 0.2586,
      "step": 1779
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2953003810671726,
      "learning_rate": 1.034410303115813e-06,
      "loss": 0.2999,
      "step": 1780
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.40069451279197,
      "learning_rate": 1.0334930430110256e-06,
      "loss": 0.2897,
      "step": 1781
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4034742727959304,
      "learning_rate": 1.0325757546938066e-06,
      "loss": 0.3252,
      "step": 1782
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6249771671947753,
      "learning_rate": 1.0316584389368212e-06,
      "loss": 0.2917,
      "step": 1783
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.521429682335649,
      "learning_rate": 1.0307410965127594e-06,
      "loss": 0.266,
      "step": 1784
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4288395224203296,
      "learning_rate": 1.029823728194332e-06,
      "loss": 0.2769,
      "step": 1785
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.278568446029088,
      "learning_rate": 1.0289063347542726e-06,
      "loss": 0.2921,
      "step": 1786
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3818620321327395,
      "learning_rate": 1.0279889169653359e-06,
      "loss": 0.2805,
      "step": 1787
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.438023465912851,
      "learning_rate": 1.0270714756002965e-06,
      "loss": 0.3057,
      "step": 1788
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2677418993032314,
      "learning_rate": 1.0261540114319497e-06,
      "loss": 0.3,
      "step": 1789
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.204566597722765,
      "learning_rate": 1.0252365252331092e-06,
      "loss": 0.2801,
      "step": 1790
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4901371783556514,
      "learning_rate": 1.0243190177766084e-06,
      "loss": 0.2966,
      "step": 1791
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3665442696463894,
      "learning_rate": 1.0234014898352965e-06,
      "loss": 0.2915,
      "step": 1792
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3177504690621924,
      "learning_rate": 1.0224839421820426e-06,
      "loss": 0.2512,
      "step": 1793
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.423196847728315,
      "learning_rate": 1.0215663755897306e-06,
      "loss": 0.2942,
      "step": 1794
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3565389077019434,
      "learning_rate": 1.0206487908312607e-06,
      "loss": 0.2896,
      "step": 1795
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5055170626548535,
      "learning_rate": 1.0197311886795485e-06,
      "loss": 0.2973,
      "step": 1796
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.303580120594449,
      "learning_rate": 1.018813569907525e-06,
      "loss": 0.2792,
      "step": 1797
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2318800840205935,
      "learning_rate": 1.0178959352881335e-06,
      "loss": 0.2664,
      "step": 1798
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.404582126143902,
      "learning_rate": 1.0169782855943326e-06,
      "loss": 0.28,
      "step": 1799
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3223672277058265,
      "learning_rate": 1.016060621599092e-06,
      "loss": 0.2784,
      "step": 1800
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.211504993025335,
      "learning_rate": 1.0151429440753948e-06,
      "loss": 0.2583,
      "step": 1801
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1652312860171645,
      "learning_rate": 1.0142252537962338e-06,
      "loss": 0.2483,
      "step": 1802
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5690040507347525,
      "learning_rate": 1.0133075515346147e-06,
      "loss": 0.3247,
      "step": 1803
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3512850501355937,
      "learning_rate": 1.0123898380635514e-06,
      "loss": 0.2846,
      "step": 1804
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5307997781444462,
      "learning_rate": 1.0114721141560678e-06,
      "loss": 0.3021,
      "step": 1805
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3755600706842928,
      "learning_rate": 1.0105543805851975e-06,
      "loss": 0.2707,
      "step": 1806
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.510828516893168,
      "learning_rate": 1.0096366381239806e-06,
      "loss": 0.2939,
      "step": 1807
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2898278280269504,
      "learning_rate": 1.0087188875454668e-06,
      "loss": 0.2451,
      "step": 1808
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.449184623548092,
      "learning_rate": 1.0078011296227103e-06,
      "loss": 0.2667,
      "step": 1809
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4197885773422607,
      "learning_rate": 1.0068833651287733e-06,
      "loss": 0.2951,
      "step": 1810
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.536061699800912,
      "learning_rate": 1.0059655948367228e-06,
      "loss": 0.2738,
      "step": 1811
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.29823211800711,
      "learning_rate": 1.0050478195196302e-06,
      "loss": 0.3026,
      "step": 1812
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.131609519143086,
      "learning_rate": 1.0041300399505724e-06,
      "loss": 0.2666,
      "step": 1813
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2837693911189096,
      "learning_rate": 1.0032122569026281e-06,
      "loss": 0.2944,
      "step": 1814
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.382827289671159,
      "learning_rate": 1.0022944711488816e-06,
      "loss": 0.2907,
      "step": 1815
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3398481320326656,
      "learning_rate": 1.0013766834624167e-06,
      "loss": 0.2928,
      "step": 1816
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3567810281368526,
      "learning_rate": 1.0004588946163202e-06,
      "loss": 0.278,
      "step": 1817
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3774697747428126,
      "learning_rate": 9.995411053836797e-07,
      "loss": 0.317,
      "step": 1818
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5589790947949296,
      "learning_rate": 9.986233165375836e-07,
      "loss": 0.2779,
      "step": 1819
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.408172535398655,
      "learning_rate": 9.977055288511181e-07,
      "loss": 0.2812,
      "step": 1820
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.1184684834613012,
      "learning_rate": 9.967877430973716e-07,
      "loss": 0.296,
      "step": 1821
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3555209555113175,
      "learning_rate": 9.958699600494277e-07,
      "loss": 0.3002,
      "step": 1822
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2792496340161654,
      "learning_rate": 9.949521804803697e-07,
      "loss": 0.2675,
      "step": 1823
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.444601373370277,
      "learning_rate": 9.940344051632776e-07,
      "loss": 0.2925,
      "step": 1824
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4852374720528982,
      "learning_rate": 9.931166348712266e-07,
      "loss": 0.2627,
      "step": 1825
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.7780433494306975,
      "learning_rate": 9.921988703772896e-07,
      "loss": 0.3006,
      "step": 1826
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.446401074333341,
      "learning_rate": 9.912811124545332e-07,
      "loss": 0.2667,
      "step": 1827
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3409932053933957,
      "learning_rate": 9.903633618760193e-07,
      "loss": 0.2746,
      "step": 1828
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.407708241012865,
      "learning_rate": 9.894456194148028e-07,
      "loss": 0.2602,
      "step": 1829
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.22125022141639,
      "learning_rate": 9.885278858439321e-07,
      "loss": 0.2698,
      "step": 1830
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.6680271799964284,
      "learning_rate": 9.876101619364487e-07,
      "loss": 0.2795,
      "step": 1831
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4811890516289075,
      "learning_rate": 9.866924484653855e-07,
      "loss": 0.2979,
      "step": 1832
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2735370919447107,
      "learning_rate": 9.85774746203766e-07,
      "loss": 0.2601,
      "step": 1833
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3429662880966524,
      "learning_rate": 9.848570559246053e-07,
      "loss": 0.2813,
      "step": 1834
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.449821577681452,
      "learning_rate": 9.839393784009076e-07,
      "loss": 0.2968,
      "step": 1835
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.302224969895475,
      "learning_rate": 9.830217144056673e-07,
      "loss": 0.2868,
      "step": 1836
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.6831658109868886,
      "learning_rate": 9.821040647118664e-07,
      "loss": 0.2874,
      "step": 1837
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3434640511937035,
      "learning_rate": 9.811864300924752e-07,
      "loss": 0.2616,
      "step": 1838
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.337482263937847,
      "learning_rate": 9.802688113204516e-07,
      "loss": 0.2761,
      "step": 1839
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.089170981209531,
      "learning_rate": 9.793512091687394e-07,
      "loss": 0.2862,
      "step": 1840
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4252580066175775,
      "learning_rate": 9.784336244102695e-07,
      "loss": 0.3009,
      "step": 1841
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.259680761735452,
      "learning_rate": 9.775160578179573e-07,
      "loss": 0.297,
      "step": 1842
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.325015796073353,
      "learning_rate": 9.765985101647034e-07,
      "loss": 0.3074,
      "step": 1843
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.298833983394814,
      "learning_rate": 9.75680982223392e-07,
      "loss": 0.2627,
      "step": 1844
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5713981868206135,
      "learning_rate": 9.747634747668905e-07,
      "loss": 0.2899,
      "step": 1845
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.419747640232926,
      "learning_rate": 9.738459885680502e-07,
      "loss": 0.283,
      "step": 1846
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.355604327545243,
      "learning_rate": 9.729285243997036e-07,
      "loss": 0.2893,
      "step": 1847
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.8604074410743534,
      "learning_rate": 9.720110830346642e-07,
      "loss": 0.2752,
      "step": 1848
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.351280307326712,
      "learning_rate": 9.710936652457275e-07,
      "loss": 0.2744,
      "step": 1849
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.392326574811654,
      "learning_rate": 9.70176271805668e-07,
      "loss": 0.2886,
      "step": 1850
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4560054450403612,
      "learning_rate": 9.692589034872408e-07,
      "loss": 0.2949,
      "step": 1851
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.333345087894509,
      "learning_rate": 9.683415610631787e-07,
      "loss": 0.3006,
      "step": 1852
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.384442083807228,
      "learning_rate": 9.674242453061935e-07,
      "loss": 0.2591,
      "step": 1853
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3865601767906246,
      "learning_rate": 9.66506956988974e-07,
      "loss": 0.2539,
      "step": 1854
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3401087584534266,
      "learning_rate": 9.655896968841872e-07,
      "loss": 0.2816,
      "step": 1855
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4510382444906633,
      "learning_rate": 9.64672465764474e-07,
      "loss": 0.2776,
      "step": 1856
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6222410730947283,
      "learning_rate": 9.637552644024526e-07,
      "loss": 0.2724,
      "step": 1857
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0489271774956777,
      "learning_rate": 9.62838093570716e-07,
      "loss": 0.2194,
      "step": 1858
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4784055906636544,
      "learning_rate": 9.619209540418306e-07,
      "loss": 0.2921,
      "step": 1859
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1713605870866304,
      "learning_rate": 9.610038465883376e-07,
      "loss": 0.2496,
      "step": 1860
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.39693101648829,
      "learning_rate": 9.600867719827506e-07,
      "loss": 0.2992,
      "step": 1861
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.229116705886729,
      "learning_rate": 9.591697309975555e-07,
      "loss": 0.2797,
      "step": 1862
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4326025652791423,
      "learning_rate": 9.582527244052094e-07,
      "loss": 0.2756,
      "step": 1863
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3303058887008707,
      "learning_rate": 9.573357529781414e-07,
      "loss": 0.2905,
      "step": 1864
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.234063548100955,
      "learning_rate": 9.564188174887503e-07,
      "loss": 0.266,
      "step": 1865
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2828800276675207,
      "learning_rate": 9.555019187094057e-07,
      "loss": 0.2989,
      "step": 1866
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6430152997452225,
      "learning_rate": 9.545850574124443e-07,
      "loss": 0.2702,
      "step": 1867
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4975260565196784,
      "learning_rate": 9.536682343701728e-07,
      "loss": 0.29,
      "step": 1868
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2169356324072207,
      "learning_rate": 9.527514503548651e-07,
      "loss": 0.2595,
      "step": 1869
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4171320151902154,
      "learning_rate": 9.518347061387627e-07,
      "loss": 0.288,
      "step": 1870
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4017512468814295,
      "learning_rate": 9.509180024940734e-07,
      "loss": 0.3166,
      "step": 1871
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.417658702684582,
      "learning_rate": 9.500013401929701e-07,
      "loss": 0.2598,
      "step": 1872
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4955418694217633,
      "learning_rate": 9.490847200075917e-07,
      "loss": 0.3007,
      "step": 1873
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.407401323867513,
      "learning_rate": 9.48168142710041e-07,
      "loss": 0.2828,
      "step": 1874
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3317221443277116,
      "learning_rate": 9.472516090723859e-07,
      "loss": 0.268,
      "step": 1875
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.633088826167416,
      "learning_rate": 9.463351198666559e-07,
      "loss": 0.3279,
      "step": 1876
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4694512144741725,
      "learning_rate": 9.454186758648443e-07,
      "loss": 0.3348,
      "step": 1877
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.427311040283762,
      "learning_rate": 9.445022778389056e-07,
      "loss": 0.2883,
      "step": 1878
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.254665154293914,
      "learning_rate": 9.435859265607554e-07,
      "loss": 0.2615,
      "step": 1879
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4736029632574907,
      "learning_rate": 9.426696228022713e-07,
      "loss": 0.2653,
      "step": 1880
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.358484562573563,
      "learning_rate": 9.417533673352893e-07,
      "loss": 0.2898,
      "step": 1881
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3562987141421656,
      "learning_rate": 9.408371609316058e-07,
      "loss": 0.2452,
      "step": 1882
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.256556790697905,
      "learning_rate": 9.39921004362975e-07,
      "loss": 0.2823,
      "step": 1883
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5861784387547755,
      "learning_rate": 9.390048984011094e-07,
      "loss": 0.2502,
      "step": 1884
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5766242409891094,
      "learning_rate": 9.380888438176795e-07,
      "loss": 0.3105,
      "step": 1885
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4315420005305888,
      "learning_rate": 9.37172841384312e-07,
      "loss": 0.282,
      "step": 1886
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.7588805558007934,
      "learning_rate": 9.362568918725895e-07,
      "loss": 0.277,
      "step": 1887
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.420971784915736,
      "learning_rate": 9.353409960540505e-07,
      "loss": 0.3012,
      "step": 1888
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2747877360470765,
      "learning_rate": 9.344251547001871e-07,
      "loss": 0.2797,
      "step": 1889
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4181692064226716,
      "learning_rate": 9.335093685824476e-07,
      "loss": 0.2631,
      "step": 1890
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.340474100192766,
      "learning_rate": 9.325936384722321e-07,
      "loss": 0.2648,
      "step": 1891
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4228559382516854,
      "learning_rate": 9.316779651408939e-07,
      "loss": 0.2925,
      "step": 1892
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.523273040640516,
      "learning_rate": 9.307623493597387e-07,
      "loss": 0.2793,
      "step": 1893
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.7145037184668444,
      "learning_rate": 9.29846791900024e-07,
      "loss": 0.315,
      "step": 1894
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4016932182830613,
      "learning_rate": 9.289312935329572e-07,
      "loss": 0.2689,
      "step": 1895
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.5004609561755666,
      "learning_rate": 9.280158550296968e-07,
      "loss": 0.2841,
      "step": 1896
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2912353853003617,
      "learning_rate": 9.271004771613508e-07,
      "loss": 0.2632,
      "step": 1897
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.490373208906252,
      "learning_rate": 9.261851606989753e-07,
      "loss": 0.2796,
      "step": 1898
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3708068137158524,
      "learning_rate": 9.252699064135758e-07,
      "loss": 0.2935,
      "step": 1899
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.211291701138569,
      "learning_rate": 9.243547150761046e-07,
      "loss": 0.2753,
      "step": 1900
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.310505963420102,
      "learning_rate": 9.23439587457462e-07,
      "loss": 0.2807,
      "step": 1901
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2820307173012457,
      "learning_rate": 9.22524524328493e-07,
      "loss": 0.2828,
      "step": 1902
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1476374326857575,
      "learning_rate": 9.216095264599894e-07,
      "loss": 0.2673,
      "step": 1903
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4323240408845486,
      "learning_rate": 9.206945946226883e-07,
      "loss": 0.29,
      "step": 1904
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.6627412851874475,
      "learning_rate": 9.197797295872708e-07,
      "loss": 0.2769,
      "step": 1905
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.530655011578067,
      "learning_rate": 9.188649321243609e-07,
      "loss": 0.3114,
      "step": 1906
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3937515846923914,
      "learning_rate": 9.179502030045269e-07,
      "loss": 0.2785,
      "step": 1907
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.323366450374401,
      "learning_rate": 9.170355429982787e-07,
      "loss": 0.282,
      "step": 1908
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.354831302609529,
      "learning_rate": 9.161209528760689e-07,
      "loss": 0.2577,
      "step": 1909
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.71004074994221,
      "learning_rate": 9.152064334082903e-07,
      "loss": 0.2879,
      "step": 1910
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4742529942991918,
      "learning_rate": 9.142919853652765e-07,
      "loss": 0.2716,
      "step": 1911
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1048299871316023,
      "learning_rate": 9.133776095173013e-07,
      "loss": 0.2557,
      "step": 1912
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.578248531071737,
      "learning_rate": 9.124633066345768e-07,
      "loss": 0.2945,
      "step": 1913
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.916797832424238,
      "learning_rate": 9.115490774872549e-07,
      "loss": 0.2802,
      "step": 1914
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.392195902194485,
      "learning_rate": 9.106349228454242e-07,
      "loss": 0.287,
      "step": 1915
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.37893310147143,
      "learning_rate": 9.097208434791116e-07,
      "loss": 0.2834,
      "step": 1916
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.287075599664048,
      "learning_rate": 9.088068401582795e-07,
      "loss": 0.2554,
      "step": 1917
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.416346774146093,
      "learning_rate": 9.078929136528267e-07,
      "loss": 0.2581,
      "step": 1918
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.353920820538036,
      "learning_rate": 9.069790647325878e-07,
      "loss": 0.2793,
      "step": 1919
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3798498329467104,
      "learning_rate": 9.060652941673317e-07,
      "loss": 0.2813,
      "step": 1920
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4914236732800843,
      "learning_rate": 9.05151602726761e-07,
      "loss": 0.3002,
      "step": 1921
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4316041066701413,
      "learning_rate": 9.042379911805116e-07,
      "loss": 0.2804,
      "step": 1922
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.639715234179413,
      "learning_rate": 9.033244602981525e-07,
      "loss": 0.2862,
      "step": 1923
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.241581435626847,
      "learning_rate": 9.024110108491853e-07,
      "loss": 0.2774,
      "step": 1924
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.856997759615627,
      "learning_rate": 9.014976436030416e-07,
      "loss": 0.2824,
      "step": 1925
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2585837498129946,
      "learning_rate": 9.005843593290847e-07,
      "loss": 0.2812,
      "step": 1926
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.6046519639500647,
      "learning_rate": 8.996711587966077e-07,
      "loss": 0.2876,
      "step": 1927
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4420328746980484,
      "learning_rate": 8.987580427748335e-07,
      "loss": 0.2839,
      "step": 1928
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3624950251203214,
      "learning_rate": 8.978450120329137e-07,
      "loss": 0.3064,
      "step": 1929
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3272518802728523,
      "learning_rate": 8.969320673399276e-07,
      "loss": 0.2786,
      "step": 1930
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2731877501718016,
      "learning_rate": 8.960192094648826e-07,
      "loss": 0.2765,
      "step": 1931
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.419747659121121,
      "learning_rate": 8.951064391767119e-07,
      "loss": 0.2528,
      "step": 1932
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.442953839847466,
      "learning_rate": 8.941937572442773e-07,
      "loss": 0.2231,
      "step": 1933
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.387408496361725,
      "learning_rate": 8.932811644363635e-07,
      "loss": 0.2664,
      "step": 1934
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4922207588724437,
      "learning_rate": 8.923686615216816e-07,
      "loss": 0.321,
      "step": 1935
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4252013919135114,
      "learning_rate": 8.914562492688666e-07,
      "loss": 0.3129,
      "step": 1936
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1682085963308153,
      "learning_rate": 8.905439284464769e-07,
      "loss": 0.2873,
      "step": 1937
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.416834129768717,
      "learning_rate": 8.896316998229946e-07,
      "loss": 0.2461,
      "step": 1938
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.195335513474169,
      "learning_rate": 8.887195641668234e-07,
      "loss": 0.2644,
      "step": 1939
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2883878593039366,
      "learning_rate": 8.878075222462895e-07,
      "loss": 0.2704,
      "step": 1940
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.787774022686844,
      "learning_rate": 8.86895574829639e-07,
      "loss": 0.2796,
      "step": 1941
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3051247213959707,
      "learning_rate": 8.859837226850388e-07,
      "loss": 0.2558,
      "step": 1942
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3063454413012825,
      "learning_rate": 8.850719665805766e-07,
      "loss": 0.2837,
      "step": 1943
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3273022288303054,
      "learning_rate": 8.841603072842581e-07,
      "loss": 0.2783,
      "step": 1944
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.255821650288142,
      "learning_rate": 8.832487455640074e-07,
      "loss": 0.2783,
      "step": 1945
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4717382690009164,
      "learning_rate": 8.823372821876671e-07,
      "loss": 0.2977,
      "step": 1946
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.257346400197328,
      "learning_rate": 8.814259179229959e-07,
      "loss": 0.2843,
      "step": 1947
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.222491787939881,
      "learning_rate": 8.805146535376708e-07,
      "loss": 0.2756,
      "step": 1948
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.7443046514917997,
      "learning_rate": 8.796034897992828e-07,
      "loss": 0.3251,
      "step": 1949
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3189270403797373,
      "learning_rate": 8.78692427475339e-07,
      "loss": 0.2681,
      "step": 1950
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.319905410278452,
      "learning_rate": 8.777814673332614e-07,
      "loss": 0.2999,
      "step": 1951
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3315871825933216,
      "learning_rate": 8.768706101403847e-07,
      "loss": 0.2713,
      "step": 1952
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.585690032496667,
      "learning_rate": 8.759598566639586e-07,
      "loss": 0.3387,
      "step": 1953
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2257813847959205,
      "learning_rate": 8.750492076711439e-07,
      "loss": 0.2633,
      "step": 1954
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3438552082983675,
      "learning_rate": 8.741386639290144e-07,
      "loss": 0.2721,
      "step": 1955
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.381209291439057,
      "learning_rate": 8.732282262045545e-07,
      "loss": 0.294,
      "step": 1956
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.383293998015045,
      "learning_rate": 8.723178952646595e-07,
      "loss": 0.2625,
      "step": 1957
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3888215246027644,
      "learning_rate": 8.714076718761355e-07,
      "loss": 0.2833,
      "step": 1958
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.484679974607535,
      "learning_rate": 8.704975568056974e-07,
      "loss": 0.2762,
      "step": 1959
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.7262266793563943,
      "learning_rate": 8.695875508199682e-07,
      "loss": 0.2734,
      "step": 1960
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4748011500503755,
      "learning_rate": 8.686776546854799e-07,
      "loss": 0.2844,
      "step": 1961
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4158474466530744,
      "learning_rate": 8.677678691686721e-07,
      "loss": 0.2933,
      "step": 1962
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.365395676527773,
      "learning_rate": 8.668581950358909e-07,
      "loss": 0.2819,
      "step": 1963
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.308616132611635,
      "learning_rate": 8.659486330533881e-07,
      "loss": 0.2717,
      "step": 1964
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1834999486117583,
      "learning_rate": 8.650391839873217e-07,
      "loss": 0.2787,
      "step": 1965
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.461144597727408,
      "learning_rate": 8.641298486037543e-07,
      "loss": 0.3018,
      "step": 1966
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.80762751416142,
      "learning_rate": 8.632206276686532e-07,
      "loss": 0.292,
      "step": 1967
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.250135073102463,
      "learning_rate": 8.623115219478884e-07,
      "loss": 0.2788,
      "step": 1968
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.248508050847324,
      "learning_rate": 8.614025322072336e-07,
      "loss": 0.2968,
      "step": 1969
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1565106717006195,
      "learning_rate": 8.604936592123646e-07,
      "loss": 0.2758,
      "step": 1970
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3513480691328814,
      "learning_rate": 8.595849037288581e-07,
      "loss": 0.25,
      "step": 1971
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.564839835756159,
      "learning_rate": 8.586762665221938e-07,
      "loss": 0.2919,
      "step": 1972
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5031125040120625,
      "learning_rate": 8.577677483577496e-07,
      "loss": 0.3025,
      "step": 1973
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3757684711077345,
      "learning_rate": 8.568593500008046e-07,
      "loss": 0.2957,
      "step": 1974
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4814182838221437,
      "learning_rate": 8.559510722165359e-07,
      "loss": 0.296,
      "step": 1975
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1058424660903623,
      "learning_rate": 8.550429157700195e-07,
      "loss": 0.2587,
      "step": 1976
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3051111370602984,
      "learning_rate": 8.541348814262297e-07,
      "loss": 0.2773,
      "step": 1977
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.379656401763318,
      "learning_rate": 8.532269699500376e-07,
      "loss": 0.278,
      "step": 1978
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.128141672834981,
      "learning_rate": 8.523191821062101e-07,
      "loss": 0.2422,
      "step": 1979
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4154798555989068,
      "learning_rate": 8.51411518659411e-07,
      "loss": 0.3067,
      "step": 1980
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4147085149757053,
      "learning_rate": 8.505039803741985e-07,
      "loss": 0.285,
      "step": 1981
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.260022512654864,
      "learning_rate": 8.49596568015026e-07,
      "loss": 0.2586,
      "step": 1982
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.327455245309484,
      "learning_rate": 8.486892823462409e-07,
      "loss": 0.286,
      "step": 1983
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.500033776529244,
      "learning_rate": 8.47782124132083e-07,
      "loss": 0.2992,
      "step": 1984
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.444891884703191,
      "learning_rate": 8.468750941366858e-07,
      "loss": 0.2911,
      "step": 1985
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.284667592281283,
      "learning_rate": 8.459681931240732e-07,
      "loss": 0.2759,
      "step": 1986
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4878491891950203,
      "learning_rate": 8.45061421858163e-07,
      "loss": 0.2881,
      "step": 1987
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3148743275286594,
      "learning_rate": 8.441547811027614e-07,
      "loss": 0.2717,
      "step": 1988
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.246090838636193,
      "learning_rate": 8.432482716215661e-07,
      "loss": 0.2686,
      "step": 1989
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4017844104732218,
      "learning_rate": 8.423418941781628e-07,
      "loss": 0.2761,
      "step": 1990
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4218579296866674,
      "learning_rate": 8.414356495360273e-07,
      "loss": 0.3065,
      "step": 1991
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.341440126862159,
      "learning_rate": 8.405295384585231e-07,
      "loss": 0.2606,
      "step": 1992
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.292885993151981,
      "learning_rate": 8.396235617089012e-07,
      "loss": 0.2555,
      "step": 1993
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.341814398483769,
      "learning_rate": 8.387177200502995e-07,
      "loss": 0.266,
      "step": 1994
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.296872307085636,
      "learning_rate": 8.378120142457414e-07,
      "loss": 0.2583,
      "step": 1995
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3580787443847817,
      "learning_rate": 8.369064450581372e-07,
      "loss": 0.3061,
      "step": 1996
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3394656647930487,
      "learning_rate": 8.360010132502811e-07,
      "loss": 0.2603,
      "step": 1997
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.326461942946112,
      "learning_rate": 8.35095719584852e-07,
      "loss": 0.2937,
      "step": 1998
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.9975048170259715,
      "learning_rate": 8.34190564824412e-07,
      "loss": 0.2967,
      "step": 1999
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1577062774756857,
      "learning_rate": 8.332855497314066e-07,
      "loss": 0.249,
      "step": 2000
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2756120324360114,
      "learning_rate": 8.32380675068164e-07,
      "loss": 0.2726,
      "step": 2001
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6199877483436027,
      "learning_rate": 8.314759415968935e-07,
      "loss": 0.2773,
      "step": 2002
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4368100866555142,
      "learning_rate": 8.305713500796851e-07,
      "loss": 0.2852,
      "step": 2003
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3744985075641285,
      "learning_rate": 8.296669012785104e-07,
      "loss": 0.2776,
      "step": 2004
    },
    {
      "epoch": 0.57,
      "grad_norm": 5.917189729911316,
      "learning_rate": 8.287625959552198e-07,
      "loss": 0.2689,
      "step": 2005
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.355544152202805,
      "learning_rate": 8.278584348715436e-07,
      "loss": 0.2588,
      "step": 2006
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.355190741627796,
      "learning_rate": 8.269544187890898e-07,
      "loss": 0.2782,
      "step": 2007
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4253597317377102,
      "learning_rate": 8.260505484693448e-07,
      "loss": 0.2703,
      "step": 2008
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.433583526241199,
      "learning_rate": 8.251468246736724e-07,
      "loss": 0.2638,
      "step": 2009
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.199948379924113,
      "learning_rate": 8.242432481633118e-07,
      "loss": 0.261,
      "step": 2010
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.5886116191736344,
      "learning_rate": 8.233398196993798e-07,
      "loss": 0.2925,
      "step": 2011
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2534242765762365,
      "learning_rate": 8.224365400428674e-07,
      "loss": 0.2674,
      "step": 2012
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.272714618279941,
      "learning_rate": 8.215334099546409e-07,
      "loss": 0.29,
      "step": 2013
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.430180054552849,
      "learning_rate": 8.206304301954396e-07,
      "loss": 0.2953,
      "step": 2014
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.352628694363435,
      "learning_rate": 8.197276015258772e-07,
      "loss": 0.26,
      "step": 2015
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.416566753780975,
      "learning_rate": 8.188249247064398e-07,
      "loss": 0.3085,
      "step": 2016
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3952528996210893,
      "learning_rate": 8.179224004974856e-07,
      "loss": 0.27,
      "step": 2017
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4678308055327314,
      "learning_rate": 8.17020029659244e-07,
      "loss": 0.2753,
      "step": 2018
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2562591329701824,
      "learning_rate": 8.161178129518154e-07,
      "loss": 0.2535,
      "step": 2019
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.429259179192542,
      "learning_rate": 8.152157511351703e-07,
      "loss": 0.2736,
      "step": 2020
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4260375660546294,
      "learning_rate": 8.143138449691495e-07,
      "loss": 0.2932,
      "step": 2021
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3616449561041235,
      "learning_rate": 8.134120952134613e-07,
      "loss": 0.2741,
      "step": 2022
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3250813299091413,
      "learning_rate": 8.125105026276831e-07,
      "loss": 0.2833,
      "step": 2023
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.252788985570452,
      "learning_rate": 8.116090679712599e-07,
      "loss": 0.2829,
      "step": 2024
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.5183270412019643,
      "learning_rate": 8.107077920035031e-07,
      "loss": 0.2796,
      "step": 2025
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.37778294908015,
      "learning_rate": 8.098066754835915e-07,
      "loss": 0.2816,
      "step": 2026
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.300737037482741,
      "learning_rate": 8.089057191705686e-07,
      "loss": 0.258,
      "step": 2027
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.679227795168687,
      "learning_rate": 8.080049238233438e-07,
      "loss": 0.2379,
      "step": 2028
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.7613834098675243,
      "learning_rate": 8.071042902006895e-07,
      "loss": 0.328,
      "step": 2029
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3468717263559764,
      "learning_rate": 8.06203819061243e-07,
      "loss": 0.2647,
      "step": 2030
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3531096490494323,
      "learning_rate": 8.053035111635053e-07,
      "loss": 0.2864,
      "step": 2031
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.414012788939637,
      "learning_rate": 8.044033672658386e-07,
      "loss": 0.292,
      "step": 2032
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2777315812282772,
      "learning_rate": 8.035033881264674e-07,
      "loss": 0.2596,
      "step": 2033
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4517357307262144,
      "learning_rate": 8.026035745034773e-07,
      "loss": 0.2746,
      "step": 2034
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4070537452132297,
      "learning_rate": 8.017039271548154e-07,
      "loss": 0.2971,
      "step": 2035
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.247094822024942,
      "learning_rate": 8.008044468382876e-07,
      "loss": 0.2794,
      "step": 2036
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.306978962744317,
      "learning_rate": 7.999051343115595e-07,
      "loss": 0.2655,
      "step": 2037
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2123055046356663,
      "learning_rate": 7.990059903321552e-07,
      "loss": 0.2664,
      "step": 2038
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2889931885830315,
      "learning_rate": 7.981070156574571e-07,
      "loss": 0.2636,
      "step": 2039
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.478118432621666,
      "learning_rate": 7.972082110447051e-07,
      "loss": 0.276,
      "step": 2040
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2579090871085845,
      "learning_rate": 7.963095772509959e-07,
      "loss": 0.2579,
      "step": 2041
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.236066241421122,
      "learning_rate": 7.954111150332814e-07,
      "loss": 0.244,
      "step": 2042
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3660082996037533,
      "learning_rate": 7.945128251483702e-07,
      "loss": 0.2734,
      "step": 2043
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.359454646610675,
      "learning_rate": 7.936147083529243e-07,
      "loss": 0.2573,
      "step": 2044
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.236496257553659,
      "learning_rate": 7.927167654034621e-07,
      "loss": 0.2801,
      "step": 2045
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.133062542389784,
      "learning_rate": 7.918189970563534e-07,
      "loss": 0.2848,
      "step": 2046
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2905808901519253,
      "learning_rate": 7.909214040678219e-07,
      "loss": 0.3012,
      "step": 2047
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3380895198629887,
      "learning_rate": 7.900239871939434e-07,
      "loss": 0.278,
      "step": 2048
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2480017735021103,
      "learning_rate": 7.891267471906451e-07,
      "loss": 0.2631,
      "step": 2049
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4311189635822004,
      "learning_rate": 7.882296848137063e-07,
      "loss": 0.2801,
      "step": 2050
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.5144487494732086,
      "learning_rate": 7.873328008187553e-07,
      "loss": 0.3047,
      "step": 2051
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1876903102504475,
      "learning_rate": 7.864360959612713e-07,
      "loss": 0.2638,
      "step": 2052
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2277035817996653,
      "learning_rate": 7.855395709965813e-07,
      "loss": 0.2514,
      "step": 2053
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.491217536073013,
      "learning_rate": 7.846432266798618e-07,
      "loss": 0.2838,
      "step": 2054
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3602798122236064,
      "learning_rate": 7.83747063766137e-07,
      "loss": 0.2715,
      "step": 2055
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.5464354937602813,
      "learning_rate": 7.828510830102784e-07,
      "loss": 0.2894,
      "step": 2056
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2870974956544288,
      "learning_rate": 7.819552851670032e-07,
      "loss": 0.2697,
      "step": 2057
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3234172641559288,
      "learning_rate": 7.810596709908758e-07,
      "loss": 0.2723,
      "step": 2058
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.414991920888712,
      "learning_rate": 7.801642412363041e-07,
      "loss": 0.274,
      "step": 2059
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.444712542812734,
      "learning_rate": 7.792689966575432e-07,
      "loss": 0.2814,
      "step": 2060
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.47079382630738,
      "learning_rate": 7.7837393800869e-07,
      "loss": 0.2675,
      "step": 2061
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1684754178540895,
      "learning_rate": 7.774790660436857e-07,
      "loss": 0.2669,
      "step": 2062
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.460092218537868,
      "learning_rate": 7.765843815163142e-07,
      "loss": 0.2804,
      "step": 2063
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.433323079334218,
      "learning_rate": 7.756898851802012e-07,
      "loss": 0.2714,
      "step": 2064
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.537186553166969,
      "learning_rate": 7.747955777888144e-07,
      "loss": 0.2474,
      "step": 2065
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.267636696071962,
      "learning_rate": 7.739014600954621e-07,
      "loss": 0.2699,
      "step": 2066
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.430074544000473,
      "learning_rate": 7.730075328532929e-07,
      "loss": 0.3004,
      "step": 2067
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4201031125203274,
      "learning_rate": 7.721137968152943e-07,
      "loss": 0.2731,
      "step": 2068
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.290956179149082,
      "learning_rate": 7.712202527342936e-07,
      "loss": 0.2916,
      "step": 2069
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4089321122557297,
      "learning_rate": 7.703269013629563e-07,
      "loss": 0.2728,
      "step": 2070
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.276232550384149,
      "learning_rate": 7.694337434537855e-07,
      "loss": 0.2448,
      "step": 2071
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.496023360365651,
      "learning_rate": 7.685407797591207e-07,
      "loss": 0.3067,
      "step": 2072
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4890362420157524,
      "learning_rate": 7.676480110311384e-07,
      "loss": 0.2688,
      "step": 2073
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5525509171952554,
      "learning_rate": 7.667554380218512e-07,
      "loss": 0.2882,
      "step": 2074
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4611262943609726,
      "learning_rate": 7.658630614831064e-07,
      "loss": 0.2698,
      "step": 2075
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4864295885236416,
      "learning_rate": 7.649708821665855e-07,
      "loss": 0.2881,
      "step": 2076
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2472701497969103,
      "learning_rate": 7.640789008238044e-07,
      "loss": 0.2778,
      "step": 2077
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.195989671921942,
      "learning_rate": 7.631871182061117e-07,
      "loss": 0.2975,
      "step": 2078
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.476679902794854,
      "learning_rate": 7.622955350646898e-07,
      "loss": 0.2998,
      "step": 2079
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.696686446155788,
      "learning_rate": 7.614041521505517e-07,
      "loss": 0.3025,
      "step": 2080
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3976222679749406,
      "learning_rate": 7.605129702145421e-07,
      "loss": 0.2662,
      "step": 2081
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3738204956727182,
      "learning_rate": 7.59621990007337e-07,
      "loss": 0.2548,
      "step": 2082
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3700727964945068,
      "learning_rate": 7.587312122794413e-07,
      "loss": 0.3058,
      "step": 2083
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.265968246116298,
      "learning_rate": 7.578406377811914e-07,
      "loss": 0.3043,
      "step": 2084
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3290623178260126,
      "learning_rate": 7.569502672627502e-07,
      "loss": 0.2747,
      "step": 2085
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.233856682563935,
      "learning_rate": 7.560601014741101e-07,
      "loss": 0.271,
      "step": 2086
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.394115009432413,
      "learning_rate": 7.551701411650908e-07,
      "loss": 0.2599,
      "step": 2087
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.313562738069459,
      "learning_rate": 7.542803870853385e-07,
      "loss": 0.2798,
      "step": 2088
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4791031646621047,
      "learning_rate": 7.533908399843265e-07,
      "loss": 0.3062,
      "step": 2089
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2719449030756977,
      "learning_rate": 7.525015006113536e-07,
      "loss": 0.2697,
      "step": 2090
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.288872717976602,
      "learning_rate": 7.516123697155423e-07,
      "loss": 0.2292,
      "step": 2091
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3190728757400314,
      "learning_rate": 7.507234480458413e-07,
      "loss": 0.2436,
      "step": 2092
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.437882538102759,
      "learning_rate": 7.498347363510219e-07,
      "loss": 0.2779,
      "step": 2093
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2839625441224283,
      "learning_rate": 7.489462353796792e-07,
      "loss": 0.2652,
      "step": 2094
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.461952777175989,
      "learning_rate": 7.480579458802307e-07,
      "loss": 0.2964,
      "step": 2095
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2354629485291837,
      "learning_rate": 7.471698686009149e-07,
      "loss": 0.2695,
      "step": 2096
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4767741728643218,
      "learning_rate": 7.46282004289793e-07,
      "loss": 0.2797,
      "step": 2097
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.435906585416531,
      "learning_rate": 7.453943536947449e-07,
      "loss": 0.2383,
      "step": 2098
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3346098559962103,
      "learning_rate": 7.44506917563473e-07,
      "loss": 0.278,
      "step": 2099
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.6073668597304467,
      "learning_rate": 7.436196966434967e-07,
      "loss": 0.3092,
      "step": 2100
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.342933109587325,
      "learning_rate": 7.427326916821557e-07,
      "loss": 0.2889,
      "step": 2101
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.410578090840819,
      "learning_rate": 7.41845903426606e-07,
      "loss": 0.3085,
      "step": 2102
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.525339056164691,
      "learning_rate": 7.409593326238238e-07,
      "loss": 0.2591,
      "step": 2103
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2445227765026416,
      "learning_rate": 7.400729800205996e-07,
      "loss": 0.2499,
      "step": 2104
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3990383814636984,
      "learning_rate": 7.391868463635412e-07,
      "loss": 0.2688,
      "step": 2105
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2685711781633047,
      "learning_rate": 7.383009323990722e-07,
      "loss": 0.2827,
      "step": 2106
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3055301604055773,
      "learning_rate": 7.3741523887343e-07,
      "loss": 0.2671,
      "step": 2107
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.557224320802981,
      "learning_rate": 7.365297665326677e-07,
      "loss": 0.2837,
      "step": 2108
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.534642480368157,
      "learning_rate": 7.356445161226515e-07,
      "loss": 0.2691,
      "step": 2109
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.356329998491783,
      "learning_rate": 7.347594883890607e-07,
      "loss": 0.2751,
      "step": 2110
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.381325655091747,
      "learning_rate": 7.338746840773865e-07,
      "loss": 0.2565,
      "step": 2111
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.329511980402188,
      "learning_rate": 7.329901039329325e-07,
      "loss": 0.263,
      "step": 2112
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.315894476466652,
      "learning_rate": 7.321057487008135e-07,
      "loss": 0.3127,
      "step": 2113
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2856850537832103,
      "learning_rate": 7.312216191259551e-07,
      "loss": 0.2547,
      "step": 2114
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.421444178537356,
      "learning_rate": 7.303377159530918e-07,
      "loss": 0.3091,
      "step": 2115
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.243237220702003,
      "learning_rate": 7.294540399267682e-07,
      "loss": 0.2723,
      "step": 2116
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5157378247007247,
      "learning_rate": 7.285705917913372e-07,
      "loss": 0.272,
      "step": 2117
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.453951629479585,
      "learning_rate": 7.276873722909604e-07,
      "loss": 0.2891,
      "step": 2118
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.300751946901452,
      "learning_rate": 7.268043821696062e-07,
      "loss": 0.2803,
      "step": 2119
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5899121934419096,
      "learning_rate": 7.259216221710495e-07,
      "loss": 0.2943,
      "step": 2120
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2701438074597586,
      "learning_rate": 7.250390930388723e-07,
      "loss": 0.2895,
      "step": 2121
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.732218819170221,
      "learning_rate": 7.241567955164609e-07,
      "loss": 0.2743,
      "step": 2122
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3387796014090907,
      "learning_rate": 7.232747303470081e-07,
      "loss": 0.2776,
      "step": 2123
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5764272321852286,
      "learning_rate": 7.223928982735095e-07,
      "loss": 0.2892,
      "step": 2124
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.383832218103511,
      "learning_rate": 7.215113000387653e-07,
      "loss": 0.2517,
      "step": 2125
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.464831541878814,
      "learning_rate": 7.206299363853781e-07,
      "loss": 0.2689,
      "step": 2126
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.471104492666639,
      "learning_rate": 7.19748808055753e-07,
      "loss": 0.2722,
      "step": 2127
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.329307106617614,
      "learning_rate": 7.188679157920976e-07,
      "loss": 0.2766,
      "step": 2128
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3363762963465784,
      "learning_rate": 7.179872603364199e-07,
      "loss": 0.2662,
      "step": 2129
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.517632149515168,
      "learning_rate": 7.171068424305286e-07,
      "loss": 0.2746,
      "step": 2130
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5706870497329652,
      "learning_rate": 7.162266628160322e-07,
      "loss": 0.2633,
      "step": 2131
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5354944977016265,
      "learning_rate": 7.153467222343386e-07,
      "loss": 0.2902,
      "step": 2132
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.7456345440150387,
      "learning_rate": 7.144670214266551e-07,
      "loss": 0.287,
      "step": 2133
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3894076576548855,
      "learning_rate": 7.135875611339853e-07,
      "loss": 0.3036,
      "step": 2134
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.569731455695933,
      "learning_rate": 7.127083420971319e-07,
      "loss": 0.2747,
      "step": 2135
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2928767146566758,
      "learning_rate": 7.11829365056693e-07,
      "loss": 0.2583,
      "step": 2136
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.673719868392139,
      "learning_rate": 7.109506307530645e-07,
      "loss": 0.2716,
      "step": 2137
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3579020039258674,
      "learning_rate": 7.100721399264362e-07,
      "loss": 0.2868,
      "step": 2138
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.266348724055388,
      "learning_rate": 7.091938933167936e-07,
      "loss": 0.2455,
      "step": 2139
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.227236593653924,
      "learning_rate": 7.083158916639168e-07,
      "loss": 0.2457,
      "step": 2140
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.332130191191915,
      "learning_rate": 7.074381357073781e-07,
      "loss": 0.2814,
      "step": 2141
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3944392677131745,
      "learning_rate": 7.065606261865452e-07,
      "loss": 0.2994,
      "step": 2142
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4322935118276092,
      "learning_rate": 7.056833638405761e-07,
      "loss": 0.3136,
      "step": 2143
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5671930014493745,
      "learning_rate": 7.048063494084218e-07,
      "loss": 0.2926,
      "step": 2144
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.318564278312186,
      "learning_rate": 7.039295836288237e-07,
      "loss": 0.2545,
      "step": 2145
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.464705863159734,
      "learning_rate": 7.030530672403138e-07,
      "loss": 0.2708,
      "step": 2146
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.19609942264482,
      "learning_rate": 7.021768009812155e-07,
      "loss": 0.2596,
      "step": 2147
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.48676163657945,
      "learning_rate": 7.013007855896396e-07,
      "loss": 0.2627,
      "step": 2148
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3182899968588386,
      "learning_rate": 7.004250218034863e-07,
      "loss": 0.279,
      "step": 2149
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3442020814482314,
      "learning_rate": 6.99549510360444e-07,
      "loss": 0.2704,
      "step": 2150
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.22906721368112,
      "learning_rate": 6.986742519979883e-07,
      "loss": 0.2606,
      "step": 2151
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2962396809815453,
      "learning_rate": 6.977992474533823e-07,
      "loss": 0.2615,
      "step": 2152
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.385100652981511,
      "learning_rate": 6.969244974636744e-07,
      "loss": 0.3269,
      "step": 2153
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5104597172170724,
      "learning_rate": 6.960500027656989e-07,
      "loss": 0.2623,
      "step": 2154
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.343953006786303,
      "learning_rate": 6.951757640960753e-07,
      "loss": 0.2832,
      "step": 2155
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3828926943026008,
      "learning_rate": 6.943017821912068e-07,
      "loss": 0.3251,
      "step": 2156
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.989832235987735,
      "learning_rate": 6.934280577872813e-07,
      "loss": 0.2892,
      "step": 2157
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.724481526794439,
      "learning_rate": 6.925545916202691e-07,
      "loss": 0.2734,
      "step": 2158
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.428029211150564,
      "learning_rate": 6.916813844259233e-07,
      "loss": 0.3051,
      "step": 2159
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.259379386342519,
      "learning_rate": 6.908084369397782e-07,
      "loss": 0.2725,
      "step": 2160
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.402931976669789,
      "learning_rate": 6.899357498971499e-07,
      "loss": 0.3116,
      "step": 2161
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.316897428090023,
      "learning_rate": 6.890633240331353e-07,
      "loss": 0.2725,
      "step": 2162
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2528634833638503,
      "learning_rate": 6.881911600826114e-07,
      "loss": 0.2364,
      "step": 2163
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3508074001500647,
      "learning_rate": 6.873192587802339e-07,
      "loss": 0.273,
      "step": 2164
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.460979430802097,
      "learning_rate": 6.864476208604373e-07,
      "loss": 0.2938,
      "step": 2165
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2576345269648708,
      "learning_rate": 6.855762470574344e-07,
      "loss": 0.2741,
      "step": 2166
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4334518552088227,
      "learning_rate": 6.847051381052165e-07,
      "loss": 0.2845,
      "step": 2167
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3696143758251127,
      "learning_rate": 6.838342947375506e-07,
      "loss": 0.2722,
      "step": 2168
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.32490402082567,
      "learning_rate": 6.829637176879801e-07,
      "loss": 0.2925,
      "step": 2169
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5248417813499,
      "learning_rate": 6.820934076898246e-07,
      "loss": 0.2985,
      "step": 2170
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.445099598380385,
      "learning_rate": 6.812233654761779e-07,
      "loss": 0.3044,
      "step": 2171
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.214657241657144,
      "learning_rate": 6.803535917799097e-07,
      "loss": 0.2414,
      "step": 2172
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3655223286007603,
      "learning_rate": 6.794840873336622e-07,
      "loss": 0.2806,
      "step": 2173
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4011629273315696,
      "learning_rate": 6.786148528698511e-07,
      "loss": 0.298,
      "step": 2174
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3645904484074682,
      "learning_rate": 6.777458891206647e-07,
      "loss": 0.2872,
      "step": 2175
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3303417451654598,
      "learning_rate": 6.768771968180642e-07,
      "loss": 0.2827,
      "step": 2176
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.42116804321365,
      "learning_rate": 6.760087766937806e-07,
      "loss": 0.287,
      "step": 2177
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.382779946387148,
      "learning_rate": 6.751406294793165e-07,
      "loss": 0.2552,
      "step": 2178
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.477672356064572,
      "learning_rate": 6.742727559059447e-07,
      "loss": 0.3088,
      "step": 2179
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.455414571828824,
      "learning_rate": 6.734051567047067e-07,
      "loss": 0.2795,
      "step": 2180
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.351414062300336,
      "learning_rate": 6.72537832606414e-07,
      "loss": 0.282,
      "step": 2181
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.22111778868477,
      "learning_rate": 6.716707843416459e-07,
      "loss": 0.2564,
      "step": 2182
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.175827676310216,
      "learning_rate": 6.708040126407492e-07,
      "loss": 0.2521,
      "step": 2183
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5166909133215754,
      "learning_rate": 6.699375182338378e-07,
      "loss": 0.2835,
      "step": 2184
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.267037067854833,
      "learning_rate": 6.690713018507916e-07,
      "loss": 0.262,
      "step": 2185
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.288265638185063,
      "learning_rate": 6.682053642212575e-07,
      "loss": 0.2488,
      "step": 2186
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2731622558940687,
      "learning_rate": 6.673397060746469e-07,
      "loss": 0.2665,
      "step": 2187
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4917477923811013,
      "learning_rate": 6.664743281401351e-07,
      "loss": 0.292,
      "step": 2188
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3827017764759604,
      "learning_rate": 6.656092311466623e-07,
      "loss": 0.2527,
      "step": 2189
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.224388121091182,
      "learning_rate": 6.647444158229318e-07,
      "loss": 0.2426,
      "step": 2190
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2232829090554005,
      "learning_rate": 6.638798828974099e-07,
      "loss": 0.2419,
      "step": 2191
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5867977881897732,
      "learning_rate": 6.630156330983243e-07,
      "loss": 0.2956,
      "step": 2192
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4204940802285067,
      "learning_rate": 6.621516671536649e-07,
      "loss": 0.3057,
      "step": 2193
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.348152749985618,
      "learning_rate": 6.612879857911824e-07,
      "loss": 0.294,
      "step": 2194
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3451848527753048,
      "learning_rate": 6.604245897383869e-07,
      "loss": 0.2748,
      "step": 2195
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.8676379377626224,
      "learning_rate": 6.595614797225496e-07,
      "loss": 0.2689,
      "step": 2196
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3611535849479055,
      "learning_rate": 6.586986564706998e-07,
      "loss": 0.2612,
      "step": 2197
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.7435701756217252,
      "learning_rate": 6.57836120709626e-07,
      "loss": 0.3083,
      "step": 2198
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.529965900786767,
      "learning_rate": 6.569738731658734e-07,
      "loss": 0.3059,
      "step": 2199
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2625128317474608,
      "learning_rate": 6.56111914565745e-07,
      "loss": 0.2622,
      "step": 2200
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3355623198620297,
      "learning_rate": 6.552502456353011e-07,
      "loss": 0.2772,
      "step": 2201
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.426170884306247,
      "learning_rate": 6.543888671003572e-07,
      "loss": 0.2864,
      "step": 2202
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.40017155684168,
      "learning_rate": 6.535277796864841e-07,
      "loss": 0.2741,
      "step": 2203
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.52362240442778,
      "learning_rate": 6.526669841190078e-07,
      "loss": 0.2956,
      "step": 2204
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.356002953270973,
      "learning_rate": 6.518064811230082e-07,
      "loss": 0.2837,
      "step": 2205
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.6796883261044893,
      "learning_rate": 6.509462714233193e-07,
      "loss": 0.3092,
      "step": 2206
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3176863434708137,
      "learning_rate": 6.500863557445273e-07,
      "loss": 0.2456,
      "step": 2207
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.6021904172206427,
      "learning_rate": 6.49226734810971e-07,
      "loss": 0.2975,
      "step": 2208
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4935328971745925,
      "learning_rate": 6.483674093467408e-07,
      "loss": 0.2831,
      "step": 2209
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3839197112529766,
      "learning_rate": 6.475083800756791e-07,
      "loss": 0.2826,
      "step": 2210
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.299681443388202,
      "learning_rate": 6.466496477213776e-07,
      "loss": 0.2215,
      "step": 2211
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2836145958696776,
      "learning_rate": 6.457912130071785e-07,
      "loss": 0.2875,
      "step": 2212
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.437927993707982,
      "learning_rate": 6.449330766561733e-07,
      "loss": 0.3069,
      "step": 2213
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.416185295716576,
      "learning_rate": 6.440752393912015e-07,
      "loss": 0.2728,
      "step": 2214
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2801588482913173,
      "learning_rate": 6.43217701934852e-07,
      "loss": 0.2855,
      "step": 2215
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2867858493533832,
      "learning_rate": 6.4236046500946e-07,
      "loss": 0.2627,
      "step": 2216
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.587876434898805,
      "learning_rate": 6.41503529337108e-07,
      "loss": 0.293,
      "step": 2217
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3088016451229136,
      "learning_rate": 6.406468956396249e-07,
      "loss": 0.2776,
      "step": 2218
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3788309790734488,
      "learning_rate": 6.397905646385844e-07,
      "loss": 0.2834,
      "step": 2219
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.278441350078684,
      "learning_rate": 6.389345370553064e-07,
      "loss": 0.2475,
      "step": 2220
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.9142417532273233,
      "learning_rate": 6.380788136108546e-07,
      "loss": 0.2945,
      "step": 2221
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.344305602286148,
      "learning_rate": 6.372233950260367e-07,
      "loss": 0.2612,
      "step": 2222
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.33848755529304,
      "learning_rate": 6.363682820214031e-07,
      "loss": 0.25,
      "step": 2223
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.429139743401095,
      "learning_rate": 6.355134753172473e-07,
      "loss": 0.2767,
      "step": 2224
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.5080203198990803,
      "learning_rate": 6.34658975633605e-07,
      "loss": 0.293,
      "step": 2225
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.488305071582111,
      "learning_rate": 6.338047836902527e-07,
      "loss": 0.2923,
      "step": 2226
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.6386594684053755,
      "learning_rate": 6.329509002067079e-07,
      "loss": 0.2638,
      "step": 2227
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.499223534801197,
      "learning_rate": 6.320973259022286e-07,
      "loss": 0.2789,
      "step": 2228
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.349727343761741,
      "learning_rate": 6.312440614958114e-07,
      "loss": 0.3011,
      "step": 2229
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.399789289121343,
      "learning_rate": 6.303911077061937e-07,
      "loss": 0.2913,
      "step": 2230
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.468975784925108,
      "learning_rate": 6.29538465251849e-07,
      "loss": 0.2514,
      "step": 2231
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3046212635405547,
      "learning_rate": 6.286861348509902e-07,
      "loss": 0.267,
      "step": 2232
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2861395547404557,
      "learning_rate": 6.278341172215669e-07,
      "loss": 0.2329,
      "step": 2233
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2936158988880835,
      "learning_rate": 6.269824130812644e-07,
      "loss": 0.2568,
      "step": 2234
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.241563640965623,
      "learning_rate": 6.261310231475054e-07,
      "loss": 0.2582,
      "step": 2235
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4414301771540305,
      "learning_rate": 6.252799481374472e-07,
      "loss": 0.287,
      "step": 2236
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2552834975225498,
      "learning_rate": 6.244291887679818e-07,
      "loss": 0.2436,
      "step": 2237
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.380287911860163,
      "learning_rate": 6.235787457557349e-07,
      "loss": 0.266,
      "step": 2238
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3759044308925366,
      "learning_rate": 6.227286198170662e-07,
      "loss": 0.299,
      "step": 2239
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.477400443290136,
      "learning_rate": 6.218788116680689e-07,
      "loss": 0.2634,
      "step": 2240
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4706236064839318,
      "learning_rate": 6.210293220245677e-07,
      "loss": 0.3339,
      "step": 2241
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.4793553558938775,
      "learning_rate": 6.201801516021189e-07,
      "loss": 0.3025,
      "step": 2242
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.228421467160145,
      "learning_rate": 6.193313011160103e-07,
      "loss": 0.2664,
      "step": 2243
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.437534621664695,
      "learning_rate": 6.184827712812603e-07,
      "loss": 0.2625,
      "step": 2244
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3956581186100396,
      "learning_rate": 6.176345628126175e-07,
      "loss": 0.2898,
      "step": 2245
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.239063252739338,
      "learning_rate": 6.167866764245586e-07,
      "loss": 0.2541,
      "step": 2246
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.476757025154476,
      "learning_rate": 6.159391128312899e-07,
      "loss": 0.3004,
      "step": 2247
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2569840619493933,
      "learning_rate": 6.150918727467454e-07,
      "loss": 0.275,
      "step": 2248
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2133733010199363,
      "learning_rate": 6.142449568845877e-07,
      "loss": 0.287,
      "step": 2249
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.364274466964281,
      "learning_rate": 6.133983659582047e-07,
      "loss": 0.2928,
      "step": 2250
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.531639474882001,
      "learning_rate": 6.125521006807115e-07,
      "loss": 0.2825,
      "step": 2251
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.300776299226663,
      "learning_rate": 6.11706161764949e-07,
      "loss": 0.2593,
      "step": 2252
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1737940033605505,
      "learning_rate": 6.10860549923482e-07,
      "loss": 0.2513,
      "step": 2253
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.465815246077779,
      "learning_rate": 6.10015265868602e-07,
      "loss": 0.2757,
      "step": 2254
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2752963971548117,
      "learning_rate": 6.091703103123222e-07,
      "loss": 0.255,
      "step": 2255
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.501046208396167,
      "learning_rate": 6.083256839663806e-07,
      "loss": 0.2861,
      "step": 2256
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5397860645465875,
      "learning_rate": 6.074813875422365e-07,
      "loss": 0.3324,
      "step": 2257
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3053544026215484,
      "learning_rate": 6.066374217510724e-07,
      "loss": 0.2752,
      "step": 2258
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3995880048556217,
      "learning_rate": 6.057937873037924e-07,
      "loss": 0.29,
      "step": 2259
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.523188766633019,
      "learning_rate": 6.04950484911021e-07,
      "loss": 0.2783,
      "step": 2260
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.228431929782206,
      "learning_rate": 6.041075152831025e-07,
      "loss": 0.2707,
      "step": 2261
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.9934974905244562,
      "learning_rate": 6.032648791301018e-07,
      "loss": 0.2906,
      "step": 2262
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3233178461924853,
      "learning_rate": 6.024225771618023e-07,
      "loss": 0.2635,
      "step": 2263
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.723255393523626,
      "learning_rate": 6.015806100877069e-07,
      "loss": 0.2819,
      "step": 2264
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.571178926895352,
      "learning_rate": 6.007389786170354e-07,
      "loss": 0.3109,
      "step": 2265
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2559815923268878,
      "learning_rate": 5.998976834587246e-07,
      "loss": 0.2704,
      "step": 2266
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.49947468441889,
      "learning_rate": 5.990567253214295e-07,
      "loss": 0.2818,
      "step": 2267
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5920054788059637,
      "learning_rate": 5.98216104913519e-07,
      "loss": 0.2952,
      "step": 2268
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.999502386879307,
      "learning_rate": 5.973758229430805e-07,
      "loss": 0.2542,
      "step": 2269
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.282738274536259,
      "learning_rate": 5.965358801179137e-07,
      "loss": 0.2721,
      "step": 2270
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2313925933260172,
      "learning_rate": 5.956962771455337e-07,
      "loss": 0.2787,
      "step": 2271
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2704746180326767,
      "learning_rate": 5.948570147331692e-07,
      "loss": 0.2731,
      "step": 2272
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5502486483761717,
      "learning_rate": 5.940180935877619e-07,
      "loss": 0.3083,
      "step": 2273
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.4309385928987153,
      "learning_rate": 5.931795144159665e-07,
      "loss": 0.2857,
      "step": 2274
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1658776156389146,
      "learning_rate": 5.923412779241492e-07,
      "loss": 0.2644,
      "step": 2275
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.494804348637084,
      "learning_rate": 5.91503384818388e-07,
      "loss": 0.2412,
      "step": 2276
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2382077913458502,
      "learning_rate": 5.906658358044703e-07,
      "loss": 0.2652,
      "step": 2277
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2962584594206747,
      "learning_rate": 5.89828631587896e-07,
      "loss": 0.2807,
      "step": 2278
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.188276662228928,
      "learning_rate": 5.889917728738724e-07,
      "loss": 0.2486,
      "step": 2279
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.527543431519013,
      "learning_rate": 5.88155260367317e-07,
      "loss": 0.2352,
      "step": 2280
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.302003991338731,
      "learning_rate": 5.873190947728551e-07,
      "loss": 0.2543,
      "step": 2281
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2480317567146004,
      "learning_rate": 5.864832767948198e-07,
      "loss": 0.2514,
      "step": 2282
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.524084873276555,
      "learning_rate": 5.85647807137252e-07,
      "loss": 0.2942,
      "step": 2283
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.369980013342227,
      "learning_rate": 5.848126865038989e-07,
      "loss": 0.2793,
      "step": 2284
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.8317055902232497,
      "learning_rate": 5.83977915598213e-07,
      "loss": 0.2724,
      "step": 2285
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4188479265192777,
      "learning_rate": 5.83143495123353e-07,
      "loss": 0.2642,
      "step": 2286
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6498348586511487,
      "learning_rate": 5.823094257821821e-07,
      "loss": 0.2817,
      "step": 2287
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.568128866402004,
      "learning_rate": 5.814757082772682e-07,
      "loss": 0.2934,
      "step": 2288
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2940019570209067,
      "learning_rate": 5.806423433108821e-07,
      "loss": 0.2577,
      "step": 2289
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.348630798206626,
      "learning_rate": 5.798093315849983e-07,
      "loss": 0.2721,
      "step": 2290
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.394079405948414,
      "learning_rate": 5.789766738012931e-07,
      "loss": 0.2801,
      "step": 2291
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6282642371955447,
      "learning_rate": 5.781443706611454e-07,
      "loss": 0.289,
      "step": 2292
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3702665835924117,
      "learning_rate": 5.773124228656348e-07,
      "loss": 0.2578,
      "step": 2293
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.8168042286446826,
      "learning_rate": 5.764808311155418e-07,
      "loss": 0.3191,
      "step": 2294
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4400086616895784,
      "learning_rate": 5.756495961113468e-07,
      "loss": 0.2866,
      "step": 2295
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.304434447879119,
      "learning_rate": 5.748187185532305e-07,
      "loss": 0.2867,
      "step": 2296
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4909981692133747,
      "learning_rate": 5.739881991410707e-07,
      "loss": 0.2543,
      "step": 2297
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2463457224543433,
      "learning_rate": 5.731580385744457e-07,
      "loss": 0.2313,
      "step": 2298
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.857451260559649,
      "learning_rate": 5.723282375526302e-07,
      "loss": 0.2663,
      "step": 2299
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2112224349538265,
      "learning_rate": 5.714987967745967e-07,
      "loss": 0.2684,
      "step": 2300
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3854594089360917,
      "learning_rate": 5.706697169390134e-07,
      "loss": 0.2865,
      "step": 2301
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6668868136628197,
      "learning_rate": 5.698409987442448e-07,
      "loss": 0.257,
      "step": 2302
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.137828218966821,
      "learning_rate": 5.690126428883515e-07,
      "loss": 0.276,
      "step": 2303
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.486527199626064,
      "learning_rate": 5.681846500690884e-07,
      "loss": 0.2888,
      "step": 2304
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.35238133058172,
      "learning_rate": 5.673570209839045e-07,
      "loss": 0.2749,
      "step": 2305
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.2026123661227297,
      "learning_rate": 5.66529756329942e-07,
      "loss": 0.2537,
      "step": 2306
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2395741026150864,
      "learning_rate": 5.657028568040365e-07,
      "loss": 0.2884,
      "step": 2307
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4458748873204863,
      "learning_rate": 5.64876323102717e-07,
      "loss": 0.2608,
      "step": 2308
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.7200316283550663,
      "learning_rate": 5.640501559222034e-07,
      "loss": 0.278,
      "step": 2309
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.209972703940308,
      "learning_rate": 5.63224355958406e-07,
      "loss": 0.2916,
      "step": 2310
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5998394218333085,
      "learning_rate": 5.623989239069274e-07,
      "loss": 0.3057,
      "step": 2311
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.38565340315908,
      "learning_rate": 5.615738604630591e-07,
      "loss": 0.268,
      "step": 2312
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.568508776646103,
      "learning_rate": 5.607491663217838e-07,
      "loss": 0.3047,
      "step": 2313
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3544146184023838,
      "learning_rate": 5.599248421777707e-07,
      "loss": 0.292,
      "step": 2314
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.511187292747781,
      "learning_rate": 5.591008887253792e-07,
      "loss": 0.2809,
      "step": 2315
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3207736387674878,
      "learning_rate": 5.582773066586552e-07,
      "loss": 0.2698,
      "step": 2316
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.396898601611306,
      "learning_rate": 5.574540966713337e-07,
      "loss": 0.2789,
      "step": 2317
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5437105260347734,
      "learning_rate": 5.566312594568339e-07,
      "loss": 0.2654,
      "step": 2318
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.389316829041838,
      "learning_rate": 5.558087957082623e-07,
      "loss": 0.2631,
      "step": 2319
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3657439718027082,
      "learning_rate": 5.549867061184108e-07,
      "loss": 0.2951,
      "step": 2320
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.613468479680014,
      "learning_rate": 5.541649913797558e-07,
      "loss": 0.2997,
      "step": 2321
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.7800748740297463,
      "learning_rate": 5.533436521844581e-07,
      "loss": 0.271,
      "step": 2322
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4391679900312337,
      "learning_rate": 5.525226892243623e-07,
      "loss": 0.2932,
      "step": 2323
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.276031842799922,
      "learning_rate": 5.517021031909958e-07,
      "loss": 0.2882,
      "step": 2324
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.400813584246261,
      "learning_rate": 5.508818947755686e-07,
      "loss": 0.315,
      "step": 2325
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3432150318418765,
      "learning_rate": 5.500620646689728e-07,
      "loss": 0.258,
      "step": 2326
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.34350737859966,
      "learning_rate": 5.492426135617815e-07,
      "loss": 0.2588,
      "step": 2327
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.461391081740737,
      "learning_rate": 5.484235421442491e-07,
      "loss": 0.2932,
      "step": 2328
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2771199211489286,
      "learning_rate": 5.476048511063095e-07,
      "loss": 0.2641,
      "step": 2329
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5021933751522076,
      "learning_rate": 5.467865411375765e-07,
      "loss": 0.282,
      "step": 2330
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.32908541394442,
      "learning_rate": 5.459686129273432e-07,
      "loss": 0.2632,
      "step": 2331
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.67185336758561,
      "learning_rate": 5.451510671645806e-07,
      "loss": 0.2974,
      "step": 2332
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3283227432999998,
      "learning_rate": 5.443339045379379e-07,
      "loss": 0.2605,
      "step": 2333
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.419537722246254,
      "learning_rate": 5.435171257357416e-07,
      "loss": 0.2619,
      "step": 2334
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3691225216183653,
      "learning_rate": 5.427007314459948e-07,
      "loss": 0.2878,
      "step": 2335
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4319599179402496,
      "learning_rate": 5.418847223563761e-07,
      "loss": 0.2798,
      "step": 2336
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3038917195379143,
      "learning_rate": 5.410690991542407e-07,
      "loss": 0.2465,
      "step": 2337
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3787734383661636,
      "learning_rate": 5.402538625266183e-07,
      "loss": 0.2965,
      "step": 2338
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2196406147403054,
      "learning_rate": 5.394390131602132e-07,
      "loss": 0.2643,
      "step": 2339
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4182567726274042,
      "learning_rate": 5.386245517414026e-07,
      "loss": 0.245,
      "step": 2340
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.343449748727217,
      "learning_rate": 5.378104789562373e-07,
      "loss": 0.2887,
      "step": 2341
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2836474205993706,
      "learning_rate": 5.36996795490442e-07,
      "loss": 0.2683,
      "step": 2342
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3647325537081225,
      "learning_rate": 5.361835020294122e-07,
      "loss": 0.2615,
      "step": 2343
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.8029499896540897,
      "learning_rate": 5.353705992582146e-07,
      "loss": 0.2397,
      "step": 2344
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3598025459643845,
      "learning_rate": 5.345580878615877e-07,
      "loss": 0.2764,
      "step": 2345
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2772605562966164,
      "learning_rate": 5.337459685239394e-07,
      "loss": 0.2361,
      "step": 2346
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.398278736692514,
      "learning_rate": 5.329342419293488e-07,
      "loss": 0.277,
      "step": 2347
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4037681130844675,
      "learning_rate": 5.321229087615634e-07,
      "loss": 0.2763,
      "step": 2348
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.485457451422128,
      "learning_rate": 5.313119697039984e-07,
      "loss": 0.2885,
      "step": 2349
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3282359187966373,
      "learning_rate": 5.305014254397377e-07,
      "loss": 0.2649,
      "step": 2350
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.38908486454763,
      "learning_rate": 5.296912766515338e-07,
      "loss": 0.2835,
      "step": 2351
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.592192298182752,
      "learning_rate": 5.288815240218048e-07,
      "loss": 0.3013,
      "step": 2352
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.35159951282308,
      "learning_rate": 5.280721682326348e-07,
      "loss": 0.2669,
      "step": 2353
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.314369465048757,
      "learning_rate": 5.272632099657743e-07,
      "loss": 0.2702,
      "step": 2354
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.5082008674395184,
      "learning_rate": 5.264546499026387e-07,
      "loss": 0.2712,
      "step": 2355
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1962925179788866,
      "learning_rate": 5.256464887243094e-07,
      "loss": 0.2556,
      "step": 2356
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.243902743791842,
      "learning_rate": 5.248387271115291e-07,
      "loss": 0.2622,
      "step": 2357
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.281956857693016,
      "learning_rate": 5.240313657447057e-07,
      "loss": 0.2766,
      "step": 2358
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.419361403863879,
      "learning_rate": 5.232244053039099e-07,
      "loss": 0.2697,
      "step": 2359
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3917488210639513,
      "learning_rate": 5.224178464688741e-07,
      "loss": 0.2663,
      "step": 2360
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.331041543408664,
      "learning_rate": 5.216116899189928e-07,
      "loss": 0.2658,
      "step": 2361
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.821521842305285,
      "learning_rate": 5.208059363333217e-07,
      "loss": 0.296,
      "step": 2362
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.868996764673546,
      "learning_rate": 5.200005863905767e-07,
      "loss": 0.2982,
      "step": 2363
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2425213278054152,
      "learning_rate": 5.191956407691343e-07,
      "loss": 0.2369,
      "step": 2364
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.332277471029029,
      "learning_rate": 5.183911001470295e-07,
      "loss": 0.2435,
      "step": 2365
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.493223595181339,
      "learning_rate": 5.17586965201957e-07,
      "loss": 0.268,
      "step": 2366
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4352432752453614,
      "learning_rate": 5.167832366112694e-07,
      "loss": 0.2768,
      "step": 2367
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.461278109640111,
      "learning_rate": 5.159799150519772e-07,
      "loss": 0.3012,
      "step": 2368
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.146603860090843,
      "learning_rate": 5.151770012007479e-07,
      "loss": 0.2744,
      "step": 2369
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2789999167061885,
      "learning_rate": 5.143744957339056e-07,
      "loss": 0.2775,
      "step": 2370
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1648708799970486,
      "learning_rate": 5.135723993274303e-07,
      "loss": 0.2581,
      "step": 2371
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.40279231393275,
      "learning_rate": 5.127707126569576e-07,
      "loss": 0.2625,
      "step": 2372
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.879779242816951,
      "learning_rate": 5.11969436397778e-07,
      "loss": 0.2719,
      "step": 2373
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4951831674750053,
      "learning_rate": 5.111685712248363e-07,
      "loss": 0.2983,
      "step": 2374
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3148481953963143,
      "learning_rate": 5.103681178127302e-07,
      "loss": 0.2609,
      "step": 2375
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4355470361980087,
      "learning_rate": 5.095680768357122e-07,
      "loss": 0.268,
      "step": 2376
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.957762249807943,
      "learning_rate": 5.087684489676861e-07,
      "loss": 0.2723,
      "step": 2377
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3268375859011248,
      "learning_rate": 5.079692348822085e-07,
      "loss": 0.2763,
      "step": 2378
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.131960016007764,
      "learning_rate": 5.071704352524862e-07,
      "loss": 0.2984,
      "step": 2379
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.40458029742982,
      "learning_rate": 5.06372050751378e-07,
      "loss": 0.2836,
      "step": 2380
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.7074747538153594,
      "learning_rate": 5.055740820513932e-07,
      "loss": 0.2832,
      "step": 2381
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2862593178591277,
      "learning_rate": 5.047765298246907e-07,
      "loss": 0.2952,
      "step": 2382
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3946399410902326,
      "learning_rate": 5.039793947430773e-07,
      "loss": 0.2616,
      "step": 2383
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3943577455752307,
      "learning_rate": 5.031826774780097e-07,
      "loss": 0.2822,
      "step": 2384
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3088941950755317,
      "learning_rate": 5.023863787005929e-07,
      "loss": 0.2454,
      "step": 2385
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.28104166201968,
      "learning_rate": 5.015904990815792e-07,
      "loss": 0.2674,
      "step": 2386
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4601117984407956,
      "learning_rate": 5.007950392913662e-07,
      "loss": 0.3097,
      "step": 2387
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.292624142786809,
      "learning_rate": 5.000000000000002e-07,
      "loss": 0.2736,
      "step": 2388
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4788897424360155,
      "learning_rate": 4.992053818771714e-07,
      "loss": 0.2613,
      "step": 2389
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.359979008814565,
      "learning_rate": 4.984111855922176e-07,
      "loss": 0.2764,
      "step": 2390
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2760545050797067,
      "learning_rate": 4.976174118141185e-07,
      "loss": 0.2722,
      "step": 2391
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4792421447033077,
      "learning_rate": 4.968240612114995e-07,
      "loss": 0.2531,
      "step": 2392
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3183540813170143,
      "learning_rate": 4.960311344526292e-07,
      "loss": 0.2784,
      "step": 2393
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.286214631829258,
      "learning_rate": 4.952386322054188e-07,
      "loss": 0.2646,
      "step": 2394
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.403792277262232,
      "learning_rate": 4.944465551374238e-07,
      "loss": 0.2963,
      "step": 2395
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.229517392197221,
      "learning_rate": 4.936549039158385e-07,
      "loss": 0.2491,
      "step": 2396
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.344675961661458,
      "learning_rate": 4.928636792075007e-07,
      "loss": 0.2838,
      "step": 2397
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.208281211784219,
      "learning_rate": 4.920728816788883e-07,
      "loss": 0.2643,
      "step": 2398
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.292389280601437,
      "learning_rate": 4.912825119961194e-07,
      "loss": 0.2835,
      "step": 2399
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.458712202984134,
      "learning_rate": 4.904925708249516e-07,
      "loss": 0.2845,
      "step": 2400
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3774224537052615,
      "learning_rate": 4.897030588307816e-07,
      "loss": 0.2813,
      "step": 2401
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.297106016389134,
      "learning_rate": 4.889139766786447e-07,
      "loss": 0.2957,
      "step": 2402
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4943961815678883,
      "learning_rate": 4.881253250332141e-07,
      "loss": 0.2811,
      "step": 2403
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.319088781552554,
      "learning_rate": 4.873371045588001e-07,
      "loss": 0.2814,
      "step": 2404
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.352193260486395,
      "learning_rate": 4.865493159193504e-07,
      "loss": 0.2689,
      "step": 2405
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.5413080645396913,
      "learning_rate": 4.857619597784482e-07,
      "loss": 0.3134,
      "step": 2406
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.420124609671713,
      "learning_rate": 4.84975036799313e-07,
      "loss": 0.2668,
      "step": 2407
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.8124452637866133,
      "learning_rate": 4.841885476447995e-07,
      "loss": 0.2866,
      "step": 2408
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.258369093161929,
      "learning_rate": 4.834024929773956e-07,
      "loss": 0.2565,
      "step": 2409
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2445143082198666,
      "learning_rate": 4.826168734592253e-07,
      "loss": 0.2663,
      "step": 2410
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.439178512532609,
      "learning_rate": 4.818316897520449e-07,
      "loss": 0.2866,
      "step": 2411
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4628414329677835,
      "learning_rate": 4.810469425172439e-07,
      "loss": 0.2673,
      "step": 2412
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2814847882421305,
      "learning_rate": 4.802626324158432e-07,
      "loss": 0.2663,
      "step": 2413
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.436509033085469,
      "learning_rate": 4.794787601084965e-07,
      "loss": 0.2738,
      "step": 2414
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.347829927437003,
      "learning_rate": 4.786953262554891e-07,
      "loss": 0.2818,
      "step": 2415
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2396757844804287,
      "learning_rate": 4.779123315167361e-07,
      "loss": 0.2528,
      "step": 2416
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3879035459539453,
      "learning_rate": 4.771297765517833e-07,
      "loss": 0.264,
      "step": 2417
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.413721326849401,
      "learning_rate": 4.763476620198047e-07,
      "loss": 0.2489,
      "step": 2418
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.353494934214268,
      "learning_rate": 4.755659885796054e-07,
      "loss": 0.2713,
      "step": 2419
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2907994697079594,
      "learning_rate": 4.747847568896177e-07,
      "loss": 0.2749,
      "step": 2420
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.340449182606632,
      "learning_rate": 4.740039676079022e-07,
      "loss": 0.295,
      "step": 2421
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2745448209728005,
      "learning_rate": 4.73223621392146e-07,
      "loss": 0.2573,
      "step": 2422
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.326430648936994,
      "learning_rate": 4.724437188996637e-07,
      "loss": 0.2724,
      "step": 2423
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.574729677370753,
      "learning_rate": 4.716642607873967e-07,
      "loss": 0.3077,
      "step": 2424
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.368767987272835,
      "learning_rate": 4.708852477119116e-07,
      "loss": 0.2912,
      "step": 2425
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.327984474192384,
      "learning_rate": 4.7010668032939925e-07,
      "loss": 0.2689,
      "step": 2426
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3809594525501567,
      "learning_rate": 4.6932855929567606e-07,
      "loss": 0.2723,
      "step": 2427
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2477212537841136,
      "learning_rate": 4.6855088526618204e-07,
      "loss": 0.2677,
      "step": 2428
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.8939219591101,
      "learning_rate": 4.6777365889598176e-07,
      "loss": 0.2546,
      "step": 2429
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3686432161421394,
      "learning_rate": 4.6699688083976085e-07,
      "loss": 0.2646,
      "step": 2430
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.425030952188078,
      "learning_rate": 4.662205517518286e-07,
      "loss": 0.2732,
      "step": 2431
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.50809725901002,
      "learning_rate": 4.6544467228611584e-07,
      "loss": 0.2584,
      "step": 2432
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.479077560409588,
      "learning_rate": 4.646692430961744e-07,
      "loss": 0.2749,
      "step": 2433
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.7200829790390317,
      "learning_rate": 4.6389426483517736e-07,
      "loss": 0.2805,
      "step": 2434
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3144164267512886,
      "learning_rate": 4.631197381559173e-07,
      "loss": 0.2975,
      "step": 2435
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.459976750375668,
      "learning_rate": 4.6234566371080697e-07,
      "loss": 0.2956,
      "step": 2436
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4508229207793812,
      "learning_rate": 4.6157204215187795e-07,
      "loss": 0.2788,
      "step": 2437
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.418521332143917,
      "learning_rate": 4.6079887413078034e-07,
      "loss": 0.2774,
      "step": 2438
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3565178114840957,
      "learning_rate": 4.6002616029878226e-07,
      "loss": 0.2461,
      "step": 2439
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.318714557506511,
      "learning_rate": 4.5925390130676913e-07,
      "loss": 0.2673,
      "step": 2440
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.570508945139977,
      "learning_rate": 4.584820978052434e-07,
      "loss": 0.3228,
      "step": 2441
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.426739836009848,
      "learning_rate": 4.5771075044432385e-07,
      "loss": 0.2663,
      "step": 2442
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5163005062865373,
      "learning_rate": 4.5693985987374475e-07,
      "loss": 0.3013,
      "step": 2443
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.397302513009581,
      "learning_rate": 4.5616942674285596e-07,
      "loss": 0.2689,
      "step": 2444
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.384358665290794,
      "learning_rate": 4.553994517006219e-07,
      "loss": 0.274,
      "step": 2445
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3780130465359197,
      "learning_rate": 4.54629935395621e-07,
      "loss": 0.2703,
      "step": 2446
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.41175660090204,
      "learning_rate": 4.5386087847604583e-07,
      "loss": 0.2761,
      "step": 2447
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2308940957310397,
      "learning_rate": 4.5309228158970027e-07,
      "loss": 0.2744,
      "step": 2448
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2622794723399196,
      "learning_rate": 4.523241453840033e-07,
      "loss": 0.2634,
      "step": 2449
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4892472914365382,
      "learning_rate": 4.51556470505984e-07,
      "loss": 0.251,
      "step": 2450
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5240215459027504,
      "learning_rate": 4.507892576022838e-07,
      "loss": 0.2355,
      "step": 2451
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.206020087049321,
      "learning_rate": 4.500225073191539e-07,
      "loss": 0.2829,
      "step": 2452
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.254017885293891,
      "learning_rate": 4.4925622030245645e-07,
      "loss": 0.2649,
      "step": 2453
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3498609298835365,
      "learning_rate": 4.484903971976641e-07,
      "loss": 0.2857,
      "step": 2454
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0971087145254033,
      "learning_rate": 4.4772503864985813e-07,
      "loss": 0.2547,
      "step": 2455
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.321955789347521,
      "learning_rate": 4.469601453037276e-07,
      "loss": 0.2653,
      "step": 2456
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.205208474190725,
      "learning_rate": 4.4619571780357046e-07,
      "loss": 0.2622,
      "step": 2457
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2349667352343308,
      "learning_rate": 4.4543175679329337e-07,
      "loss": 0.2675,
      "step": 2458
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.49822879969086,
      "learning_rate": 4.4466826291640867e-07,
      "loss": 0.2621,
      "step": 2459
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.338010522791837,
      "learning_rate": 4.439052368160351e-07,
      "loss": 0.2782,
      "step": 2460
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5150398161628766,
      "learning_rate": 4.43142679134898e-07,
      "loss": 0.2697,
      "step": 2461
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.324998946630601,
      "learning_rate": 4.4238059051532774e-07,
      "loss": 0.2486,
      "step": 2462
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.748258160226059,
      "learning_rate": 4.4161897159926044e-07,
      "loss": 0.2896,
      "step": 2463
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5017394373991007,
      "learning_rate": 4.4085782302823604e-07,
      "loss": 0.2904,
      "step": 2464
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.378543367007354,
      "learning_rate": 4.400971454433975e-07,
      "loss": 0.2693,
      "step": 2465
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.422935392437184,
      "learning_rate": 4.39336939485492e-07,
      "loss": 0.2735,
      "step": 2466
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3311674451261903,
      "learning_rate": 4.3857720579486887e-07,
      "loss": 0.2516,
      "step": 2467
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.36415216404044,
      "learning_rate": 4.3781794501148105e-07,
      "loss": 0.2804,
      "step": 2468
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.6607955171263975,
      "learning_rate": 4.3705915777488113e-07,
      "loss": 0.2872,
      "step": 2469
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4385599819583006,
      "learning_rate": 4.363008447242239e-07,
      "loss": 0.3045,
      "step": 2470
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.398963114273932,
      "learning_rate": 4.355430064982646e-07,
      "loss": 0.2633,
      "step": 2471
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4263112035637553,
      "learning_rate": 4.3478564373535844e-07,
      "loss": 0.2872,
      "step": 2472
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2860756043193895,
      "learning_rate": 4.3402875707346033e-07,
      "loss": 0.2481,
      "step": 2473
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3064939908293614,
      "learning_rate": 4.3327234715012373e-07,
      "loss": 0.3014,
      "step": 2474
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3342857525505023,
      "learning_rate": 4.3251641460250086e-07,
      "loss": 0.2614,
      "step": 2475
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3856176203445134,
      "learning_rate": 4.3176096006734175e-07,
      "loss": 0.2783,
      "step": 2476
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3528075219115725,
      "learning_rate": 4.3100598418099377e-07,
      "loss": 0.2615,
      "step": 2477
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4978626002403077,
      "learning_rate": 4.30251487579401e-07,
      "loss": 0.2893,
      "step": 2478
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3772114464772973,
      "learning_rate": 4.2949747089810407e-07,
      "loss": 0.277,
      "step": 2479
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.532502965255537,
      "learning_rate": 4.2874393477223913e-07,
      "loss": 0.283,
      "step": 2480
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.433748637942874,
      "learning_rate": 4.279908798365378e-07,
      "loss": 0.2877,
      "step": 2481
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3727980561715913,
      "learning_rate": 4.272383067253253e-07,
      "loss": 0.2741,
      "step": 2482
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3850416527902296,
      "learning_rate": 4.264862160725229e-07,
      "loss": 0.2602,
      "step": 2483
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.513270043584115,
      "learning_rate": 4.25734608511644e-07,
      "loss": 0.2897,
      "step": 2484
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.350862190964246,
      "learning_rate": 4.2498348467579547e-07,
      "loss": 0.2748,
      "step": 2485
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.197139592567514,
      "learning_rate": 4.2423284519767735e-07,
      "loss": 0.2445,
      "step": 2486
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3065943115838583,
      "learning_rate": 4.2348269070957977e-07,
      "loss": 0.2764,
      "step": 2487
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3619768477901895,
      "learning_rate": 4.22733021843387e-07,
      "loss": 0.3006,
      "step": 2488
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.207189526645788,
      "learning_rate": 4.2198383923057224e-07,
      "loss": 0.273,
      "step": 2489
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.358662330834118,
      "learning_rate": 4.212351435022005e-07,
      "loss": 0.291,
      "step": 2490
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.721161217426416,
      "learning_rate": 4.2048693528892455e-07,
      "loss": 0.2514,
      "step": 2491
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.41417573342347,
      "learning_rate": 4.197392152209892e-07,
      "loss": 0.2955,
      "step": 2492
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.4500824207202223,
      "learning_rate": 4.189919839282264e-07,
      "loss": 0.2735,
      "step": 2493
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5618427216529427,
      "learning_rate": 4.1824524204005706e-07,
      "loss": 0.2856,
      "step": 2494
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.380341795132665,
      "learning_rate": 4.1749899018548885e-07,
      "loss": 0.2561,
      "step": 2495
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.144872498829846,
      "learning_rate": 4.1675322899311736e-07,
      "loss": 0.2487,
      "step": 2496
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2617982886035275,
      "learning_rate": 4.1600795909112564e-07,
      "loss": 0.2319,
      "step": 2497
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.382061749693088,
      "learning_rate": 4.152631811072822e-07,
      "loss": 0.3004,
      "step": 2498
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.4247661850403164,
      "learning_rate": 4.145188956689405e-07,
      "loss": 0.3104,
      "step": 2499
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3290005093252626,
      "learning_rate": 4.137751034030399e-07,
      "loss": 0.2591,
      "step": 2500
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.276287989665381,
      "learning_rate": 4.130318049361039e-07,
      "loss": 0.2786,
      "step": 2501
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3270973591183304,
      "learning_rate": 4.1228900089424155e-07,
      "loss": 0.2857,
      "step": 2502
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.456702277321644,
      "learning_rate": 4.1154669190314307e-07,
      "loss": 0.2732,
      "step": 2503
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.306313481934988,
      "learning_rate": 4.1080487858808334e-07,
      "loss": 0.2913,
      "step": 2504
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1824814739733234,
      "learning_rate": 4.10063561573919e-07,
      "loss": 0.2578,
      "step": 2505
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3460665199937334,
      "learning_rate": 4.0932274148508863e-07,
      "loss": 0.2752,
      "step": 2506
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3729008838322247,
      "learning_rate": 4.085824189456135e-07,
      "loss": 0.2646,
      "step": 2507
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.423768847342455,
      "learning_rate": 4.0784259457909363e-07,
      "loss": 0.2674,
      "step": 2508
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.430893115112683,
      "learning_rate": 4.071032690087111e-07,
      "loss": 0.2574,
      "step": 2509
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.4690843164366507,
      "learning_rate": 4.0636444285722684e-07,
      "loss": 0.2577,
      "step": 2510
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.293998049058781,
      "learning_rate": 4.056261167469818e-07,
      "loss": 0.2649,
      "step": 2511
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.295221692402047,
      "learning_rate": 4.048882912998953e-07,
      "loss": 0.2805,
      "step": 2512
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2638696374900342,
      "learning_rate": 4.0415096713746523e-07,
      "loss": 0.249,
      "step": 2513
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.286203414947763,
      "learning_rate": 4.0341414488076697e-07,
      "loss": 0.2508,
      "step": 2514
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5829535270097934,
      "learning_rate": 4.026778251504532e-07,
      "loss": 0.291,
      "step": 2515
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5086684577605354,
      "learning_rate": 4.0194200856675333e-07,
      "loss": 0.2999,
      "step": 2516
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.360608262561292,
      "learning_rate": 4.0120669574947297e-07,
      "loss": 0.2708,
      "step": 2517
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2937174901208697,
      "learning_rate": 4.0047188731799343e-07,
      "loss": 0.265,
      "step": 2518
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3099838395487176,
      "learning_rate": 3.99737583891271e-07,
      "loss": 0.2728,
      "step": 2519
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.574811131070446,
      "learning_rate": 3.9900378608783703e-07,
      "loss": 0.2842,
      "step": 2520
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.524084353539817,
      "learning_rate": 3.982704945257956e-07,
      "loss": 0.2706,
      "step": 2521
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5793952681688364,
      "learning_rate": 3.9753770982282654e-07,
      "loss": 0.265,
      "step": 2522
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.175096353388101,
      "learning_rate": 3.9680543259618103e-07,
      "loss": 0.2393,
      "step": 2523
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2034119506106253,
      "learning_rate": 3.960736634626838e-07,
      "loss": 0.259,
      "step": 2524
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.187282761189505,
      "learning_rate": 3.9534240303873e-07,
      "loss": 0.2573,
      "step": 2525
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2560105286323884,
      "learning_rate": 3.9461165194028854e-07,
      "loss": 0.2578,
      "step": 2526
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2917686683304423,
      "learning_rate": 3.9388141078289774e-07,
      "loss": 0.261,
      "step": 2527
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4807963920053657,
      "learning_rate": 3.9315168018166676e-07,
      "loss": 0.3061,
      "step": 2528
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.36922156845111,
      "learning_rate": 3.924224607512753e-07,
      "loss": 0.2702,
      "step": 2529
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3190444175720692,
      "learning_rate": 3.9169375310597054e-07,
      "loss": 0.2649,
      "step": 2530
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5630193048920256,
      "learning_rate": 3.909655578595713e-07,
      "loss": 0.2565,
      "step": 2531
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3259427043076393,
      "learning_rate": 3.9023787562546284e-07,
      "loss": 0.2595,
      "step": 2532
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2907661152770395,
      "learning_rate": 3.895107070165995e-07,
      "loss": 0.2744,
      "step": 2533
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1958089968134478,
      "learning_rate": 3.887840526455014e-07,
      "loss": 0.2606,
      "step": 2534
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0772019948377327,
      "learning_rate": 3.880579131242566e-07,
      "loss": 0.2445,
      "step": 2535
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4373504411076095,
      "learning_rate": 3.873322890645201e-07,
      "loss": 0.2693,
      "step": 2536
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5830036698453145,
      "learning_rate": 3.8660718107751176e-07,
      "loss": 0.2844,
      "step": 2537
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.484204909542249,
      "learning_rate": 3.8588258977401636e-07,
      "loss": 0.2637,
      "step": 2538
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3622160811014967,
      "learning_rate": 3.851585157643844e-07,
      "loss": 0.2927,
      "step": 2539
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3793320717187747,
      "learning_rate": 3.844349596585298e-07,
      "loss": 0.2663,
      "step": 2540
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.361965899246749,
      "learning_rate": 3.8371192206593174e-07,
      "loss": 0.2719,
      "step": 2541
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.384032621212719,
      "learning_rate": 3.8298940359563057e-07,
      "loss": 0.2671,
      "step": 2542
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6938610355413224,
      "learning_rate": 3.822674048562309e-07,
      "loss": 0.2581,
      "step": 2543
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3434426912533093,
      "learning_rate": 3.8154592645589877e-07,
      "loss": 0.2656,
      "step": 2544
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.465646994145014,
      "learning_rate": 3.808249690023624e-07,
      "loss": 0.2686,
      "step": 2545
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3528660319084183,
      "learning_rate": 3.801045331029108e-07,
      "loss": 0.2803,
      "step": 2546
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3126792038268906,
      "learning_rate": 3.79384619364394e-07,
      "loss": 0.2662,
      "step": 2547
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.381341397933177,
      "learning_rate": 3.78665228393222e-07,
      "loss": 0.2654,
      "step": 2548
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.427083572259114,
      "learning_rate": 3.7794636079536436e-07,
      "loss": 0.2747,
      "step": 2549
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5762959209498875,
      "learning_rate": 3.772280171763501e-07,
      "loss": 0.2803,
      "step": 2550
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.445020108164999,
      "learning_rate": 3.765101981412665e-07,
      "loss": 0.2679,
      "step": 2551
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3630588946128537,
      "learning_rate": 3.757929042947593e-07,
      "loss": 0.2836,
      "step": 2552
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3140210795068104,
      "learning_rate": 3.7507613624103165e-07,
      "loss": 0.2908,
      "step": 2553
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4066462075449193,
      "learning_rate": 3.743598945838438e-07,
      "loss": 0.3071,
      "step": 2554
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4619938252669757,
      "learning_rate": 3.7364417992651266e-07,
      "loss": 0.2352,
      "step": 2555
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3731347309841744,
      "learning_rate": 3.7292899287191125e-07,
      "loss": 0.2533,
      "step": 2556
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3363844072722357,
      "learning_rate": 3.7221433402246815e-07,
      "loss": 0.2865,
      "step": 2557
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5048502027566593,
      "learning_rate": 3.715002039801671e-07,
      "loss": 0.279,
      "step": 2558
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.5234443399823583,
      "learning_rate": 3.707866033465461e-07,
      "loss": 0.2743,
      "step": 2559
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.461815368653574,
      "learning_rate": 3.700735327226976e-07,
      "loss": 0.2504,
      "step": 2560
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4228042349715087,
      "learning_rate": 3.6936099270926734e-07,
      "loss": 0.2828,
      "step": 2561
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.420179888833707,
      "learning_rate": 3.686489839064543e-07,
      "loss": 0.2846,
      "step": 2562
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4906239765975884,
      "learning_rate": 3.679375069140099e-07,
      "loss": 0.309,
      "step": 2563
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.289665637068872,
      "learning_rate": 3.6722656233123706e-07,
      "loss": 0.2536,
      "step": 2564
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.3349740705759654,
      "learning_rate": 3.6651615075699137e-07,
      "loss": 0.2808,
      "step": 2565
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4114347304296344,
      "learning_rate": 3.658062727896788e-07,
      "loss": 0.2927,
      "step": 2566
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.5596060525617372,
      "learning_rate": 3.6509692902725597e-07,
      "loss": 0.2798,
      "step": 2567
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.402246854525871,
      "learning_rate": 3.6438812006722885e-07,
      "loss": 0.3023,
      "step": 2568
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.369821125049631,
      "learning_rate": 3.636798465066536e-07,
      "loss": 0.2748,
      "step": 2569
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2644575106815457,
      "learning_rate": 3.629721089421359e-07,
      "loss": 0.2624,
      "step": 2570
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2918717960060118,
      "learning_rate": 3.6226490796982925e-07,
      "loss": 0.2728,
      "step": 2571
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1864303860737917,
      "learning_rate": 3.615582441854348e-07,
      "loss": 0.2352,
      "step": 2572
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4095803133808533,
      "learning_rate": 3.6085211818420167e-07,
      "loss": 0.3267,
      "step": 2573
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1223371485411473,
      "learning_rate": 3.6014653056092593e-07,
      "loss": 0.2633,
      "step": 2574
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.207700617626701,
      "learning_rate": 3.5944148190995073e-07,
      "loss": 0.2399,
      "step": 2575
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1559620057419866,
      "learning_rate": 3.587369728251647e-07,
      "loss": 0.2567,
      "step": 2576
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.403221431935094,
      "learning_rate": 3.5803300390000133e-07,
      "loss": 0.246,
      "step": 2577
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.8409300056102214,
      "learning_rate": 3.5732957572744e-07,
      "loss": 0.2835,
      "step": 2578
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4152625007808504,
      "learning_rate": 3.5662668890000415e-07,
      "loss": 0.3176,
      "step": 2579
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.307449285104854,
      "learning_rate": 3.559243440097622e-07,
      "loss": 0.237,
      "step": 2580
    },
    {
      "epoch": 0.73,
      "grad_norm": 4.0720001522452485,
      "learning_rate": 3.5522254164832456e-07,
      "loss": 0.3037,
      "step": 2581
    },
    {
      "epoch": 0.73,
      "grad_norm": 4.432606930026203,
      "learning_rate": 3.5452128240684556e-07,
      "loss": 0.2782,
      "step": 2582
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2833098442405837,
      "learning_rate": 3.538205668760218e-07,
      "loss": 0.2605,
      "step": 2583
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.239400351839413,
      "learning_rate": 3.53120395646092e-07,
      "loss": 0.2636,
      "step": 2584
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.330486004814981,
      "learning_rate": 3.524207693068364e-07,
      "loss": 0.284,
      "step": 2585
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.5464804766981235,
      "learning_rate": 3.517216884475762e-07,
      "loss": 0.2748,
      "step": 2586
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1444559881500664,
      "learning_rate": 3.5102315365717303e-07,
      "loss": 0.2737,
      "step": 2587
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.167108947304041,
      "learning_rate": 3.503251655240288e-07,
      "loss": 0.2714,
      "step": 2588
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.502123863730012,
      "learning_rate": 3.4962772463608457e-07,
      "loss": 0.258,
      "step": 2589
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.518883768069017,
      "learning_rate": 3.489308315808209e-07,
      "loss": 0.2844,
      "step": 2590
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.340826659734753,
      "learning_rate": 3.482344869452565e-07,
      "loss": 0.2684,
      "step": 2591
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.3794594889726643,
      "learning_rate": 3.475386913159483e-07,
      "loss": 0.2825,
      "step": 2592
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.3529290513987755,
      "learning_rate": 3.468434452789911e-07,
      "loss": 0.2599,
      "step": 2593
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4524978022353587,
      "learning_rate": 3.461487494200154e-07,
      "loss": 0.2631,
      "step": 2594
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.288189711546269,
      "learning_rate": 3.4545460432419036e-07,
      "loss": 0.2626,
      "step": 2595
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4783830699080887,
      "learning_rate": 3.4476101057621966e-07,
      "loss": 0.3027,
      "step": 2596
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3321205098430235,
      "learning_rate": 3.4406796876034317e-07,
      "loss": 0.2448,
      "step": 2597
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.334725211239991,
      "learning_rate": 3.433754794603355e-07,
      "loss": 0.2855,
      "step": 2598
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.259009020940633,
      "learning_rate": 3.426835432595063e-07,
      "loss": 0.2452,
      "step": 2599
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.504960317513778,
      "learning_rate": 3.4199216074069903e-07,
      "loss": 0.271,
      "step": 2600
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4533948927974816,
      "learning_rate": 3.4130133248629065e-07,
      "loss": 0.2929,
      "step": 2601
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1717537961063798,
      "learning_rate": 3.40611059078192e-07,
      "loss": 0.2325,
      "step": 2602
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.371460083952491,
      "learning_rate": 3.399213410978446e-07,
      "loss": 0.2476,
      "step": 2603
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7576954694182683,
      "learning_rate": 3.392321791262249e-07,
      "loss": 0.2599,
      "step": 2604
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3982745069910436,
      "learning_rate": 3.3854357374383903e-07,
      "loss": 0.2699,
      "step": 2605
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2094367962380894,
      "learning_rate": 3.3785552553072517e-07,
      "loss": 0.2328,
      "step": 2606
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.304369436989134,
      "learning_rate": 3.371680350664512e-07,
      "loss": 0.2697,
      "step": 2607
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3650813356848883,
      "learning_rate": 3.364811029301159e-07,
      "loss": 0.2831,
      "step": 2608
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2515919828201922,
      "learning_rate": 3.3579472970034814e-07,
      "loss": 0.2401,
      "step": 2609
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2827874062481417,
      "learning_rate": 3.3510891595530564e-07,
      "loss": 0.2491,
      "step": 2610
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1908715491993274,
      "learning_rate": 3.3442366227267425e-07,
      "loss": 0.2541,
      "step": 2611
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3717075447613394,
      "learning_rate": 3.337389692296686e-07,
      "loss": 0.2761,
      "step": 2612
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2872731055219195,
      "learning_rate": 3.330548374030309e-07,
      "loss": 0.2577,
      "step": 2613
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3441712807359547,
      "learning_rate": 3.3237126736903166e-07,
      "loss": 0.2887,
      "step": 2614
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.490986134566394,
      "learning_rate": 3.316882597034663e-07,
      "loss": 0.2656,
      "step": 2615
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2382706884208705,
      "learning_rate": 3.3100581498165783e-07,
      "loss": 0.2658,
      "step": 2616
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.324163839691007,
      "learning_rate": 3.303239337784547e-07,
      "loss": 0.2477,
      "step": 2617
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2028924069980684,
      "learning_rate": 3.296426166682303e-07,
      "loss": 0.2943,
      "step": 2618
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7641272306318703,
      "learning_rate": 3.289618642248846e-07,
      "loss": 0.2767,
      "step": 2619
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.194964291798905,
      "learning_rate": 3.282816770218394e-07,
      "loss": 0.2871,
      "step": 2620
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.357021323177236,
      "learning_rate": 3.276020556320419e-07,
      "loss": 0.2768,
      "step": 2621
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.654814264416529,
      "learning_rate": 3.2692300062796254e-07,
      "loss": 0.3321,
      "step": 2622
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.29602703091403,
      "learning_rate": 3.2624451258159447e-07,
      "loss": 0.2763,
      "step": 2623
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4739428126752783,
      "learning_rate": 3.2556659206445327e-07,
      "loss": 0.2721,
      "step": 2624
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.725494800009238,
      "learning_rate": 3.248892396475765e-07,
      "loss": 0.2962,
      "step": 2625
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2994857273251244,
      "learning_rate": 3.2421245590152335e-07,
      "loss": 0.2613,
      "step": 2626
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.265001633392082,
      "learning_rate": 3.235362413963738e-07,
      "loss": 0.2489,
      "step": 2627
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.565517431965826,
      "learning_rate": 3.228605967017284e-07,
      "loss": 0.2866,
      "step": 2628
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.032018556302911,
      "learning_rate": 3.221855223867076e-07,
      "loss": 0.2603,
      "step": 2629
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.36995929708129,
      "learning_rate": 3.215110190199518e-07,
      "loss": 0.275,
      "step": 2630
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2582387013579166,
      "learning_rate": 3.2083708716961986e-07,
      "loss": 0.2719,
      "step": 2631
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.6561695353472348,
      "learning_rate": 3.201637274033899e-07,
      "loss": 0.2949,
      "step": 2632
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1972133812785444,
      "learning_rate": 3.194909402884576e-07,
      "loss": 0.262,
      "step": 2633
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4304389915393703,
      "learning_rate": 3.188187263915365e-07,
      "loss": 0.2677,
      "step": 2634
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.374947867030814,
      "learning_rate": 3.181470862788573e-07,
      "loss": 0.2901,
      "step": 2635
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3264780260706974,
      "learning_rate": 3.174760205161678e-07,
      "loss": 0.2657,
      "step": 2636
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.418903400270687,
      "learning_rate": 3.168055296687305e-07,
      "loss": 0.282,
      "step": 2637
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.25516044788765,
      "learning_rate": 3.161356143013257e-07,
      "loss": 0.2631,
      "step": 2638
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3131577049947527,
      "learning_rate": 3.154662749782476e-07,
      "loss": 0.2647,
      "step": 2639
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3814425730813724,
      "learning_rate": 3.1479751226330566e-07,
      "loss": 0.2769,
      "step": 2640
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.5373863660884393,
      "learning_rate": 3.141293267198236e-07,
      "loss": 0.2953,
      "step": 2641
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4174812864003847,
      "learning_rate": 3.13461718910638e-07,
      "loss": 0.2943,
      "step": 2642
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3206104540840973,
      "learning_rate": 3.127946893981008e-07,
      "loss": 0.2671,
      "step": 2643
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.5344070466388873,
      "learning_rate": 3.1212823874407513e-07,
      "loss": 0.3006,
      "step": 2644
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.344494797925107,
      "learning_rate": 3.1146236750993757e-07,
      "loss": 0.2756,
      "step": 2645
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.250948017574977,
      "learning_rate": 3.107970762565755e-07,
      "loss": 0.2626,
      "step": 2646
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.342448369121599,
      "learning_rate": 3.1013236554438817e-07,
      "loss": 0.2823,
      "step": 2647
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4801146316539855,
      "learning_rate": 3.094682359332871e-07,
      "loss": 0.2701,
      "step": 2648
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3519971171584273,
      "learning_rate": 3.0880468798269286e-07,
      "loss": 0.245,
      "step": 2649
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3023529595295553,
      "learning_rate": 3.0814172225153623e-07,
      "loss": 0.2862,
      "step": 2650
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.479935156267912,
      "learning_rate": 3.0747933929825786e-07,
      "loss": 0.2595,
      "step": 2651
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2767570811714357,
      "learning_rate": 3.0681753968080735e-07,
      "loss": 0.2454,
      "step": 2652
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.405319864949606,
      "learning_rate": 3.061563239566439e-07,
      "loss": 0.2668,
      "step": 2653
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3523938041145374,
      "learning_rate": 3.0549569268273314e-07,
      "loss": 0.271,
      "step": 2654
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2832859994293195,
      "learning_rate": 3.048356464155495e-07,
      "loss": 0.2562,
      "step": 2655
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1814233292526097,
      "learning_rate": 3.041761857110744e-07,
      "loss": 0.2458,
      "step": 2656
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3524324132974135,
      "learning_rate": 3.0351731112479627e-07,
      "loss": 0.2521,
      "step": 2657
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4893308737060544,
      "learning_rate": 3.0285902321170943e-07,
      "loss": 0.2848,
      "step": 2658
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.58810016842196,
      "learning_rate": 3.0220132252631416e-07,
      "loss": 0.3069,
      "step": 2659
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.684462905942372,
      "learning_rate": 3.015442096226163e-07,
      "loss": 0.2914,
      "step": 2660
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.5481577407259155,
      "learning_rate": 3.008876850541262e-07,
      "loss": 0.2866,
      "step": 2661
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4742375445466247,
      "learning_rate": 3.00231749373859e-07,
      "loss": 0.2657,
      "step": 2662
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.418562491235517,
      "learning_rate": 2.995764031343336e-07,
      "loss": 0.269,
      "step": 2663
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.238527067720981,
      "learning_rate": 2.989216468875725e-07,
      "loss": 0.2661,
      "step": 2664
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3820690839491427,
      "learning_rate": 2.9826748118510106e-07,
      "loss": 0.2402,
      "step": 2665
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5059711601939862,
      "learning_rate": 2.9761390657794727e-07,
      "loss": 0.2816,
      "step": 2666
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.336363737866262,
      "learning_rate": 2.9696092361664125e-07,
      "loss": 0.2732,
      "step": 2667
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2351641737827395,
      "learning_rate": 2.96308532851215e-07,
      "loss": 0.2638,
      "step": 2668
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.455015050921343,
      "learning_rate": 2.956567348312012e-07,
      "loss": 0.2741,
      "step": 2669
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4782570931707997,
      "learning_rate": 2.9500553010563356e-07,
      "loss": 0.2617,
      "step": 2670
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.412270563323574,
      "learning_rate": 2.94354919223046e-07,
      "loss": 0.2446,
      "step": 2671
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.208344037910064,
      "learning_rate": 2.9370490273147217e-07,
      "loss": 0.2516,
      "step": 2672
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.651043462680491,
      "learning_rate": 2.9305548117844504e-07,
      "loss": 0.2722,
      "step": 2673
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4735510597437114,
      "learning_rate": 2.9240665511099636e-07,
      "loss": 0.2675,
      "step": 2674
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.258433909290774,
      "learning_rate": 2.9175842507565695e-07,
      "loss": 0.2557,
      "step": 2675
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.408322811827988,
      "learning_rate": 2.911107916184539e-07,
      "loss": 0.2982,
      "step": 2676
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3516191496170067,
      "learning_rate": 2.9046375528491376e-07,
      "loss": 0.2785,
      "step": 2677
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2900556855731247,
      "learning_rate": 2.89817316620059e-07,
      "loss": 0.2639,
      "step": 2678
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.0394892227608734,
      "learning_rate": 2.891714761684093e-07,
      "loss": 0.272,
      "step": 2679
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2794822571116966,
      "learning_rate": 2.8852623447397915e-07,
      "loss": 0.248,
      "step": 2680
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.313592477288886,
      "learning_rate": 2.8788159208027973e-07,
      "loss": 0.2649,
      "step": 2681
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.537623352307677,
      "learning_rate": 2.8723754953031777e-07,
      "loss": 0.2874,
      "step": 2682
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.492465481071438,
      "learning_rate": 2.8659410736659416e-07,
      "loss": 0.267,
      "step": 2683
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5525190678003082,
      "learning_rate": 2.8595126613110363e-07,
      "loss": 0.2814,
      "step": 2684
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.6229646083995553,
      "learning_rate": 2.853090263653354e-07,
      "loss": 0.3226,
      "step": 2685
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2844022457053117,
      "learning_rate": 2.846673886102714e-07,
      "loss": 0.2898,
      "step": 2686
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3647776025257463,
      "learning_rate": 2.840263534063877e-07,
      "loss": 0.263,
      "step": 2687
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2855963169893183,
      "learning_rate": 2.833859212936519e-07,
      "loss": 0.272,
      "step": 2688
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.765864320378852,
      "learning_rate": 2.827460928115232e-07,
      "loss": 0.2951,
      "step": 2689
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7069540638044005,
      "learning_rate": 2.8210686849895307e-07,
      "loss": 0.3033,
      "step": 2690
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.353518345754197,
      "learning_rate": 2.8146824889438356e-07,
      "loss": 0.3012,
      "step": 2691
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2556279108688435,
      "learning_rate": 2.808302345357486e-07,
      "loss": 0.2602,
      "step": 2692
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.515343160953233,
      "learning_rate": 2.8019282596047046e-07,
      "loss": 0.2657,
      "step": 2693
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5698374203328034,
      "learning_rate": 2.7955602370546227e-07,
      "loss": 0.2823,
      "step": 2694
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.372728140279168,
      "learning_rate": 2.789198283071261e-07,
      "loss": 0.2836,
      "step": 2695
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.528942553797293,
      "learning_rate": 2.78284240301353e-07,
      "loss": 0.2802,
      "step": 2696
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3999481678317376,
      "learning_rate": 2.776492602235223e-07,
      "loss": 0.302,
      "step": 2697
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.334677905881842,
      "learning_rate": 2.770148886085013e-07,
      "loss": 0.259,
      "step": 2698
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1839619355909523,
      "learning_rate": 2.763811259906447e-07,
      "loss": 0.268,
      "step": 2699
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.266845888284121,
      "learning_rate": 2.7574797290379413e-07,
      "loss": 0.232,
      "step": 2700
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.480842434672786,
      "learning_rate": 2.751154298812781e-07,
      "loss": 0.2678,
      "step": 2701
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2677658357528414,
      "learning_rate": 2.74483497455911e-07,
      "loss": 0.2665,
      "step": 2702
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4011260292496845,
      "learning_rate": 2.73852176159993e-07,
      "loss": 0.2534,
      "step": 2703
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.253785637191656,
      "learning_rate": 2.732214665253092e-07,
      "loss": 0.2379,
      "step": 2704
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.602202388621679,
      "learning_rate": 2.7259136908312995e-07,
      "loss": 0.3228,
      "step": 2705
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3452687489865895,
      "learning_rate": 2.719618843642095e-07,
      "loss": 0.2966,
      "step": 2706
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.184038611317654,
      "learning_rate": 2.713330128987864e-07,
      "loss": 0.2524,
      "step": 2707
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3034671990517026,
      "learning_rate": 2.707047552165822e-07,
      "loss": 0.2575,
      "step": 2708
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.405072030453159,
      "learning_rate": 2.700771118468017e-07,
      "loss": 0.2868,
      "step": 2709
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.252834431948804,
      "learning_rate": 2.6945008331813224e-07,
      "loss": 0.2549,
      "step": 2710
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.6519575726519546,
      "learning_rate": 2.688236701587431e-07,
      "loss": 0.3061,
      "step": 2711
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3694153979510038,
      "learning_rate": 2.6819787289628526e-07,
      "loss": 0.2777,
      "step": 2712
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5804112331460813,
      "learning_rate": 2.6757269205789113e-07,
      "loss": 0.2905,
      "step": 2713
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3565782140319422,
      "learning_rate": 2.6694812817017387e-07,
      "loss": 0.267,
      "step": 2714
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3170272128324116,
      "learning_rate": 2.663241817592261e-07,
      "loss": 0.2478,
      "step": 2715
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3698915573154906,
      "learning_rate": 2.6570085335062164e-07,
      "loss": 0.2489,
      "step": 2716
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2915645771544253,
      "learning_rate": 2.6507814346941293e-07,
      "loss": 0.2934,
      "step": 2717
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.268037849281149,
      "learning_rate": 2.64456052640132e-07,
      "loss": 0.2947,
      "step": 2718
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.402245114365464,
      "learning_rate": 2.6383458138678827e-07,
      "loss": 0.2441,
      "step": 2719
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.433258683555696,
      "learning_rate": 2.6321373023287007e-07,
      "loss": 0.2361,
      "step": 2720
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.638457709606131,
      "learning_rate": 2.6259349970134403e-07,
      "loss": 0.2723,
      "step": 2721
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1506166647148444,
      "learning_rate": 2.6197389031465324e-07,
      "loss": 0.2288,
      "step": 2722
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2800718518628473,
      "learning_rate": 2.613549025947169e-07,
      "loss": 0.2345,
      "step": 2723
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2215835594373874,
      "learning_rate": 2.60736537062932e-07,
      "loss": 0.245,
      "step": 2724
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4457713208187712,
      "learning_rate": 2.6011879424017005e-07,
      "loss": 0.3009,
      "step": 2725
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3109213740501753,
      "learning_rate": 2.5950167464677985e-07,
      "loss": 0.2648,
      "step": 2726
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.367702983358573,
      "learning_rate": 2.588851788025832e-07,
      "loss": 0.2656,
      "step": 2727
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.267989855733225,
      "learning_rate": 2.582693072268778e-07,
      "loss": 0.2742,
      "step": 2728
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3799453877005488,
      "learning_rate": 2.5765406043843483e-07,
      "loss": 0.2879,
      "step": 2729
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4183363741864863,
      "learning_rate": 2.5703943895549975e-07,
      "loss": 0.2452,
      "step": 2730
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.352119842346004,
      "learning_rate": 2.5642544329579085e-07,
      "loss": 0.2769,
      "step": 2731
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3630801759639217,
      "learning_rate": 2.558120739764995e-07,
      "loss": 0.2806,
      "step": 2732
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.511379597161267,
      "learning_rate": 2.551993315142894e-07,
      "loss": 0.2976,
      "step": 2733
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1875569102599113,
      "learning_rate": 2.5458721642529637e-07,
      "loss": 0.2228,
      "step": 2734
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3201124027880544,
      "learning_rate": 2.5397572922512735e-07,
      "loss": 0.2644,
      "step": 2735
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.5622681136534515,
      "learning_rate": 2.53364870428861e-07,
      "loss": 0.2763,
      "step": 2736
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.650004763755733,
      "learning_rate": 2.527546405510461e-07,
      "loss": 0.2547,
      "step": 2737
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.814911905875072,
      "learning_rate": 2.5214504010570214e-07,
      "loss": 0.2653,
      "step": 2738
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2147385657579566,
      "learning_rate": 2.515360696063179e-07,
      "loss": 0.245,
      "step": 2739
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.306210277214575,
      "learning_rate": 2.5092772956585205e-07,
      "loss": 0.269,
      "step": 2740
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.239930705597454,
      "learning_rate": 2.503200204967317e-07,
      "loss": 0.2716,
      "step": 2741
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2169610526187618,
      "learning_rate": 2.497129429108531e-07,
      "loss": 0.2659,
      "step": 2742
    },
    {
      "epoch": 0.78,
      "grad_norm": 5.314771526733961,
      "learning_rate": 2.491064973195798e-07,
      "loss": 0.2949,
      "step": 2743
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.464722741180483,
      "learning_rate": 2.485006842337437e-07,
      "loss": 0.2622,
      "step": 2744
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4414530378845556,
      "learning_rate": 2.4789550416364347e-07,
      "loss": 0.309,
      "step": 2745
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3966166894823697,
      "learning_rate": 2.4729095761904483e-07,
      "loss": 0.2892,
      "step": 2746
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.293602180410425,
      "learning_rate": 2.466870451091796e-07,
      "loss": 0.2568,
      "step": 2747
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.570710790990384,
      "learning_rate": 2.4608376714274617e-07,
      "loss": 0.2488,
      "step": 2748
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.384964366245312,
      "learning_rate": 2.454811242279069e-07,
      "loss": 0.2628,
      "step": 2749
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.410683574063837,
      "learning_rate": 2.4487911687229113e-07,
      "loss": 0.2534,
      "step": 2750
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3339434682085076,
      "learning_rate": 2.4427774558299185e-07,
      "loss": 0.2967,
      "step": 2751
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2911955493857046,
      "learning_rate": 2.4367701086656624e-07,
      "loss": 0.2943,
      "step": 2752
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3451456650946283,
      "learning_rate": 2.430769132290357e-07,
      "loss": 0.2765,
      "step": 2753
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.7071698183330892,
      "learning_rate": 2.4247745317588397e-07,
      "loss": 0.3126,
      "step": 2754
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3761776008143247,
      "learning_rate": 2.418786312120593e-07,
      "loss": 0.2556,
      "step": 2755
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.188824650286055,
      "learning_rate": 2.412804478419712e-07,
      "loss": 0.2809,
      "step": 2756
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3791611004508746,
      "learning_rate": 2.406829035694923e-07,
      "loss": 0.2718,
      "step": 2757
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3031708128532293,
      "learning_rate": 2.400859988979554e-07,
      "loss": 0.2666,
      "step": 2758
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.507943026823091,
      "learning_rate": 2.394897343301556e-07,
      "loss": 0.2832,
      "step": 2759
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1908843560087625,
      "learning_rate": 2.388941103683493e-07,
      "loss": 0.242,
      "step": 2760
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3430749497591057,
      "learning_rate": 2.382991275142524e-07,
      "loss": 0.2607,
      "step": 2761
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4206029000098783,
      "learning_rate": 2.3770478626904068e-07,
      "loss": 0.2676,
      "step": 2762
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.340638141700092,
      "learning_rate": 2.3711108713334994e-07,
      "loss": 0.2575,
      "step": 2763
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.398508636026643,
      "learning_rate": 2.3651803060727482e-07,
      "loss": 0.2855,
      "step": 2764
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4459378713673647,
      "learning_rate": 2.3592561719036952e-07,
      "loss": 0.2749,
      "step": 2765
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.423230704899453,
      "learning_rate": 2.3533384738164508e-07,
      "loss": 0.2519,
      "step": 2766
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.0438979344407984,
      "learning_rate": 2.3474272167957143e-07,
      "loss": 0.2805,
      "step": 2767
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3500512947448704,
      "learning_rate": 2.341522405820756e-07,
      "loss": 0.2851,
      "step": 2768
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3808000861680303,
      "learning_rate": 2.3356240458654185e-07,
      "loss": 0.2639,
      "step": 2769
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.5585487785990346,
      "learning_rate": 2.3297321418981075e-07,
      "loss": 0.2867,
      "step": 2770
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0898916437412027,
      "learning_rate": 2.3238466988817928e-07,
      "loss": 0.2489,
      "step": 2771
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.410983630793635,
      "learning_rate": 2.3179677217740013e-07,
      "loss": 0.2751,
      "step": 2772
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2514253584456005,
      "learning_rate": 2.3120952155268137e-07,
      "loss": 0.2576,
      "step": 2773
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4578035921751096,
      "learning_rate": 2.3062291850868588e-07,
      "loss": 0.2676,
      "step": 2774
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.185950464556516,
      "learning_rate": 2.3003696353953117e-07,
      "loss": 0.2439,
      "step": 2775
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.202339342263721,
      "learning_rate": 2.29451657138789e-07,
      "loss": 0.2578,
      "step": 2776
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3231308517243865,
      "learning_rate": 2.2886699979948444e-07,
      "loss": 0.2893,
      "step": 2777
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.235137483563809,
      "learning_rate": 2.2828299201409617e-07,
      "loss": 0.2766,
      "step": 2778
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.194310608489236,
      "learning_rate": 2.2769963427455552e-07,
      "loss": 0.2535,
      "step": 2779
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.372712133821499,
      "learning_rate": 2.2711692707224639e-07,
      "loss": 0.2858,
      "step": 2780
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.214835367155243,
      "learning_rate": 2.265348708980046e-07,
      "loss": 0.2615,
      "step": 2781
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.6208064832788542,
      "learning_rate": 2.2595346624211786e-07,
      "loss": 0.2892,
      "step": 2782
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4452656131894877,
      "learning_rate": 2.2537271359432454e-07,
      "loss": 0.2791,
      "step": 2783
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.400722142292888,
      "learning_rate": 2.247926134438144e-07,
      "loss": 0.2646,
      "step": 2784
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3866676906776614,
      "learning_rate": 2.2421316627922715e-07,
      "loss": 0.2674,
      "step": 2785
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4233247286668496,
      "learning_rate": 2.236343725886527e-07,
      "loss": 0.2808,
      "step": 2786
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.291558351108696,
      "learning_rate": 2.230562328596306e-07,
      "loss": 0.2673,
      "step": 2787
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.343505929479789,
      "learning_rate": 2.2247874757914864e-07,
      "loss": 0.267,
      "step": 2788
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.189849698105635,
      "learning_rate": 2.2190191723364492e-07,
      "loss": 0.2542,
      "step": 2789
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.421695068222722,
      "learning_rate": 2.2132574230900482e-07,
      "loss": 0.2883,
      "step": 2790
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3300957788556116,
      "learning_rate": 2.2075022329056192e-07,
      "loss": 0.2993,
      "step": 2791
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.383095763086712,
      "learning_rate": 2.2017536066309684e-07,
      "loss": 0.2906,
      "step": 2792
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1953601942491043,
      "learning_rate": 2.1960115491083752e-07,
      "loss": 0.237,
      "step": 2793
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.307913404713227,
      "learning_rate": 2.1902760651745954e-07,
      "loss": 0.2765,
      "step": 2794
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4715336117778723,
      "learning_rate": 2.1845471596608378e-07,
      "loss": 0.278,
      "step": 2795
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2935559162523127,
      "learning_rate": 2.1788248373927675e-07,
      "loss": 0.2311,
      "step": 2796
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.9425745468248645,
      "learning_rate": 2.1731091031905113e-07,
      "loss": 0.2453,
      "step": 2797
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2150854366589776,
      "learning_rate": 2.16739996186864e-07,
      "loss": 0.2515,
      "step": 2798
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.353730791125616,
      "learning_rate": 2.1616974182361825e-07,
      "loss": 0.2687,
      "step": 2799
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.346278524063355,
      "learning_rate": 2.1560014770966006e-07,
      "loss": 0.264,
      "step": 2800
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3421743418395375,
      "learning_rate": 2.1503121432477932e-07,
      "loss": 0.2769,
      "step": 2801
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.362493254521407,
      "learning_rate": 2.1446294214820991e-07,
      "loss": 0.244,
      "step": 2802
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.390546907904252,
      "learning_rate": 2.1389533165862826e-07,
      "loss": 0.2489,
      "step": 2803
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.298123026178687,
      "learning_rate": 2.1332838333415447e-07,
      "loss": 0.2389,
      "step": 2804
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.22722586200972,
      "learning_rate": 2.1276209765234954e-07,
      "loss": 0.2404,
      "step": 2805
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.399010284462047,
      "learning_rate": 2.1219647509021698e-07,
      "loss": 0.2709,
      "step": 2806
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.434832098004782,
      "learning_rate": 2.116315161242015e-07,
      "loss": 0.2922,
      "step": 2807
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.227811226214471,
      "learning_rate": 2.110672212301896e-07,
      "loss": 0.2842,
      "step": 2808
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2952246407389745,
      "learning_rate": 2.1050359088350723e-07,
      "loss": 0.2774,
      "step": 2809
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.50504305057805,
      "learning_rate": 2.0994062555892123e-07,
      "loss": 0.2699,
      "step": 2810
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.342581244971091,
      "learning_rate": 2.0937832573063818e-07,
      "loss": 0.2346,
      "step": 2811
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2551191727768205,
      "learning_rate": 2.088166918723041e-07,
      "loss": 0.2472,
      "step": 2812
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2456476952574276,
      "learning_rate": 2.0825572445700401e-07,
      "loss": 0.2725,
      "step": 2813
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3033253017776203,
      "learning_rate": 2.076954239572616e-07,
      "loss": 0.2669,
      "step": 2814
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.388877308526638,
      "learning_rate": 2.0713579084503873e-07,
      "loss": 0.2724,
      "step": 2815
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3343989502499274,
      "learning_rate": 2.0657682559173506e-07,
      "loss": 0.2987,
      "step": 2816
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2413289141575885,
      "learning_rate": 2.060185286681878e-07,
      "loss": 0.2364,
      "step": 2817
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.520433126733271,
      "learning_rate": 2.0546090054467114e-07,
      "loss": 0.2692,
      "step": 2818
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4097024486795804,
      "learning_rate": 2.0490394169089597e-07,
      "loss": 0.2538,
      "step": 2819
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.243358516588946,
      "learning_rate": 2.0434765257600928e-07,
      "loss": 0.2506,
      "step": 2820
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5078519127537215,
      "learning_rate": 2.037920336685941e-07,
      "loss": 0.2922,
      "step": 2821
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4605547304027033,
      "learning_rate": 2.0323708543666883e-07,
      "loss": 0.2951,
      "step": 2822
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.293726838612549,
      "learning_rate": 2.0268280834768692e-07,
      "loss": 0.2494,
      "step": 2823
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4860982798648648,
      "learning_rate": 2.021292028685365e-07,
      "loss": 0.283,
      "step": 2824
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2593332441880984,
      "learning_rate": 2.0157626946553995e-07,
      "loss": 0.2997,
      "step": 2825
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.445996744842272,
      "learning_rate": 2.01024008604454e-07,
      "loss": 0.2568,
      "step": 2826
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1971138861459263,
      "learning_rate": 2.0047242075046744e-07,
      "loss": 0.246,
      "step": 2827
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.592379392720621,
      "learning_rate": 1.9992150636820415e-07,
      "loss": 0.2868,
      "step": 2828
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4169149111598256,
      "learning_rate": 1.993712659217194e-07,
      "loss": 0.2714,
      "step": 2829
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4872947332284334,
      "learning_rate": 1.9882169987450138e-07,
      "loss": 0.273,
      "step": 2830
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.483937778500142,
      "learning_rate": 1.982728086894694e-07,
      "loss": 0.2737,
      "step": 2831
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1706509090641597,
      "learning_rate": 1.977245928289748e-07,
      "loss": 0.2529,
      "step": 2832
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.210854981564041,
      "learning_rate": 1.971770527548008e-07,
      "loss": 0.2585,
      "step": 2833
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.67382194381519,
      "learning_rate": 1.9663018892816063e-07,
      "loss": 0.2802,
      "step": 2834
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2123984409162762,
      "learning_rate": 1.9608400180969743e-07,
      "loss": 0.2357,
      "step": 2835
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5011636033003968,
      "learning_rate": 1.9553849185948512e-07,
      "loss": 0.2659,
      "step": 2836
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3650456015272416,
      "learning_rate": 1.9499365953702674e-07,
      "loss": 0.3112,
      "step": 2837
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3662359880050072,
      "learning_rate": 1.9444950530125548e-07,
      "loss": 0.2701,
      "step": 2838
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3835584666707947,
      "learning_rate": 1.9390602961053194e-07,
      "loss": 0.2649,
      "step": 2839
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.596380722618807,
      "learning_rate": 1.933632329226459e-07,
      "loss": 0.286,
      "step": 2840
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7967913473061157,
      "learning_rate": 1.9282111569481506e-07,
      "loss": 0.2663,
      "step": 2841
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2110276623027456,
      "learning_rate": 1.9227967838368564e-07,
      "loss": 0.2591,
      "step": 2842
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.249417596052053,
      "learning_rate": 1.9173892144532956e-07,
      "loss": 0.2357,
      "step": 2843
    },
    {
      "epoch": 0.81,
      "grad_norm": 4.700061186446766,
      "learning_rate": 1.9119884533524665e-07,
      "loss": 0.2586,
      "step": 2844
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.0079039657582887,
      "learning_rate": 1.9065945050836297e-07,
      "loss": 0.2734,
      "step": 2845
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.594893242581233,
      "learning_rate": 1.9012073741903068e-07,
      "loss": 0.2745,
      "step": 2846
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.4748954516830683,
      "learning_rate": 1.8958270652102858e-07,
      "loss": 0.2767,
      "step": 2847
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.5129604741794878,
      "learning_rate": 1.8904535826755908e-07,
      "loss": 0.2863,
      "step": 2848
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.5981441940170984,
      "learning_rate": 1.8850869311125096e-07,
      "loss": 0.2985,
      "step": 2849
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3356548455202435,
      "learning_rate": 1.8797271150415705e-07,
      "loss": 0.2584,
      "step": 2850
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.1820448800506997,
      "learning_rate": 1.8743741389775469e-07,
      "loss": 0.2552,
      "step": 2851
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.23023753403099,
      "learning_rate": 1.8690280074294473e-07,
      "loss": 0.2587,
      "step": 2852
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.5679736393340393,
      "learning_rate": 1.8636887249005174e-07,
      "loss": 0.2795,
      "step": 2853
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.177529079557188,
      "learning_rate": 1.8583562958882327e-07,
      "loss": 0.2363,
      "step": 2854
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.4742523821737907,
      "learning_rate": 1.853030724884297e-07,
      "loss": 0.3092,
      "step": 2855
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2885878314952874,
      "learning_rate": 1.847712016374634e-07,
      "loss": 0.2582,
      "step": 2856
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.74968714414436,
      "learning_rate": 1.8424001748393904e-07,
      "loss": 0.2926,
      "step": 2857
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2223393631269373,
      "learning_rate": 1.8370952047529263e-07,
      "loss": 0.2478,
      "step": 2858
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2864500433093164,
      "learning_rate": 1.831797110583817e-07,
      "loss": 0.2364,
      "step": 2859
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.4266795093411857,
      "learning_rate": 1.8265058967948433e-07,
      "loss": 0.2833,
      "step": 2860
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.609719240431143,
      "learning_rate": 1.8212215678429854e-07,
      "loss": 0.2891,
      "step": 2861
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.4925100660920267,
      "learning_rate": 1.8159441281794352e-07,
      "loss": 0.2934,
      "step": 2862
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.5036731108979406,
      "learning_rate": 1.8106735822495744e-07,
      "loss": 0.2703,
      "step": 2863
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.302888816291254,
      "learning_rate": 1.805409934492983e-07,
      "loss": 0.2605,
      "step": 2864
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3070765097071737,
      "learning_rate": 1.8001531893434185e-07,
      "loss": 0.2479,
      "step": 2865
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.511833666867204,
      "learning_rate": 1.7949033512288346e-07,
      "loss": 0.2459,
      "step": 2866
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.8799279280843644,
      "learning_rate": 1.7896604245713686e-07,
      "loss": 0.2613,
      "step": 2867
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.144326368427733,
      "learning_rate": 1.7844244137873298e-07,
      "loss": 0.2169,
      "step": 2868
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3584488923021802,
      "learning_rate": 1.779195323287208e-07,
      "loss": 0.2616,
      "step": 2869
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.705678283702525,
      "learning_rate": 1.7739731574756522e-07,
      "loss": 0.2795,
      "step": 2870
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2011956923784397,
      "learning_rate": 1.768757920751489e-07,
      "loss": 0.2416,
      "step": 2871
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2331302113818396,
      "learning_rate": 1.7635496175077081e-07,
      "loss": 0.265,
      "step": 2872
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3523724278598275,
      "learning_rate": 1.7583482521314595e-07,
      "loss": 0.2821,
      "step": 2873
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.9974888016714254,
      "learning_rate": 1.7531538290040382e-07,
      "loss": 0.2884,
      "step": 2874
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.55439064748466,
      "learning_rate": 1.7479663525009037e-07,
      "loss": 0.2989,
      "step": 2875
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3600553815923035,
      "learning_rate": 1.7427858269916563e-07,
      "loss": 0.259,
      "step": 2876
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3988600611863737,
      "learning_rate": 1.737612256840053e-07,
      "loss": 0.2789,
      "step": 2877
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3773615435211157,
      "learning_rate": 1.732445646403975e-07,
      "loss": 0.2787,
      "step": 2878
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.467114711446906,
      "learning_rate": 1.7272860000354538e-07,
      "loss": 0.27,
      "step": 2879
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.227839666993782,
      "learning_rate": 1.7221333220806477e-07,
      "loss": 0.2531,
      "step": 2880
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3135070130291506,
      "learning_rate": 1.7169876168798558e-07,
      "loss": 0.2465,
      "step": 2881
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.539791939043467,
      "learning_rate": 1.7118488887674887e-07,
      "loss": 0.2816,
      "step": 2882
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.6893138314421017,
      "learning_rate": 1.7067171420720904e-07,
      "loss": 0.3075,
      "step": 2883
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2844568548404656,
      "learning_rate": 1.7015923811163224e-07,
      "loss": 0.2549,
      "step": 2884
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5417677784666646,
      "learning_rate": 1.696474610216958e-07,
      "loss": 0.3035,
      "step": 2885
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.283990586546514,
      "learning_rate": 1.691363833684889e-07,
      "loss": 0.2711,
      "step": 2886
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4459137162379867,
      "learning_rate": 1.6862600558251095e-07,
      "loss": 0.2835,
      "step": 2887
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3833279688766233,
      "learning_rate": 1.6811632809367204e-07,
      "loss": 0.2697,
      "step": 2888
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5303782470283664,
      "learning_rate": 1.6760735133129267e-07,
      "loss": 0.268,
      "step": 2889
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.232816947627661,
      "learning_rate": 1.6709907572410265e-07,
      "loss": 0.263,
      "step": 2890
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.296004011835285,
      "learning_rate": 1.665915017002414e-07,
      "loss": 0.246,
      "step": 2891
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.339328657097091,
      "learning_rate": 1.6608462968725733e-07,
      "loss": 0.2813,
      "step": 2892
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.188178965593008,
      "learning_rate": 1.6557846011210751e-07,
      "loss": 0.2206,
      "step": 2893
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5645938693793346,
      "learning_rate": 1.6507299340115744e-07,
      "loss": 0.2885,
      "step": 2894
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.339243501741995,
      "learning_rate": 1.645682299801804e-07,
      "loss": 0.2825,
      "step": 2895
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.367728617536032,
      "learning_rate": 1.6406417027435727e-07,
      "loss": 0.2447,
      "step": 2896
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.462794940200196,
      "learning_rate": 1.6356081470827633e-07,
      "loss": 0.2586,
      "step": 2897
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.9373717395321797,
      "learning_rate": 1.6305816370593262e-07,
      "loss": 0.2825,
      "step": 2898
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2061403889425364,
      "learning_rate": 1.6255621769072803e-07,
      "loss": 0.2256,
      "step": 2899
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.255180702409096,
      "learning_rate": 1.6205497708546933e-07,
      "loss": 0.2343,
      "step": 2900
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.326546351934489,
      "learning_rate": 1.6155444231237104e-07,
      "loss": 0.2713,
      "step": 2901
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.9170467115769405,
      "learning_rate": 1.6105461379305186e-07,
      "loss": 0.2874,
      "step": 2902
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.400555296130694,
      "learning_rate": 1.60555491948536e-07,
      "loss": 0.255,
      "step": 2903
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0770253089306814,
      "learning_rate": 1.6005707719925188e-07,
      "loss": 0.2383,
      "step": 2904
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.264987088165909,
      "learning_rate": 1.5955936996503284e-07,
      "loss": 0.2391,
      "step": 2905
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.337389217903713,
      "learning_rate": 1.590623706651164e-07,
      "loss": 0.2457,
      "step": 2906
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2921471576670096,
      "learning_rate": 1.5856607971814374e-07,
      "loss": 0.2613,
      "step": 2907
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.301923660369795,
      "learning_rate": 1.580704975421584e-07,
      "loss": 0.2399,
      "step": 2908
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4213038071881754,
      "learning_rate": 1.5757562455460805e-07,
      "loss": 0.244,
      "step": 2909
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5169986268703934,
      "learning_rate": 1.5708146117234223e-07,
      "loss": 0.2921,
      "step": 2910
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4131508151166474,
      "learning_rate": 1.5658800781161363e-07,
      "loss": 0.2583,
      "step": 2911
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3094984954712166,
      "learning_rate": 1.5609526488807611e-07,
      "loss": 0.2428,
      "step": 2912
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.297686613691211,
      "learning_rate": 1.5560323281678512e-07,
      "loss": 0.2762,
      "step": 2913
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.401738290881385,
      "learning_rate": 1.5511191201219732e-07,
      "loss": 0.2604,
      "step": 2914
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2765127696756253,
      "learning_rate": 1.5462130288817088e-07,
      "loss": 0.274,
      "step": 2915
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2840651600147557,
      "learning_rate": 1.5413140585796426e-07,
      "loss": 0.2799,
      "step": 2916
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3612164284457617,
      "learning_rate": 1.536422213342352e-07,
      "loss": 0.2449,
      "step": 2917
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2899490356795273,
      "learning_rate": 1.5315374972904238e-07,
      "loss": 0.2416,
      "step": 2918
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4698956489551285,
      "learning_rate": 1.5266599145384318e-07,
      "loss": 0.2752,
      "step": 2919
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.240094198960351,
      "learning_rate": 1.5217894691949518e-07,
      "loss": 0.242,
      "step": 2920
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3749916729226386,
      "learning_rate": 1.5169261653625343e-07,
      "loss": 0.2798,
      "step": 2921
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7876388271161603,
      "learning_rate": 1.5120700071377212e-07,
      "loss": 0.2577,
      "step": 2922
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.781441804581668,
      "learning_rate": 1.5072209986110373e-07,
      "loss": 0.272,
      "step": 2923
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4347166397268785,
      "learning_rate": 1.5023791438669797e-07,
      "loss": 0.2711,
      "step": 2924
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.458491054092882,
      "learning_rate": 1.4975444469840238e-07,
      "loss": 0.3303,
      "step": 2925
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2599999748751727,
      "learning_rate": 1.492716912034614e-07,
      "loss": 0.223,
      "step": 2926
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3778895116764045,
      "learning_rate": 1.487896543085161e-07,
      "loss": 0.2316,
      "step": 2927
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2784230957221787,
      "learning_rate": 1.48308334419604e-07,
      "loss": 0.278,
      "step": 2928
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3700733375661387,
      "learning_rate": 1.4782773194215882e-07,
      "loss": 0.2806,
      "step": 2929
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2901041783321707,
      "learning_rate": 1.473478472810097e-07,
      "loss": 0.263,
      "step": 2930
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.409600698001674,
      "learning_rate": 1.468686808403814e-07,
      "loss": 0.2804,
      "step": 2931
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5969716621663568,
      "learning_rate": 1.4639023302389364e-07,
      "loss": 0.2831,
      "step": 2932
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.394297205291279,
      "learning_rate": 1.4591250423456046e-07,
      "loss": 0.2396,
      "step": 2933
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.448689657248942,
      "learning_rate": 1.454354948747909e-07,
      "loss": 0.2427,
      "step": 2934
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.438782418407605,
      "learning_rate": 1.449592053463874e-07,
      "loss": 0.2961,
      "step": 2935
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.354138825480889,
      "learning_rate": 1.4448363605054636e-07,
      "loss": 0.2625,
      "step": 2936
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2592559986699303,
      "learning_rate": 1.440087873878574e-07,
      "loss": 0.2509,
      "step": 2937
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.330818222709863,
      "learning_rate": 1.4353465975830336e-07,
      "loss": 0.2567,
      "step": 2938
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4420070625144805,
      "learning_rate": 1.4306125356125896e-07,
      "loss": 0.2838,
      "step": 2939
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2464288961476915,
      "learning_rate": 1.4258856919549232e-07,
      "loss": 0.2555,
      "step": 2940
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3803013873706544,
      "learning_rate": 1.4211660705916285e-07,
      "loss": 0.2361,
      "step": 2941
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.410504833130406,
      "learning_rate": 1.4164536754982203e-07,
      "loss": 0.2519,
      "step": 2942
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7036576648926016,
      "learning_rate": 1.4117485106441186e-07,
      "loss": 0.2542,
      "step": 2943
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5126251019306363,
      "learning_rate": 1.407050579992658e-07,
      "loss": 0.2909,
      "step": 2944
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.246164260692375,
      "learning_rate": 1.4023598875010844e-07,
      "loss": 0.264,
      "step": 2945
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.268685222902053,
      "learning_rate": 1.3976764371205418e-07,
      "loss": 0.2696,
      "step": 2946
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.504224124261662,
      "learning_rate": 1.39300023279607e-07,
      "loss": 0.2633,
      "step": 2947
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5660103800190637,
      "learning_rate": 1.388331278466609e-07,
      "loss": 0.2827,
      "step": 2948
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.4725541982526758,
      "learning_rate": 1.3836695780649976e-07,
      "loss": 0.3022,
      "step": 2949
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.446324380195441,
      "learning_rate": 1.379015135517958e-07,
      "loss": 0.2907,
      "step": 2950
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2514612224451906,
      "learning_rate": 1.374367954746094e-07,
      "loss": 0.2706,
      "step": 2951
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.415819572419356,
      "learning_rate": 1.3697280396639034e-07,
      "loss": 0.2523,
      "step": 2952
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1459371173307846,
      "learning_rate": 1.365095394179754e-07,
      "loss": 0.2396,
      "step": 2953
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.323396101986738,
      "learning_rate": 1.360470022195902e-07,
      "loss": 0.2739,
      "step": 2954
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3080472437525437,
      "learning_rate": 1.3558519276084635e-07,
      "loss": 0.2503,
      "step": 2955
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.473968673368382,
      "learning_rate": 1.3512411143074332e-07,
      "loss": 0.296,
      "step": 2956
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3430591542502466,
      "learning_rate": 1.3466375861766698e-07,
      "loss": 0.2451,
      "step": 2957
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.393077540550777,
      "learning_rate": 1.3420413470938942e-07,
      "loss": 0.2659,
      "step": 2958
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.6086430834408345,
      "learning_rate": 1.3374524009306942e-07,
      "loss": 0.2641,
      "step": 2959
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.621421328007362,
      "learning_rate": 1.332870751552503e-07,
      "loss": 0.2779,
      "step": 2960
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.840293461456054,
      "learning_rate": 1.3282964028186172e-07,
      "loss": 0.3053,
      "step": 2961
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.470822092275965,
      "learning_rate": 1.3237293585821785e-07,
      "loss": 0.2622,
      "step": 2962
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.252474082136468,
      "learning_rate": 1.3191696226901795e-07,
      "loss": 0.2718,
      "step": 2963
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3074512131763516,
      "learning_rate": 1.314617198983454e-07,
      "loss": 0.243,
      "step": 2964
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.512138084713408,
      "learning_rate": 1.3100720912966766e-07,
      "loss": 0.272,
      "step": 2965
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3739192198771737,
      "learning_rate": 1.305534303458361e-07,
      "loss": 0.2755,
      "step": 2966
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2893167627464166,
      "learning_rate": 1.301003839290853e-07,
      "loss": 0.2408,
      "step": 2967
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.270167707949592,
      "learning_rate": 1.296480702610332e-07,
      "loss": 0.2501,
      "step": 2968
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3914772364832166,
      "learning_rate": 1.2919648972268027e-07,
      "loss": 0.2323,
      "step": 2969
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.472406533385478,
      "learning_rate": 1.2874564269440958e-07,
      "loss": 0.3096,
      "step": 2970
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5115059991420203,
      "learning_rate": 1.2829552955598622e-07,
      "loss": 0.3056,
      "step": 2971
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1147586969077694,
      "learning_rate": 1.2784615068655745e-07,
      "loss": 0.2611,
      "step": 2972
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3814559744375745,
      "learning_rate": 1.273975064646512e-07,
      "loss": 0.2569,
      "step": 2973
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.525852265833298,
      "learning_rate": 1.2694959726817767e-07,
      "loss": 0.2583,
      "step": 2974
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.4224050505105277,
      "learning_rate": 1.2650242347442707e-07,
      "loss": 0.2269,
      "step": 2975
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3454068114753697,
      "learning_rate": 1.260559854600709e-07,
      "loss": 0.2746,
      "step": 2976
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.279877000511415,
      "learning_rate": 1.2561028360116e-07,
      "loss": 0.255,
      "step": 2977
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.416841526419076,
      "learning_rate": 1.251653182731254e-07,
      "loss": 0.2998,
      "step": 2978
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.4618454435380572,
      "learning_rate": 1.2472108985077834e-07,
      "loss": 0.2692,
      "step": 2979
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5288543032906126,
      "learning_rate": 1.242775987083088e-07,
      "loss": 0.2515,
      "step": 2980
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.307446477470878,
      "learning_rate": 1.23834845219286e-07,
      "loss": 0.2711,
      "step": 2981
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3118979472768664,
      "learning_rate": 1.233928297566571e-07,
      "loss": 0.2632,
      "step": 2982
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.0975762766164716,
      "learning_rate": 1.2295155269274827e-07,
      "loss": 0.2617,
      "step": 2983
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.252171146783034,
      "learning_rate": 1.225110143992638e-07,
      "loss": 0.296,
      "step": 2984
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.699367516738382,
      "learning_rate": 1.220712152472856e-07,
      "loss": 0.3007,
      "step": 2985
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3609242051199897,
      "learning_rate": 1.2163215560727214e-07,
      "loss": 0.2853,
      "step": 2986
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.239716626762453,
      "learning_rate": 1.2119383584905985e-07,
      "loss": 0.2527,
      "step": 2987
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.281800284296882,
      "learning_rate": 1.2075625634186205e-07,
      "loss": 0.2509,
      "step": 2988
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.416589421795546,
      "learning_rate": 1.203194174542682e-07,
      "loss": 0.2919,
      "step": 2989
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.6405939356926527,
      "learning_rate": 1.1988331955424347e-07,
      "loss": 0.2755,
      "step": 2990
    },
    {
      "epoch": 0.85,
      "grad_norm": 4.045979386286837,
      "learning_rate": 1.194479630091294e-07,
      "loss": 0.2376,
      "step": 2991
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2126916596240935,
      "learning_rate": 1.190133481856429e-07,
      "loss": 0.2668,
      "step": 2992
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2755646904017857,
      "learning_rate": 1.1857947544987668e-07,
      "loss": 0.2777,
      "step": 2993
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.431657462454105,
      "learning_rate": 1.1814634516729726e-07,
      "loss": 0.303,
      "step": 2994
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.38579299772321,
      "learning_rate": 1.177139577027465e-07,
      "loss": 0.2728,
      "step": 2995
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1828610624233415,
      "learning_rate": 1.1728231342044049e-07,
      "loss": 0.2322,
      "step": 2996
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.286268924458742,
      "learning_rate": 1.1685141268396902e-07,
      "loss": 0.2507,
      "step": 2997
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.34834559713555,
      "learning_rate": 1.1642125585629592e-07,
      "loss": 0.2757,
      "step": 2998
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.446898983893535,
      "learning_rate": 1.1599184329975809e-07,
      "loss": 0.2751,
      "step": 2999
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3969050527751103,
      "learning_rate": 1.1556317537606586e-07,
      "loss": 0.2653,
      "step": 3000
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2928562405437947,
      "learning_rate": 1.1513525244630196e-07,
      "loss": 0.2647,
      "step": 3001
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.343027458930917,
      "learning_rate": 1.1470807487092171e-07,
      "loss": 0.2419,
      "step": 3002
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4596000456064733,
      "learning_rate": 1.1428164300975274e-07,
      "loss": 0.2695,
      "step": 3003
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1712507233695026,
      "learning_rate": 1.1385595722199437e-07,
      "loss": 0.2453,
      "step": 3004
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1791223390148597,
      "learning_rate": 1.1343101786621745e-07,
      "loss": 0.2565,
      "step": 3005
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2200097978674984,
      "learning_rate": 1.1300682530036432e-07,
      "loss": 0.2485,
      "step": 3006
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3623281275120664,
      "learning_rate": 1.1258337988174793e-07,
      "loss": 0.2473,
      "step": 3007
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.315292263531697,
      "learning_rate": 1.1216068196705208e-07,
      "loss": 0.2621,
      "step": 3008
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3133663961978814,
      "learning_rate": 1.1173873191233096e-07,
      "loss": 0.2478,
      "step": 3009
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.430708800052876,
      "learning_rate": 1.1131753007300881e-07,
      "loss": 0.2708,
      "step": 3010
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5155824409494305,
      "learning_rate": 1.1089707680387961e-07,
      "loss": 0.2741,
      "step": 3011
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.9174229312451216,
      "learning_rate": 1.1047737245910615e-07,
      "loss": 0.275,
      "step": 3012
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.369638242381517,
      "learning_rate": 1.1005841739222166e-07,
      "loss": 0.2588,
      "step": 3013
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2897156128120524,
      "learning_rate": 1.0964021195612728e-07,
      "loss": 0.2717,
      "step": 3014
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3830666042149384,
      "learning_rate": 1.0922275650309321e-07,
      "loss": 0.2664,
      "step": 3015
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3412654270408115,
      "learning_rate": 1.0880605138475707e-07,
      "loss": 0.2655,
      "step": 3016
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.382093627470339,
      "learning_rate": 1.083900969521252e-07,
      "loss": 0.2944,
      "step": 3017
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.492195238746543,
      "learning_rate": 1.0797489355557188e-07,
      "loss": 0.2826,
      "step": 3018
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.328835974383053,
      "learning_rate": 1.0756044154483812e-07,
      "loss": 0.2666,
      "step": 3019
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2091174854967988,
      "learning_rate": 1.07146741269032e-07,
      "loss": 0.2796,
      "step": 3020
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2501063439867477,
      "learning_rate": 1.0673379307662855e-07,
      "loss": 0.2591,
      "step": 3021
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3549354214289524,
      "learning_rate": 1.0632159731546964e-07,
      "loss": 0.2486,
      "step": 3022
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2242715824156876,
      "learning_rate": 1.0591015433276306e-07,
      "loss": 0.2486,
      "step": 3023
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3033630244095527,
      "learning_rate": 1.054994644750824e-07,
      "loss": 0.2632,
      "step": 3024
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.343530716837555,
      "learning_rate": 1.050895280883668e-07,
      "loss": 0.258,
      "step": 3025
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4582728084197667,
      "learning_rate": 1.0468034551792083e-07,
      "loss": 0.2733,
      "step": 3026
    },
    {
      "epoch": 0.86,
      "grad_norm": 5.114240863591207,
      "learning_rate": 1.0427191710841443e-07,
      "loss": 0.2787,
      "step": 3027
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.8725573321328333,
      "learning_rate": 1.0386424320388209e-07,
      "loss": 0.2729,
      "step": 3028
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2117518738730926,
      "learning_rate": 1.0345732414772224e-07,
      "loss": 0.2437,
      "step": 3029
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4384125875736773,
      "learning_rate": 1.0305116028269812e-07,
      "loss": 0.2661,
      "step": 3030
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.38059904238801,
      "learning_rate": 1.0264575195093628e-07,
      "loss": 0.2384,
      "step": 3031
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.288709522342751,
      "learning_rate": 1.022410994939279e-07,
      "loss": 0.2578,
      "step": 3032
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.309509735794672,
      "learning_rate": 1.0183720325252609e-07,
      "loss": 0.2495,
      "step": 3033
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3556405665317097,
      "learning_rate": 1.0143406356694795e-07,
      "loss": 0.2415,
      "step": 3034
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3462410688998205,
      "learning_rate": 1.0103168077677283e-07,
      "loss": 0.2824,
      "step": 3035
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1535339888759526,
      "learning_rate": 1.006300552209427e-07,
      "loss": 0.2374,
      "step": 3036
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3690379200829725,
      "learning_rate": 1.0022918723776175e-07,
      "loss": 0.2625,
      "step": 3037
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3032429338900373,
      "learning_rate": 9.982907716489586e-08,
      "loss": 0.272,
      "step": 3038
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.608970378774197,
      "learning_rate": 9.942972533937266e-08,
      "loss": 0.3033,
      "step": 3039
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3119793826541364,
      "learning_rate": 9.903113209758096e-08,
      "loss": 0.2726,
      "step": 3040
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.381000490542755,
      "learning_rate": 9.863329777527052e-08,
      "loss": 0.2681,
      "step": 3041
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4542599325019796,
      "learning_rate": 9.823622270755205e-08,
      "loss": 0.2564,
      "step": 3042
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4368086277548415,
      "learning_rate": 9.783990722889657e-08,
      "loss": 0.2614,
      "step": 3043
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.644344757700451,
      "learning_rate": 9.744435167313536e-08,
      "loss": 0.2969,
      "step": 3044
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.506641792481987,
      "learning_rate": 9.704955637345946e-08,
      "loss": 0.2606,
      "step": 3045
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.428419743065912,
      "learning_rate": 9.665552166241964e-08,
      "loss": 0.2581,
      "step": 3046
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.308346279825781,
      "learning_rate": 9.626224787192594e-08,
      "loss": 0.2351,
      "step": 3047
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4120125079732713,
      "learning_rate": 9.586973533324738e-08,
      "loss": 0.274,
      "step": 3048
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1444026166759813,
      "learning_rate": 9.547798437701193e-08,
      "loss": 0.2458,
      "step": 3049
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3280997441596494,
      "learning_rate": 9.508699533320597e-08,
      "loss": 0.2785,
      "step": 3050
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.468914423283346,
      "learning_rate": 9.46967685311737e-08,
      "loss": 0.3056,
      "step": 3051
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.251997965826707,
      "learning_rate": 9.430730429961808e-08,
      "loss": 0.2508,
      "step": 3052
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3898049144957327,
      "learning_rate": 9.391860296659915e-08,
      "loss": 0.2694,
      "step": 3053
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3671284513441533,
      "learning_rate": 9.353066485953454e-08,
      "loss": 0.2544,
      "step": 3054
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4648509691775216,
      "learning_rate": 9.314349030519842e-08,
      "loss": 0.2771,
      "step": 3055
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.694199196787725,
      "learning_rate": 9.275707962972279e-08,
      "loss": 0.289,
      "step": 3056
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2566611611970706,
      "learning_rate": 9.237143315859552e-08,
      "loss": 0.2622,
      "step": 3057
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.7771475571941817,
      "learning_rate": 9.19865512166611e-08,
      "loss": 0.2728,
      "step": 3058
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.395294527739055,
      "learning_rate": 9.160243412811952e-08,
      "loss": 0.2783,
      "step": 3059
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.342150068379509,
      "learning_rate": 9.121908221652674e-08,
      "loss": 0.2592,
      "step": 3060
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.3201108482311854,
      "learning_rate": 9.083649580479491e-08,
      "loss": 0.2906,
      "step": 3061
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.38692281040695,
      "learning_rate": 9.045467521519045e-08,
      "loss": 0.2888,
      "step": 3062
    },
    {
      "epoch": 0.87,
      "grad_norm": 5.2488795924967935,
      "learning_rate": 9.00736207693349e-08,
      "loss": 0.2834,
      "step": 3063
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.5617539242024745,
      "learning_rate": 8.969333278820445e-08,
      "loss": 0.281,
      "step": 3064
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2762094504720207,
      "learning_rate": 8.931381159212981e-08,
      "loss": 0.272,
      "step": 3065
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.397954895380229,
      "learning_rate": 8.893505750079622e-08,
      "loss": 0.2529,
      "step": 3066
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.451818318013084,
      "learning_rate": 8.855707083324181e-08,
      "loss": 0.2849,
      "step": 3067
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.330369584868695,
      "learning_rate": 8.817985190785882e-08,
      "loss": 0.2503,
      "step": 3068
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.347047792958819,
      "learning_rate": 8.780340104239282e-08,
      "loss": 0.2951,
      "step": 3069
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4605820045070477,
      "learning_rate": 8.742771855394204e-08,
      "loss": 0.2863,
      "step": 3070
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.346271559452563,
      "learning_rate": 8.705280475895848e-08,
      "loss": 0.2666,
      "step": 3071
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3650878311332355,
      "learning_rate": 8.66786599732453e-08,
      "loss": 0.2865,
      "step": 3072
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4735231589506377,
      "learning_rate": 8.630528451195873e-08,
      "loss": 0.2652,
      "step": 3073
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1078553801134716,
      "learning_rate": 8.593267868960674e-08,
      "loss": 0.2686,
      "step": 3074
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.367953894551575,
      "learning_rate": 8.556084282004905e-08,
      "loss": 0.2442,
      "step": 3075
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.6606227224227568,
      "learning_rate": 8.518977721649679e-08,
      "loss": 0.2446,
      "step": 3076
    },
    {
      "epoch": 0.87,
      "grad_norm": 6.555949455627507,
      "learning_rate": 8.481948219151225e-08,
      "loss": 0.3033,
      "step": 3077
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.900102774963622,
      "learning_rate": 8.444995805700872e-08,
      "loss": 0.2805,
      "step": 3078
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2788626417574056,
      "learning_rate": 8.408120512424999e-08,
      "loss": 0.2559,
      "step": 3079
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.240323058843195,
      "learning_rate": 8.371322370385048e-08,
      "loss": 0.2779,
      "step": 3080
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3898302231497137,
      "learning_rate": 8.334601410577436e-08,
      "loss": 0.2853,
      "step": 3081
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3482574141862327,
      "learning_rate": 8.297957663933608e-08,
      "loss": 0.2752,
      "step": 3082
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.319169634223261,
      "learning_rate": 8.261391161319941e-08,
      "loss": 0.24,
      "step": 3083
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3155577880334555,
      "learning_rate": 8.224901933537776e-08,
      "loss": 0.2726,
      "step": 3084
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2189740851909225,
      "learning_rate": 8.18849001132329e-08,
      "loss": 0.2405,
      "step": 3085
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4746209392963223,
      "learning_rate": 8.15215542534765e-08,
      "loss": 0.268,
      "step": 3086
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.180319378355328,
      "learning_rate": 8.115898206216798e-08,
      "loss": 0.2591,
      "step": 3087
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.343287819286448,
      "learning_rate": 8.079718384471557e-08,
      "loss": 0.2634,
      "step": 3088
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.357102540519274,
      "learning_rate": 8.043615990587494e-08,
      "loss": 0.2686,
      "step": 3089
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3110864751314604,
      "learning_rate": 8.007591054975016e-08,
      "loss": 0.2883,
      "step": 3090
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.5614262632707647,
      "learning_rate": 7.971643607979273e-08,
      "loss": 0.2842,
      "step": 3091
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.335392757844915,
      "learning_rate": 7.93577367988012e-08,
      "loss": 0.2757,
      "step": 3092
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3085814088282475,
      "learning_rate": 7.899981300892144e-08,
      "loss": 0.258,
      "step": 3093
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1934363925819382,
      "learning_rate": 7.86426650116454e-08,
      "loss": 0.2362,
      "step": 3094
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1467726540158205,
      "learning_rate": 7.828629310781265e-08,
      "loss": 0.2312,
      "step": 3095
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.36570585706135,
      "learning_rate": 7.793069759760829e-08,
      "loss": 0.2702,
      "step": 3096
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3436174242214376,
      "learning_rate": 7.75758787805637e-08,
      "loss": 0.2409,
      "step": 3097
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.430656309119998,
      "learning_rate": 7.722183695555562e-08,
      "loss": 0.2931,
      "step": 3098
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4954309256933866,
      "learning_rate": 7.686857242080669e-08,
      "loss": 0.2587,
      "step": 3099
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2823343280136217,
      "learning_rate": 7.651608547388489e-08,
      "loss": 0.2446,
      "step": 3100
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4514656994841872,
      "learning_rate": 7.616437641170315e-08,
      "loss": 0.2562,
      "step": 3101
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3284698907360495,
      "learning_rate": 7.581344553051871e-08,
      "loss": 0.2606,
      "step": 3102
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6800251316823456,
      "learning_rate": 7.54632931259338e-08,
      "loss": 0.2645,
      "step": 3103
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.5123704990491103,
      "learning_rate": 7.51139194928947e-08,
      "loss": 0.2784,
      "step": 3104
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3995486614027737,
      "learning_rate": 7.47653249256922e-08,
      "loss": 0.2573,
      "step": 3105
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2445829265036874,
      "learning_rate": 7.44175097179599e-08,
      "loss": 0.2652,
      "step": 3106
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.516997912739564,
      "learning_rate": 7.407047416267564e-08,
      "loss": 0.2722,
      "step": 3107
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.197115715153637,
      "learning_rate": 7.372421855216037e-08,
      "loss": 0.251,
      "step": 3108
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3742140188304464,
      "learning_rate": 7.337874317807802e-08,
      "loss": 0.2825,
      "step": 3109
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1502066887265965,
      "learning_rate": 7.303404833143522e-08,
      "loss": 0.242,
      "step": 3110
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.580142884634076,
      "learning_rate": 7.269013430258131e-08,
      "loss": 0.3137,
      "step": 3111
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3035826394343055,
      "learning_rate": 7.234700138120776e-08,
      "loss": 0.2367,
      "step": 3112
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3843530141893963,
      "learning_rate": 7.200464985634824e-08,
      "loss": 0.2524,
      "step": 3113
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3209743886788656,
      "learning_rate": 7.166308001637811e-08,
      "loss": 0.2899,
      "step": 3114
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3303005873390066,
      "learning_rate": 7.13222921490142e-08,
      "loss": 0.2637,
      "step": 3115
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.426996076177478,
      "learning_rate": 7.098228654131488e-08,
      "loss": 0.2905,
      "step": 3116
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2879093080690045,
      "learning_rate": 7.064306347967952e-08,
      "loss": 0.292,
      "step": 3117
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.337757585829966,
      "learning_rate": 7.03046232498482e-08,
      "loss": 0.248,
      "step": 3118
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.603799211351658,
      "learning_rate": 6.996696613690156e-08,
      "loss": 0.2754,
      "step": 3119
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.432035830910913,
      "learning_rate": 6.963009242526096e-08,
      "loss": 0.2708,
      "step": 3120
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4988923447017273,
      "learning_rate": 6.929400239868743e-08,
      "loss": 0.2578,
      "step": 3121
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1903315111152,
      "learning_rate": 6.895869634028217e-08,
      "loss": 0.2433,
      "step": 3122
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.182167491069264,
      "learning_rate": 6.862417453248593e-08,
      "loss": 0.224,
      "step": 3123
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.23234379480758,
      "learning_rate": 6.82904372570785e-08,
      "loss": 0.2661,
      "step": 3124
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1799081969004988,
      "learning_rate": 6.79574847951796e-08,
      "loss": 0.2723,
      "step": 3125
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6825595419677004,
      "learning_rate": 6.76253174272472e-08,
      "loss": 0.2784,
      "step": 3126
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5886007659552006,
      "learning_rate": 6.729393543307837e-08,
      "loss": 0.3001,
      "step": 3127
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4011926903124863,
      "learning_rate": 6.696333909180796e-08,
      "loss": 0.254,
      "step": 3128
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.379893316621335,
      "learning_rate": 6.663352868191008e-08,
      "loss": 0.2617,
      "step": 3129
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6057157508985167,
      "learning_rate": 6.630450448119617e-08,
      "loss": 0.2767,
      "step": 3130
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3009825684020466,
      "learning_rate": 6.597626676681545e-08,
      "loss": 0.2459,
      "step": 3131
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3740096091255003,
      "learning_rate": 6.564881581525449e-08,
      "loss": 0.2746,
      "step": 3132
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.665005965150247,
      "learning_rate": 6.532215190233747e-08,
      "loss": 0.3207,
      "step": 3133
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.319434599678059,
      "learning_rate": 6.499627530322582e-08,
      "loss": 0.2632,
      "step": 3134
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.684205728247248,
      "learning_rate": 6.467118629241718e-08,
      "loss": 0.2644,
      "step": 3135
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.44892949559697,
      "learning_rate": 6.434688514374632e-08,
      "loss": 0.262,
      "step": 3136
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3548233221374733,
      "learning_rate": 6.402337213038378e-08,
      "loss": 0.2764,
      "step": 3137
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.480068951853458,
      "learning_rate": 6.370064752483661e-08,
      "loss": 0.2784,
      "step": 3138
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4592994564515664,
      "learning_rate": 6.337871159894803e-08,
      "loss": 0.2834,
      "step": 3139
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2792956091900307,
      "learning_rate": 6.305756462389644e-08,
      "loss": 0.2547,
      "step": 3140
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4306357067447553,
      "learning_rate": 6.273720687019579e-08,
      "loss": 0.2767,
      "step": 3141
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.379110567627786,
      "learning_rate": 6.241763860769534e-08,
      "loss": 0.2725,
      "step": 3142
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2710952274288494,
      "learning_rate": 6.209886010557907e-08,
      "loss": 0.2485,
      "step": 3143
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5652561077781746,
      "learning_rate": 6.178087163236645e-08,
      "loss": 0.252,
      "step": 3144
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2341453207739805,
      "learning_rate": 6.146367345591053e-08,
      "loss": 0.253,
      "step": 3145
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3158832187436107,
      "learning_rate": 6.114726584339913e-08,
      "loss": 0.2655,
      "step": 3146
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4445635266013626,
      "learning_rate": 6.08316490613543e-08,
      "loss": 0.2669,
      "step": 3147
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.304129154768925,
      "learning_rate": 6.051682337563158e-08,
      "loss": 0.2705,
      "step": 3148
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5757049619729018,
      "learning_rate": 6.02027890514204e-08,
      "loss": 0.2662,
      "step": 3149
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.565737017154383,
      "learning_rate": 5.988954635324351e-08,
      "loss": 0.3128,
      "step": 3150
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3260226124352856,
      "learning_rate": 5.957709554495682e-08,
      "loss": 0.2633,
      "step": 3151
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3858294402669435,
      "learning_rate": 5.926543688974928e-08,
      "loss": 0.264,
      "step": 3152
    },
    {
      "epoch": 0.89,
      "grad_norm": 5.961869575469567,
      "learning_rate": 5.8954570650142424e-08,
      "loss": 0.2629,
      "step": 3153
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.474005827500162,
      "learning_rate": 5.864449708799057e-08,
      "loss": 0.2641,
      "step": 3154
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4705398326799717,
      "learning_rate": 5.833521646448003e-08,
      "loss": 0.2926,
      "step": 3155
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.319310223557441,
      "learning_rate": 5.8026729040129506e-08,
      "loss": 0.2458,
      "step": 3156
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.411383748029571,
      "learning_rate": 5.771903507478915e-08,
      "loss": 0.3025,
      "step": 3157
    },
    {
      "epoch": 0.89,
      "grad_norm": 6.037524189709264,
      "learning_rate": 5.741213482764118e-08,
      "loss": 0.2661,
      "step": 3158
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.534264378285265,
      "learning_rate": 5.7106028557199036e-08,
      "loss": 0.2815,
      "step": 3159
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4799284033923366,
      "learning_rate": 5.6800716521307356e-08,
      "loss": 0.2857,
      "step": 3160
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4307602896415217,
      "learning_rate": 5.649619897714186e-08,
      "loss": 0.2595,
      "step": 3161
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.309911730440573,
      "learning_rate": 5.61924761812087e-08,
      "loss": 0.2572,
      "step": 3162
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2708443975298596,
      "learning_rate": 5.588954838934523e-08,
      "loss": 0.2263,
      "step": 3163
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3928996224840224,
      "learning_rate": 5.558741585671845e-08,
      "loss": 0.262,
      "step": 3164
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.400883941359423,
      "learning_rate": 5.528607883782599e-08,
      "loss": 0.2699,
      "step": 3165
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3632036616640164,
      "learning_rate": 5.4985537586495157e-08,
      "loss": 0.2488,
      "step": 3166
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4002704113750326,
      "learning_rate": 5.4685792355882664e-08,
      "loss": 0.2626,
      "step": 3167
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4063425935418663,
      "learning_rate": 5.438684339847555e-08,
      "loss": 0.2591,
      "step": 3168
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.430614312790665,
      "learning_rate": 5.4088690966089254e-08,
      "loss": 0.2611,
      "step": 3169
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.419754243863106,
      "learning_rate": 5.379133530986901e-08,
      "loss": 0.2727,
      "step": 3170
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4662220396431747,
      "learning_rate": 5.349477668028801e-08,
      "loss": 0.283,
      "step": 3171
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3426309066637545,
      "learning_rate": 5.319901532714877e-08,
      "loss": 0.2701,
      "step": 3172
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4592232084554415,
      "learning_rate": 5.2904051499582105e-08,
      "loss": 0.2341,
      "step": 3173
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.46097236692037,
      "learning_rate": 5.2609885446047165e-08,
      "loss": 0.2851,
      "step": 3174
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.320945269240666,
      "learning_rate": 5.231651741433063e-08,
      "loss": 0.2515,
      "step": 3175
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.6406931382496315,
      "learning_rate": 5.2023947651547275e-08,
      "loss": 0.2882,
      "step": 3176
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3858195376057347,
      "learning_rate": 5.17321764041394e-08,
      "loss": 0.249,
      "step": 3177
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2130979645960913,
      "learning_rate": 5.144120391787732e-08,
      "loss": 0.2428,
      "step": 3178
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4254984080961957,
      "learning_rate": 5.115103043785718e-08,
      "loss": 0.2485,
      "step": 3179
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3045501628031704,
      "learning_rate": 5.086165620850336e-08,
      "loss": 0.2768,
      "step": 3180
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2144689483497935,
      "learning_rate": 5.0573081473566315e-08,
      "loss": 0.2846,
      "step": 3181
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3229900518667357,
      "learning_rate": 5.028530647612306e-08,
      "loss": 0.2937,
      "step": 3182
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.480173992557888,
      "learning_rate": 4.999833145857768e-08,
      "loss": 0.2651,
      "step": 3183
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2829017879265145,
      "learning_rate": 4.971215666265938e-08,
      "loss": 0.2393,
      "step": 3184
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.698922135891398,
      "learning_rate": 4.942678232942399e-08,
      "loss": 0.2879,
      "step": 3185
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.409469865808042,
      "learning_rate": 4.9142208699252893e-08,
      "loss": 0.2864,
      "step": 3186
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.405615517945202,
      "learning_rate": 4.885843601185291e-08,
      "loss": 0.2805,
      "step": 3187
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.347723176787636,
      "learning_rate": 4.857546450625649e-08,
      "loss": 0.2615,
      "step": 3188
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2570397287056916,
      "learning_rate": 4.8293294420820754e-08,
      "loss": 0.255,
      "step": 3189
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.386877051850636,
      "learning_rate": 4.801192599322834e-08,
      "loss": 0.2543,
      "step": 3190
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4355257643620947,
      "learning_rate": 4.773135946048601e-08,
      "loss": 0.2548,
      "step": 3191
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.383789499942686,
      "learning_rate": 4.7451595058925594e-08,
      "loss": 0.2642,
      "step": 3192
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.22476355531068,
      "learning_rate": 4.717263302420282e-08,
      "loss": 0.2319,
      "step": 3193
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3890981659570376,
      "learning_rate": 4.689447359129794e-08,
      "loss": 0.2672,
      "step": 3194
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4735293733403685,
      "learning_rate": 4.661711699451476e-08,
      "loss": 0.2716,
      "step": 3195
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.225349534764869,
      "learning_rate": 4.6340563467481164e-08,
      "loss": 0.2744,
      "step": 3196
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.450129758872958,
      "learning_rate": 4.606481324314848e-08,
      "loss": 0.2513,
      "step": 3197
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2568166974492883,
      "learning_rate": 4.5789866553791245e-08,
      "loss": 0.2402,
      "step": 3198
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.633689105888111,
      "learning_rate": 4.551572363100731e-08,
      "loss": 0.2336,
      "step": 3199
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3558166985162363,
      "learning_rate": 4.52423847057174e-08,
      "loss": 0.2382,
      "step": 3200
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4980777479857506,
      "learning_rate": 4.496985000816489e-08,
      "loss": 0.3011,
      "step": 3201
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4025661382784196,
      "learning_rate": 4.469811976791604e-08,
      "loss": 0.3135,
      "step": 3202
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.288906258196443,
      "learning_rate": 4.442719421385921e-08,
      "loss": 0.2783,
      "step": 3203
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3401562728012664,
      "learning_rate": 4.415707357420517e-08,
      "loss": 0.2381,
      "step": 3204
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2560055476064536,
      "learning_rate": 4.388775807648659e-08,
      "loss": 0.2676,
      "step": 3205
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.307661722100612,
      "learning_rate": 4.3619247947557445e-08,
      "loss": 0.2451,
      "step": 3206
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.748113485649648,
      "learning_rate": 4.3351543413594263e-08,
      "loss": 0.2583,
      "step": 3207
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2513931431439382,
      "learning_rate": 4.308464470009432e-08,
      "loss": 0.2774,
      "step": 3208
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.502034598201456,
      "learning_rate": 4.2818552031876454e-08,
      "loss": 0.2926,
      "step": 3209
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.291150374878783,
      "learning_rate": 4.2553265633080146e-08,
      "loss": 0.2662,
      "step": 3210
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.5215967746639354,
      "learning_rate": 4.228878572716588e-08,
      "loss": 0.266,
      "step": 3211
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.349662419335486,
      "learning_rate": 4.202511253691521e-08,
      "loss": 0.2708,
      "step": 3212
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3748199771039564,
      "learning_rate": 4.176224628442981e-08,
      "loss": 0.2805,
      "step": 3213
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3378089840973058,
      "learning_rate": 4.150018719113147e-08,
      "loss": 0.2765,
      "step": 3214
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4869406885817775,
      "learning_rate": 4.123893547776236e-08,
      "loss": 0.2708,
      "step": 3215
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3636653162321783,
      "learning_rate": 4.097849136438436e-08,
      "loss": 0.2616,
      "step": 3216
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.322882191550021,
      "learning_rate": 4.071885507037953e-08,
      "loss": 0.2479,
      "step": 3217
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6038193510966265,
      "learning_rate": 4.0460026814448934e-08,
      "loss": 0.3161,
      "step": 3218
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.9175710529853762,
      "learning_rate": 4.0202006814613165e-08,
      "loss": 0.3271,
      "step": 3219
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2335773040988847,
      "learning_rate": 3.994479528821204e-08,
      "loss": 0.2589,
      "step": 3220
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.29518040874271,
      "learning_rate": 3.9688392451904475e-08,
      "loss": 0.2662,
      "step": 3221
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4345206626996085,
      "learning_rate": 3.943279852166803e-08,
      "loss": 0.2724,
      "step": 3222
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.619129736310827,
      "learning_rate": 3.917801371279894e-08,
      "loss": 0.2775,
      "step": 3223
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2320266622210374,
      "learning_rate": 3.8924038239911975e-08,
      "loss": 0.2542,
      "step": 3224
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4111141473121123,
      "learning_rate": 3.8670872316939885e-08,
      "loss": 0.2827,
      "step": 3225
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.453538199319764,
      "learning_rate": 3.841851615713398e-08,
      "loss": 0.3011,
      "step": 3226
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3560675605285444,
      "learning_rate": 3.816696997306301e-08,
      "loss": 0.2999,
      "step": 3227
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.500501438703021,
      "learning_rate": 3.79162339766137e-08,
      "loss": 0.2847,
      "step": 3228
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4567324123602363,
      "learning_rate": 3.766630837899032e-08,
      "loss": 0.2924,
      "step": 3229
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.369206056840079,
      "learning_rate": 3.7417193390714476e-08,
      "loss": 0.2833,
      "step": 3230
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3537524798078473,
      "learning_rate": 3.716888922162487e-08,
      "loss": 0.2884,
      "step": 3231
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2654121630396,
      "learning_rate": 3.692139608087741e-08,
      "loss": 0.2761,
      "step": 3232
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.367419390871166,
      "learning_rate": 3.667471417694468e-08,
      "loss": 0.2457,
      "step": 3233
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2877453429907635,
      "learning_rate": 3.642884371761601e-08,
      "loss": 0.254,
      "step": 3234
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.573930551823851,
      "learning_rate": 3.6183784909997187e-08,
      "loss": 0.2634,
      "step": 3235
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.366805500727652,
      "learning_rate": 3.593953796051041e-08,
      "loss": 0.257,
      "step": 3236
    },
    {
      "epoch": 0.92,
      "grad_norm": 5.863808786508515,
      "learning_rate": 3.5696103074893793e-08,
      "loss": 0.2966,
      "step": 3237
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.566493498708618,
      "learning_rate": 3.545348045820173e-08,
      "loss": 0.2675,
      "step": 3238
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4598395736526837,
      "learning_rate": 3.521167031480432e-08,
      "loss": 0.2886,
      "step": 3239
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2523848463832543,
      "learning_rate": 3.497067284838673e-08,
      "loss": 0.2659,
      "step": 3240
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4175830736828057,
      "learning_rate": 3.4730488261950574e-08,
      "loss": 0.2866,
      "step": 3241
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.307963198877353,
      "learning_rate": 3.449111675781202e-08,
      "loss": 0.2474,
      "step": 3242
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.220977889209224,
      "learning_rate": 3.4252558537602786e-08,
      "loss": 0.2501,
      "step": 3243
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3093402757426995,
      "learning_rate": 3.401481380226889e-08,
      "loss": 0.2674,
      "step": 3244
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1868144442873,
      "learning_rate": 3.3777882752071715e-08,
      "loss": 0.2605,
      "step": 3245
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.219309920824285,
      "learning_rate": 3.354176558658728e-08,
      "loss": 0.2621,
      "step": 3246
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.5444203511053654,
      "learning_rate": 3.33064625047057e-08,
      "loss": 0.2821,
      "step": 3247
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4616798789610947,
      "learning_rate": 3.307197370463133e-08,
      "loss": 0.2793,
      "step": 3248
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.300724708712612,
      "learning_rate": 3.283829938388294e-08,
      "loss": 0.2561,
      "step": 3249
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.3329227624540954,
      "learning_rate": 3.260543973929286e-08,
      "loss": 0.2779,
      "step": 3250
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3194303907345537,
      "learning_rate": 3.237339496700775e-08,
      "loss": 0.2392,
      "step": 3251
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.602542644339946,
      "learning_rate": 3.2142165262487365e-08,
      "loss": 0.3197,
      "step": 3252
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.38983381123609,
      "learning_rate": 3.1911750820505015e-08,
      "loss": 0.2636,
      "step": 3253
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1980480700182974,
      "learning_rate": 3.168215183514733e-08,
      "loss": 0.2387,
      "step": 3254
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.278995719811263,
      "learning_rate": 3.145336849981395e-08,
      "loss": 0.2432,
      "step": 3255
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3941204185182294,
      "learning_rate": 3.1225401007217934e-08,
      "loss": 0.2821,
      "step": 3256
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.36472481649528,
      "learning_rate": 3.0998249549384346e-08,
      "loss": 0.2432,
      "step": 3257
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.261121367712428,
      "learning_rate": 3.077191431765147e-08,
      "loss": 0.266,
      "step": 3258
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.440840562496626,
      "learning_rate": 3.0546395502669795e-08,
      "loss": 0.2726,
      "step": 3259
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.416700603257129,
      "learning_rate": 3.032169329440226e-08,
      "loss": 0.278,
      "step": 3260
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.9480803404548483,
      "learning_rate": 3.009780788212379e-08,
      "loss": 0.2961,
      "step": 3261
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3246552045964135,
      "learning_rate": 2.9874739454421424e-08,
      "loss": 0.2645,
      "step": 3262
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2128528098505083,
      "learning_rate": 2.965248819919397e-08,
      "loss": 0.2527,
      "step": 3263
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2228799005968125,
      "learning_rate": 2.943105430365178e-08,
      "loss": 0.2694,
      "step": 3264
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3867533631022613,
      "learning_rate": 2.921043795431699e-08,
      "loss": 0.2819,
      "step": 3265
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3560004236511376,
      "learning_rate": 2.8990639337022838e-08,
      "loss": 0.2677,
      "step": 3266
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.484908011066342,
      "learning_rate": 2.8771658636913886e-08,
      "loss": 0.2858,
      "step": 3267
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.414734426146304,
      "learning_rate": 2.85534960384457e-08,
      "loss": 0.2608,
      "step": 3268
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.384125145372157,
      "learning_rate": 2.8336151725384727e-08,
      "loss": 0.238,
      "step": 3269
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.539193357795301,
      "learning_rate": 2.8119625880808183e-08,
      "loss": 0.2764,
      "step": 3270
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.383186119315387,
      "learning_rate": 2.7903918687103733e-08,
      "loss": 0.2722,
      "step": 3271
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1790139505551247,
      "learning_rate": 2.7689030325969476e-08,
      "loss": 0.2391,
      "step": 3272
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7517357675515552,
      "learning_rate": 2.7474960978414064e-08,
      "loss": 0.2607,
      "step": 3273
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4227957748137516,
      "learning_rate": 2.7261710824755812e-08,
      "loss": 0.2766,
      "step": 3274
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.236432074590604,
      "learning_rate": 2.704928004462337e-08,
      "loss": 0.2739,
      "step": 3275
    },
    {
      "epoch": 0.93,
      "grad_norm": 4.958590582391775,
      "learning_rate": 2.683766881695504e-08,
      "loss": 0.2751,
      "step": 3276
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3924049229226285,
      "learning_rate": 2.6626877319998798e-08,
      "loss": 0.2794,
      "step": 3277
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3948885814981016,
      "learning_rate": 2.641690573131228e-08,
      "loss": 0.2757,
      "step": 3278
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2237442319076095,
      "learning_rate": 2.6207754227761892e-08,
      "loss": 0.26,
      "step": 3279
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2420191639484632,
      "learning_rate": 2.5999422985524157e-08,
      "loss": 0.2707,
      "step": 3280
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3290621853224764,
      "learning_rate": 2.579191218008403e-08,
      "loss": 0.2739,
      "step": 3281
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.347622446698043,
      "learning_rate": 2.5585221986235693e-08,
      "loss": 0.2533,
      "step": 3282
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.323131153292077,
      "learning_rate": 2.537935257808177e-08,
      "loss": 0.272,
      "step": 3283
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.318533312951456,
      "learning_rate": 2.5174304129033653e-08,
      "loss": 0.2675,
      "step": 3284
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.481506357351729,
      "learning_rate": 2.4970076811811513e-08,
      "loss": 0.2553,
      "step": 3285
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.351142696730123,
      "learning_rate": 2.4766670798443412e-08,
      "loss": 0.2712,
      "step": 3286
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.319674755862298,
      "learning_rate": 2.4564086260265847e-08,
      "loss": 0.2722,
      "step": 3287
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.387010165950708,
      "learning_rate": 2.436232336792321e-08,
      "loss": 0.2832,
      "step": 3288
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3906373099226284,
      "learning_rate": 2.416138229136777e-08,
      "loss": 0.2621,
      "step": 3289
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.347750085520224,
      "learning_rate": 2.3961263199859915e-08,
      "loss": 0.2609,
      "step": 3290
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.291141282114224,
      "learning_rate": 2.3761966261967247e-08,
      "loss": 0.2441,
      "step": 3291
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3569274367279274,
      "learning_rate": 2.3563491645564925e-08,
      "loss": 0.2671,
      "step": 3292
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3740086526710327,
      "learning_rate": 2.336583951783555e-08,
      "loss": 0.2842,
      "step": 3293
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.245727364756435,
      "learning_rate": 2.3169010045268723e-08,
      "loss": 0.2671,
      "step": 3294
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3848931241980336,
      "learning_rate": 2.2973003393661372e-08,
      "loss": 0.2561,
      "step": 3295
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.368790604055993,
      "learning_rate": 2.2777819728116988e-08,
      "loss": 0.2614,
      "step": 3296
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.5764287797220984,
      "learning_rate": 2.2583459213046162e-08,
      "loss": 0.288,
      "step": 3297
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8656696962228563,
      "learning_rate": 2.238992201216594e-08,
      "loss": 0.2705,
      "step": 3298
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.344589554713833,
      "learning_rate": 2.219720828849969e-08,
      "loss": 0.2329,
      "step": 3299
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.388564658275402,
      "learning_rate": 2.2005318204377565e-08,
      "loss": 0.289,
      "step": 3300
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5719941834626807,
      "learning_rate": 2.18142519214356e-08,
      "loss": 0.3029,
      "step": 3301
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4035422211888378,
      "learning_rate": 2.1624009600616056e-08,
      "loss": 0.2601,
      "step": 3302
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5636385948099423,
      "learning_rate": 2.1434591402166967e-08,
      "loss": 0.286,
      "step": 3303
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3912117460001006,
      "learning_rate": 2.1245997485642485e-08,
      "loss": 0.2394,
      "step": 3304
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5857138602537892,
      "learning_rate": 2.1058228009902092e-08,
      "loss": 0.2813,
      "step": 3305
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2418195013942563,
      "learning_rate": 2.087128313311115e-08,
      "loss": 0.2754,
      "step": 3306
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.411005422972666,
      "learning_rate": 2.0685163012740036e-08,
      "loss": 0.2702,
      "step": 3307
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3891746886806846,
      "learning_rate": 2.0499867805564784e-08,
      "loss": 0.2797,
      "step": 3308
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2603837327608254,
      "learning_rate": 2.0315397667666433e-08,
      "loss": 0.2452,
      "step": 3309
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4929320500854577,
      "learning_rate": 2.013175275443102e-08,
      "loss": 0.2558,
      "step": 3310
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.569810976957224,
      "learning_rate": 1.9948933220549248e-08,
      "loss": 0.2974,
      "step": 3311
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.35226642441117,
      "learning_rate": 1.9766939220017153e-08,
      "loss": 0.2618,
      "step": 3312
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3079105807378486,
      "learning_rate": 1.9585770906134668e-08,
      "loss": 0.2746,
      "step": 3313
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3644399960317277,
      "learning_rate": 1.940542843150683e-08,
      "loss": 0.2819,
      "step": 3314
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.396111835884552,
      "learning_rate": 1.9225911948042683e-08,
      "loss": 0.2613,
      "step": 3315
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3463888293024384,
      "learning_rate": 1.9047221606955712e-08,
      "loss": 0.2504,
      "step": 3316
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3679034793186093,
      "learning_rate": 1.886935755876329e-08,
      "loss": 0.2733,
      "step": 3317
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.259794421485142,
      "learning_rate": 1.8692319953286906e-08,
      "loss": 0.2703,
      "step": 3318
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3388390277874422,
      "learning_rate": 1.8516108939651943e-08,
      "loss": 0.2855,
      "step": 3319
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.543297868344573,
      "learning_rate": 1.8340724666287555e-08,
      "loss": 0.2757,
      "step": 3320
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.254819797018417,
      "learning_rate": 1.816616728092646e-08,
      "loss": 0.2511,
      "step": 3321
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3420892520736505,
      "learning_rate": 1.7992436930604483e-08,
      "loss": 0.2922,
      "step": 3322
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.446367425978593,
      "learning_rate": 1.7819533761661344e-08,
      "loss": 0.2647,
      "step": 3323
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4262242704514434,
      "learning_rate": 1.7647457919739872e-08,
      "loss": 0.2687,
      "step": 3324
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3242793523904606,
      "learning_rate": 1.7476209549785903e-08,
      "loss": 0.2397,
      "step": 3325
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.426182963897848,
      "learning_rate": 1.7305788796048272e-08,
      "loss": 0.2973,
      "step": 3326
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.703243585335758,
      "learning_rate": 1.7136195802078478e-08,
      "loss": 0.2707,
      "step": 3327
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.543425586558075,
      "learning_rate": 1.6967430710731258e-08,
      "loss": 0.2497,
      "step": 3328
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.214936036687701,
      "learning_rate": 1.6799493664163668e-08,
      "loss": 0.2467,
      "step": 3329
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.394052290637097,
      "learning_rate": 1.6632384803835332e-08,
      "loss": 0.257,
      "step": 3330
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4616826296848093,
      "learning_rate": 1.6466104270508098e-08,
      "loss": 0.235,
      "step": 3331
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2261144395575623,
      "learning_rate": 1.6300652204246255e-08,
      "loss": 0.2369,
      "step": 3332
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3220778215462743,
      "learning_rate": 1.6136028744416218e-08,
      "loss": 0.2586,
      "step": 3333
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.332569555366564,
      "learning_rate": 1.5972234029686616e-08,
      "loss": 0.2727,
      "step": 3334
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.43062826390679,
      "learning_rate": 1.5809268198027524e-08,
      "loss": 0.2972,
      "step": 3335
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4845491954069296,
      "learning_rate": 1.5647131386711367e-08,
      "loss": 0.2835,
      "step": 3336
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.593439289006337,
      "learning_rate": 1.5485823732311775e-08,
      "loss": 0.2638,
      "step": 3337
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.473215011505199,
      "learning_rate": 1.532534537070429e-08,
      "loss": 0.2811,
      "step": 3338
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.256085966417221,
      "learning_rate": 1.516569643706578e-08,
      "loss": 0.2546,
      "step": 3339
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4258033040572258,
      "learning_rate": 1.5006877065874335e-08,
      "loss": 0.2915,
      "step": 3340
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.268360402644574,
      "learning_rate": 1.4848887390909614e-08,
      "loss": 0.2953,
      "step": 3341
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4874133228590356,
      "learning_rate": 1.4691727545251942e-08,
      "loss": 0.2913,
      "step": 3342
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.202250492491265,
      "learning_rate": 1.4535397661283089e-08,
      "loss": 0.2529,
      "step": 3343
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.343475727802179,
      "learning_rate": 1.4379897870685498e-08,
      "loss": 0.2372,
      "step": 3344
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4606844418448905,
      "learning_rate": 1.4225228304442172e-08,
      "loss": 0.2593,
      "step": 3345
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3663621030257858,
      "learning_rate": 1.4071389092837338e-08,
      "loss": 0.2785,
      "step": 3346
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2640297556060873,
      "learning_rate": 1.3918380365455228e-08,
      "loss": 0.2446,
      "step": 3347
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.20404868372011,
      "learning_rate": 1.3766202251180858e-08,
      "loss": 0.2756,
      "step": 3348
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1297206085618754,
      "learning_rate": 1.3614854878199577e-08,
      "loss": 0.2259,
      "step": 3349
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4107497897973005,
      "learning_rate": 1.3464338373996741e-08,
      "loss": 0.2662,
      "step": 3350
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.581593802277168,
      "learning_rate": 1.3314652865358156e-08,
      "loss": 0.3059,
      "step": 3351
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.308818850381563,
      "learning_rate": 1.3165798478369183e-08,
      "loss": 0.2747,
      "step": 3352
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2932563254591054,
      "learning_rate": 1.3017775338415638e-08,
      "loss": 0.2804,
      "step": 3353
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.525942716214574,
      "learning_rate": 1.287058357018278e-08,
      "loss": 0.2755,
      "step": 3354
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4985923891428476,
      "learning_rate": 1.2724223297655878e-08,
      "loss": 0.3062,
      "step": 3355
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.356662166585638,
      "learning_rate": 1.2578694644119425e-08,
      "loss": 0.2611,
      "step": 3356
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3775670454411615,
      "learning_rate": 1.2433997732157586e-08,
      "loss": 0.2657,
      "step": 3357
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.050051946828104,
      "learning_rate": 1.2290132683654086e-08,
      "loss": 0.2086,
      "step": 3358
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.5458546624396257,
      "learning_rate": 1.2147099619791767e-08,
      "loss": 0.2822,
      "step": 3359
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.415412671153464,
      "learning_rate": 1.2004898661052588e-08,
      "loss": 0.2783,
      "step": 3360
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2751172201041,
      "learning_rate": 1.186352992721773e-08,
      "loss": 0.2502,
      "step": 3361
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3132166812169714,
      "learning_rate": 1.1722993537367277e-08,
      "loss": 0.2829,
      "step": 3362
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.5768434717911473,
      "learning_rate": 1.1583289609880308e-08,
      "loss": 0.2876,
      "step": 3363
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3452615889347994,
      "learning_rate": 1.1444418262434586e-08,
      "loss": 0.2531,
      "step": 3364
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4335383089249945,
      "learning_rate": 1.1306379612006645e-08,
      "loss": 0.2877,
      "step": 3365
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2737448478494238,
      "learning_rate": 1.1169173774871477e-08,
      "loss": 0.2422,
      "step": 3366
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3643371162101996,
      "learning_rate": 1.1032800866602632e-08,
      "loss": 0.2475,
      "step": 3367
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.308016964987461,
      "learning_rate": 1.0897261002072222e-08,
      "loss": 0.2385,
      "step": 3368
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.396351551587896,
      "learning_rate": 1.0762554295450366e-08,
      "loss": 0.2635,
      "step": 3369
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4150045202475745,
      "learning_rate": 1.0628680860205518e-08,
      "loss": 0.2565,
      "step": 3370
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2957529859487034,
      "learning_rate": 1.0495640809104256e-08,
      "loss": 0.2808,
      "step": 3371
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.441843889242134,
      "learning_rate": 1.0363434254211268e-08,
      "loss": 0.2541,
      "step": 3372
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4618524049071153,
      "learning_rate": 1.0232061306888917e-08,
      "loss": 0.2706,
      "step": 3373
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5357395909750866,
      "learning_rate": 1.0101522077797352e-08,
      "loss": 0.2733,
      "step": 3374
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.619183803188383,
      "learning_rate": 9.97181667689495e-09,
      "loss": 0.2894,
      "step": 3375
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.534613440279317,
      "learning_rate": 9.842945213437092e-09,
      "loss": 0.2671,
      "step": 3376
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.230444791490229,
      "learning_rate": 9.714907795977168e-09,
      "loss": 0.2841,
      "step": 3377
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.7079530309001902,
      "learning_rate": 9.587704532365681e-09,
      "loss": 0.2649,
      "step": 3378
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4746543060966806,
      "learning_rate": 9.461335529750814e-09,
      "loss": 0.2536,
      "step": 3379
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.474784362397472,
      "learning_rate": 9.33580089457786e-09,
      "loss": 0.2703,
      "step": 3380
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.449321332065921,
      "learning_rate": 9.211100732589127e-09,
      "loss": 0.278,
      "step": 3381
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3510232296452678,
      "learning_rate": 9.087235148824368e-09,
      "loss": 0.2727,
      "step": 3382
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2950727777946143,
      "learning_rate": 8.964204247620011e-09,
      "loss": 0.2902,
      "step": 3383
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4002683065846058,
      "learning_rate": 8.842008132609602e-09,
      "loss": 0.2745,
      "step": 3384
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.462639292571834,
      "learning_rate": 8.720646906723583e-09,
      "loss": 0.2739,
      "step": 3385
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.419294993393598,
      "learning_rate": 8.600120672188738e-09,
      "loss": 0.2354,
      "step": 3386
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2659319854149955,
      "learning_rate": 8.480429530529076e-09,
      "loss": 0.2479,
      "step": 3387
    },
    {
      "epoch": 0.96,
      "grad_norm": 4.2481201180611,
      "learning_rate": 8.361573582564729e-09,
      "loss": 0.2959,
      "step": 3388
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3327587692717473,
      "learning_rate": 8.2435529284125e-09,
      "loss": 0.2548,
      "step": 3389
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.024764248268901,
      "learning_rate": 8.126367667485534e-09,
      "loss": 0.291,
      "step": 3390
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.133213647977136,
      "learning_rate": 8.010017898493315e-09,
      "loss": 0.229,
      "step": 3391
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2073354750539815,
      "learning_rate": 7.89450371944167e-09,
      "loss": 0.2341,
      "step": 3392
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3459203903671693,
      "learning_rate": 7.779825227632319e-09,
      "loss": 0.2735,
      "step": 3393
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2982394761984124,
      "learning_rate": 7.665982519663327e-09,
      "loss": 0.2733,
      "step": 3394
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4779060772165153,
      "learning_rate": 7.552975691428654e-09,
      "loss": 0.2992,
      "step": 3395
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3653518625493897,
      "learning_rate": 7.440804838117931e-09,
      "loss": 0.2544,
      "step": 3396
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3191375340363085,
      "learning_rate": 7.329470054217024e-09,
      "loss": 0.2596,
      "step": 3397
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.233643860181891,
      "learning_rate": 7.21897143350747e-09,
      "loss": 0.2291,
      "step": 3398
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.645260025143113,
      "learning_rate": 7.109309069065928e-09,
      "loss": 0.2604,
      "step": 3399
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2409004059704185,
      "learning_rate": 7.000483053265505e-09,
      "loss": 0.264,
      "step": 3400
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0328585106470216,
      "learning_rate": 6.892493477774097e-09,
      "loss": 0.245,
      "step": 3401
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4964028149676083,
      "learning_rate": 6.7853404335554974e-09,
      "loss": 0.2678,
      "step": 3402
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.558672079681962,
      "learning_rate": 6.679024010868617e-09,
      "loss": 0.3041,
      "step": 3403
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.40141182727304,
      "learning_rate": 6.573544299267708e-09,
      "loss": 0.2448,
      "step": 3404
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.516652123088787,
      "learning_rate": 6.468901387602366e-09,
      "loss": 0.3198,
      "step": 3405
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3091424744946814,
      "learning_rate": 6.36509536401697e-09,
      "loss": 0.2587,
      "step": 3406
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3951209136151093,
      "learning_rate": 6.262126315951355e-09,
      "loss": 0.2814,
      "step": 3407
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.313803926109103,
      "learning_rate": 6.159994330140139e-09,
      "loss": 0.2694,
      "step": 3408
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5797502248297772,
      "learning_rate": 6.0586994926128396e-09,
      "loss": 0.2871,
      "step": 3409
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.359256899842382,
      "learning_rate": 5.958241888693871e-09,
      "loss": 0.2953,
      "step": 3410
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.35053897730414,
      "learning_rate": 5.858621603002434e-09,
      "loss": 0.2837,
      "step": 3411
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.329705678442523,
      "learning_rate": 5.7598387194524035e-09,
      "loss": 0.2842,
      "step": 3412
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2477466985107877,
      "learning_rate": 5.66189332125222e-09,
      "loss": 0.2917,
      "step": 3413
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.255582150057296,
      "learning_rate": 5.564785490904778e-09,
      "loss": 0.2669,
      "step": 3414
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3365220893647405,
      "learning_rate": 5.468515310207866e-09,
      "loss": 0.2792,
      "step": 3415
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.489205245133401,
      "learning_rate": 5.373082860253286e-09,
      "loss": 0.2997,
      "step": 3416
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.290026567655139,
      "learning_rate": 5.278488221427402e-09,
      "loss": 0.2529,
      "step": 3417
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2195804476343555,
      "learning_rate": 5.184731473410697e-09,
      "loss": 0.2438,
      "step": 3418
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.440156596286958,
      "learning_rate": 5.0918126951779995e-09,
      "loss": 0.2575,
      "step": 3419
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.277947551672406,
      "learning_rate": 4.999731964998255e-09,
      "loss": 0.2365,
      "step": 3420
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1878020075418747,
      "learning_rate": 4.90848936043442e-09,
      "loss": 0.2642,
      "step": 3421
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3846931366612654,
      "learning_rate": 4.818084958343571e-09,
      "loss": 0.2614,
      "step": 3422
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3490608640052697,
      "learning_rate": 4.728518834876683e-09,
      "loss": 0.2779,
      "step": 3423
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.631539271095303,
      "learning_rate": 4.639791065478737e-09,
      "loss": 0.2739,
      "step": 3424
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2169767571560963,
      "learning_rate": 4.551901724888063e-09,
      "loss": 0.2424,
      "step": 3425
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5267588387109967,
      "learning_rate": 4.46485088713755e-09,
      "loss": 0.2745,
      "step": 3426
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.516514525166744,
      "learning_rate": 4.378638625553099e-09,
      "loss": 0.2924,
      "step": 3427
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.8289199823796185,
      "learning_rate": 4.29326501275451e-09,
      "loss": 0.2984,
      "step": 3428
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.37941989685982,
      "learning_rate": 4.208730120655257e-09,
      "loss": 0.272,
      "step": 3429
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3135649095456374,
      "learning_rate": 4.125034020461937e-09,
      "loss": 0.2414,
      "step": 3430
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.343939860797385,
      "learning_rate": 4.042176782675266e-09,
      "loss": 0.2724,
      "step": 3431
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.89142975164576,
      "learning_rate": 3.9601584770887485e-09,
      "loss": 0.2475,
      "step": 3432
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4270415644781242,
      "learning_rate": 3.878979172789454e-09,
      "loss": 0.2627,
      "step": 3433
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2244448517637934,
      "learning_rate": 3.798638938157683e-09,
      "loss": 0.2423,
      "step": 3434
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5572297378768036,
      "learning_rate": 3.7191378408670817e-09,
      "loss": 0.2761,
      "step": 3435
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4173689424138356,
      "learning_rate": 3.640475947884303e-09,
      "loss": 0.2652,
      "step": 3436
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3181700082518804,
      "learning_rate": 3.562653325469345e-09,
      "loss": 0.2339,
      "step": 3437
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.358757031542546,
      "learning_rate": 3.4856700391748817e-09,
      "loss": 0.2793,
      "step": 3438
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4888349477689635,
      "learning_rate": 3.40952615384682e-09,
      "loss": 0.2668,
      "step": 3439
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4841252669275717,
      "learning_rate": 3.3342217336239653e-09,
      "loss": 0.2661,
      "step": 3440
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.535579975411954,
      "learning_rate": 3.2597568419382437e-09,
      "loss": 0.2823,
      "step": 3441
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.446986754997376,
      "learning_rate": 3.1861315415139257e-09,
      "loss": 0.2666,
      "step": 3442
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.304931837380515,
      "learning_rate": 3.113345894368402e-09,
      "loss": 0.2594,
      "step": 3443
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.359112779339876,
      "learning_rate": 3.0413999618117415e-09,
      "loss": 0.2738,
      "step": 3444
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3442231737629338,
      "learning_rate": 2.9702938044467994e-09,
      "loss": 0.2618,
      "step": 3445
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.872008136124653,
      "learning_rate": 2.9000274821687765e-09,
      "loss": 0.2378,
      "step": 3446
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.393992867182129,
      "learning_rate": 2.830601054165549e-09,
      "loss": 0.2688,
      "step": 3447
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.4955852552862514,
      "learning_rate": 2.7620145789177816e-09,
      "loss": 0.3084,
      "step": 3448
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.394092561435307,
      "learning_rate": 2.6942681141981506e-09,
      "loss": 0.2509,
      "step": 3449
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2975660397285913,
      "learning_rate": 2.6273617170722295e-09,
      "loss": 0.2669,
      "step": 3450
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2922318671409196,
      "learning_rate": 2.5612954438977154e-09,
      "loss": 0.2503,
      "step": 3451
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.271404899827809,
      "learning_rate": 2.4960693503245367e-09,
      "loss": 0.2689,
      "step": 3452
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.173109962410258,
      "learning_rate": 2.4316834912951887e-09,
      "loss": 0.2373,
      "step": 3453
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.277192362094739,
      "learning_rate": 2.3681379210442885e-09,
      "loss": 0.255,
      "step": 3454
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.419018625423423,
      "learning_rate": 2.3054326930984636e-09,
      "loss": 0.2867,
      "step": 3455
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.704302423417181,
      "learning_rate": 2.243567860276796e-09,
      "loss": 0.296,
      "step": 3456
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.397799847013115,
      "learning_rate": 2.1825434746903793e-09,
      "loss": 0.2527,
      "step": 3457
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.35412657731211,
      "learning_rate": 2.1223595877420953e-09,
      "loss": 0.2718,
      "step": 3458
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.6890571841517277,
      "learning_rate": 2.0630162501272806e-09,
      "loss": 0.2542,
      "step": 3459
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.438948016197178,
      "learning_rate": 2.0045135118328394e-09,
      "loss": 0.2801,
      "step": 3460
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.267259101241706,
      "learning_rate": 1.946851422138018e-09,
      "loss": 0.2311,
      "step": 3461
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.551728847298024,
      "learning_rate": 1.890030029613521e-09,
      "loss": 0.2799,
      "step": 3462
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.479703785478036,
      "learning_rate": 1.8340493821222824e-09,
      "loss": 0.2788,
      "step": 3463
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.6460667172496315,
      "learning_rate": 1.7789095268188058e-09,
      "loss": 0.2908,
      "step": 3464
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.5203858422375642,
      "learning_rate": 1.7246105101493825e-09,
      "loss": 0.3041,
      "step": 3465
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2142928954216696,
      "learning_rate": 1.671152377852092e-09,
      "loss": 0.2428,
      "step": 3466
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.265712772154652,
      "learning_rate": 1.6185351749569142e-09,
      "loss": 0.259,
      "step": 3467
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2995218830476962,
      "learning_rate": 1.5667589457849516e-09,
      "loss": 0.2413,
      "step": 3468
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.1040150036059075,
      "learning_rate": 1.5158237339494283e-09,
      "loss": 0.234,
      "step": 3469
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3875856781933744,
      "learning_rate": 1.4657295823549132e-09,
      "loss": 0.2668,
      "step": 3470
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.462460581054891,
      "learning_rate": 1.4164765331976525e-09,
      "loss": 0.2631,
      "step": 3471
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.4861816892269157,
      "learning_rate": 1.3680646279651265e-09,
      "loss": 0.2766,
      "step": 3472
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.455987767118866,
      "learning_rate": 1.320493907436604e-09,
      "loss": 0.2293,
      "step": 3473
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.7477765197169726,
      "learning_rate": 1.2737644116826985e-09,
      "loss": 0.305,
      "step": 3474
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2249571672813317,
      "learning_rate": 1.227876180065368e-09,
      "loss": 0.258,
      "step": 3475
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2543225771684656,
      "learning_rate": 1.1828292512380267e-09,
      "loss": 0.2526,
      "step": 3476
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3257233204429535,
      "learning_rate": 1.1386236631452107e-09,
      "loss": 0.2569,
      "step": 3477
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5350058521661056,
      "learning_rate": 1.095259453023023e-09,
      "loss": 0.2867,
      "step": 3478
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4593700491079966,
      "learning_rate": 1.0527366573986895e-09,
      "loss": 0.2921,
      "step": 3479
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2846641331816384,
      "learning_rate": 1.0110553120908915e-09,
      "loss": 0.2572,
      "step": 3480
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3702436683714185,
      "learning_rate": 9.70215452209211e-10,
      "loss": 0.2596,
      "step": 3481
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.392817209094412,
      "learning_rate": 9.302171121546853e-10,
      "loss": 0.2476,
      "step": 3482
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.616727121844519,
      "learning_rate": 8.910603256192529e-10,
      "loss": 0.2712,
      "step": 3483
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5477779958428575,
      "learning_rate": 8.527451255863071e-10,
      "loss": 0.2786,
      "step": 3484
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.42618092930159,
      "learning_rate": 8.152715443300318e-10,
      "loss": 0.2721,
      "step": 3485
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5225716230160917,
      "learning_rate": 7.786396134158435e-10,
      "loss": 0.2834,
      "step": 3486
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.614699044791809,
      "learning_rate": 7.42849363700282e-10,
      "loss": 0.2844,
      "step": 3487
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5011157795190915,
      "learning_rate": 7.079008253306762e-10,
      "loss": 0.2605,
      "step": 3488
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.493778563490711,
      "learning_rate": 6.737940277454778e-10,
      "loss": 0.2774,
      "step": 3489
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.257688702699588,
      "learning_rate": 6.405289996741503e-10,
      "loss": 0.2579,
      "step": 3490
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.427775485003037,
      "learning_rate": 6.081057691370572e-10,
      "loss": 0.2593,
      "step": 3491
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2505185636951026,
      "learning_rate": 5.76524363445463e-10,
      "loss": 0.2815,
      "step": 3492
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4718683751201636,
      "learning_rate": 5.457848092015327e-10,
      "loss": 0.2612,
      "step": 3493
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3198523130298656,
      "learning_rate": 5.158871322984426e-10,
      "loss": 0.2698,
      "step": 3494
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.232091467187299,
      "learning_rate": 4.868313579200479e-10,
      "loss": 0.2533,
      "step": 3495
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4747398852089417,
      "learning_rate": 4.5861751054110385e-10,
      "loss": 0.2753,
      "step": 3496
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5764286483014556,
      "learning_rate": 4.3124561392715584e-10,
      "loss": 0.2567,
      "step": 3497
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.367029999731487,
      "learning_rate": 4.047156911345384e-10,
      "loss": 0.2753,
      "step": 3498
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.440774884458288,
      "learning_rate": 3.7902776451048667e-10,
      "loss": 0.2955,
      "step": 3499
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2906056439515177,
      "learning_rate": 3.5418185569280337e-10,
      "loss": 0.2582,
      "step": 3500
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2754598342103325,
      "learning_rate": 3.3017798561030266e-10,
      "loss": 0.2427,
      "step": 3501
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.401165101202187,
      "learning_rate": 3.070161744820332e-10,
      "loss": 0.2716,
      "step": 3502
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1729922241238833,
      "learning_rate": 2.846964418182773e-10,
      "loss": 0.2476,
      "step": 3503
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2677043074356003,
      "learning_rate": 2.632188064196628e-10,
      "loss": 0.2563,
      "step": 3504
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.449392876606644,
      "learning_rate": 2.4258328637771776e-10,
      "loss": 0.2601,
      "step": 3505
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3055918692627793,
      "learning_rate": 2.22789899074427e-10,
      "loss": 0.2693,
      "step": 3506
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4329321525802556,
      "learning_rate": 2.0383866118245385e-10,
      "loss": 0.2406,
      "step": 3507
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2791612559079786,
      "learning_rate": 1.8572958866514e-10,
      "loss": 0.2761,
      "step": 3508
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.560549908458607,
      "learning_rate": 1.684626967765057e-10,
      "loss": 0.235,
      "step": 3509
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5078118323749727,
      "learning_rate": 1.5203800006102774e-10,
      "loss": 0.3003,
      "step": 3510
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.493354960056434,
      "learning_rate": 1.3645551235386133e-10,
      "loss": 0.2504,
      "step": 3511
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.762856738714792,
      "learning_rate": 1.2171524678061818e-10,
      "loss": 0.2951,
      "step": 3512
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4196022508390054,
      "learning_rate": 1.0781721575781056e-10,
      "loss": 0.2877,
      "step": 3513
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1964914531229427,
      "learning_rate": 9.476143099207412e-11,
      "loss": 0.2594,
      "step": 3514
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3678613204157104,
      "learning_rate": 8.254790348072304e-11,
      "loss": 0.2815,
      "step": 3515
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3026631969295814,
      "learning_rate": 7.117664351186103e-11,
      "loss": 0.2821,
      "step": 3516
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.560841032353236,
      "learning_rate": 6.06476606638262e-11,
      "loss": 0.2817,
      "step": 3517
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.358566543205405,
      "learning_rate": 5.096096380552417e-11,
      "loss": 0.2727,
      "step": 3518
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.47453393582181,
      "learning_rate": 4.211656109642803e-11,
      "loss": 0.2822,
      "step": 3519
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1677384205058012,
      "learning_rate": 3.411445998668938e-11,
      "loss": 0.2386,
      "step": 3520
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.405772186235948,
      "learning_rate": 2.6954667216472217e-11,
      "loss": 0.2798,
      "step": 3521
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4163648834715747,
      "learning_rate": 2.063718881695209e-11,
      "loss": 0.2753,
      "step": 3522
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1951499438686906,
      "learning_rate": 1.516203010953898e-11,
      "loss": 0.2416,
      "step": 3523
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5347777261578788,
      "learning_rate": 1.0529195706099337e-11,
      "loss": 0.3174,
      "step": 3524
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.303176321896944,
      "learning_rate": 6.738689509067086e-12,
      "loss": 0.2859,
      "step": 3525
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.253837882658441,
      "learning_rate": 3.790514711332626e-12,
      "loss": 0.2613,
      "step": 3526
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5952689897785053,
      "learning_rate": 1.6846737963538415e-12,
      "loss": 0.2775,
      "step": 3527
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.359674481777684,
      "learning_rate": 4.211685378230356e-13,
      "loss": 0.2754,
      "step": 3528
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4963844359266916,
      "learning_rate": 0.0,
      "loss": 0.2803,
      "step": 3529
    },
    {
      "epoch": 1.0,
      "step": 3529,
      "total_flos": 696559979986944.0,
      "train_loss": 0.3025049172588839,
      "train_runtime": 35270.0898,
      "train_samples_per_second": 3.202,
      "train_steps_per_second": 0.1
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 3529,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25000,
  "total_flos": 696559979986944.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
