|
{ |
|
"best_metric": 0.07866430282592773, |
|
"best_model_checkpoint": "saves/Mistral-7B/lora/train_1/checkpoint-50", |
|
"epoch": 2.8828828828828827, |
|
"eval_steps": 10, |
|
"global_step": 80, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36036036036036034, |
|
"grad_norm": 0.26596060395240784, |
|
"learning_rate": 0.00028885859539033357, |
|
"loss": 0.4784, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.36036036036036034, |
|
"eval_loss": 0.10740000009536743, |
|
"eval_runtime": 92.81, |
|
"eval_samples_per_second": 15.774, |
|
"eval_steps_per_second": 1.972, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.7207207207207207, |
|
"grad_norm": 0.3412328362464905, |
|
"learning_rate": 0.00025708946018368484, |
|
"loss": 0.0943, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7207207207207207, |
|
"eval_loss": 0.08951539546251297, |
|
"eval_runtime": 92.8999, |
|
"eval_samples_per_second": 15.759, |
|
"eval_steps_per_second": 1.97, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 0.26343515515327454, |
|
"learning_rate": 0.0002094119649058735, |
|
"loss": 0.0763, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"eval_loss": 0.08366864919662476, |
|
"eval_runtime": 92.931, |
|
"eval_samples_per_second": 15.754, |
|
"eval_steps_per_second": 1.969, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.4414414414414414, |
|
"grad_norm": 0.20433703064918518, |
|
"learning_rate": 0.00015290869976577364, |
|
"loss": 0.0674, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.4414414414414414, |
|
"eval_loss": 0.08040930330753326, |
|
"eval_runtime": 92.8073, |
|
"eval_samples_per_second": 15.775, |
|
"eval_steps_per_second": 1.972, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.8018018018018018, |
|
"grad_norm": 0.23807112872600555, |
|
"learning_rate": 9.597334127929346e-05, |
|
"loss": 0.0622, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.8018018018018018, |
|
"eval_loss": 0.07866430282592773, |
|
"eval_runtime": 92.8443, |
|
"eval_samples_per_second": 15.768, |
|
"eval_steps_per_second": 1.971, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"grad_norm": 0.21819865703582764, |
|
"learning_rate": 4.706375431968997e-05, |
|
"loss": 0.0596, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"eval_loss": 0.08040966093540192, |
|
"eval_runtime": 92.8758, |
|
"eval_samples_per_second": 15.763, |
|
"eval_steps_per_second": 1.97, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.5225225225225225, |
|
"grad_norm": 0.23914609849452972, |
|
"learning_rate": 1.3445558855078014e-05, |
|
"loss": 0.0493, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.5225225225225225, |
|
"eval_loss": 0.08447360247373581, |
|
"eval_runtime": 92.8784, |
|
"eval_samples_per_second": 15.763, |
|
"eval_steps_per_second": 1.97, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.8828828828828827, |
|
"grad_norm": 0.24320539832115173, |
|
"learning_rate": 1.1280712436549378e-07, |
|
"loss": 0.0523, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.8828828828828827, |
|
"eval_loss": 0.08488883823156357, |
|
"eval_runtime": 92.8598, |
|
"eval_samples_per_second": 15.766, |
|
"eval_steps_per_second": 1.971, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.8828828828828827, |
|
"step": 80, |
|
"total_flos": 1.2688077065394586e+17, |
|
"train_loss": 0.117483252286911, |
|
"train_runtime": 3922.4935, |
|
"train_samples_per_second": 5.432, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 81, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2688077065394586e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|