| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.6019110676397562, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.000000000000001e-06, | |
| "logits/chosen": -58.747344970703125, | |
| "logits/rejected": -59.84019470214844, | |
| "logps/chosen": -150.3143768310547, | |
| "logps/rejected": -179.38966369628906, | |
| "loss": 0.5314, | |
| "rewards/accuracies": 0.7318750023841858, | |
| "rewards/chosen": -0.8851571679115295, | |
| "rewards/margins": 0.6559739708900452, | |
| "rewards/rejected": -1.5411310195922852, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.68421052631579e-06, | |
| "logits/chosen": -64.05355072021484, | |
| "logits/rejected": -65.13407897949219, | |
| "logps/chosen": -155.71795654296875, | |
| "logps/rejected": -190.02166748046875, | |
| "loss": 0.4485, | |
| "rewards/accuracies": 0.7715625166893005, | |
| "rewards/chosen": -1.3824467658996582, | |
| "rewards/margins": 1.1924123764038086, | |
| "rewards/rejected": -2.5748589038848877, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 9.263157894736842e-06, | |
| "logits/chosen": -64.59612274169922, | |
| "logits/rejected": -65.59567260742188, | |
| "logps/chosen": -158.51266479492188, | |
| "logps/rejected": -191.44497680664062, | |
| "loss": 0.4208, | |
| "rewards/accuracies": 0.7871875166893005, | |
| "rewards/chosen": -1.4413859844207764, | |
| "rewards/margins": 1.3812304735183716, | |
| "rewards/rejected": -2.8226163387298584, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 8.842105263157895e-06, | |
| "logits/chosen": -65.70255279541016, | |
| "logits/rejected": -66.5833969116211, | |
| "logps/chosen": -156.6477508544922, | |
| "logps/rejected": -195.2394561767578, | |
| "loss": 0.4062, | |
| "rewards/accuracies": 0.7973437309265137, | |
| "rewards/chosen": -1.4948838949203491, | |
| "rewards/margins": 1.513500690460205, | |
| "rewards/rejected": -3.0083847045898438, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.421052631578948e-06, | |
| "logits/chosen": -65.27395629882812, | |
| "logits/rejected": -66.06521606445312, | |
| "logps/chosen": -156.45945739746094, | |
| "logps/rejected": -196.63790893554688, | |
| "loss": 0.3898, | |
| "rewards/accuracies": 0.7978125214576721, | |
| "rewards/chosen": -1.6718982458114624, | |
| "rewards/margins": 1.6537814140319824, | |
| "rewards/rejected": -3.3256795406341553, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 5000, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |