pszemraj committed on
Commit
b8010b6
1 Parent(s): 57ae27c

update model

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/opt-2.7b",
3
  "activation_dropout": 0.0,
4
  "activation_function": "relu",
5
  "architectures": [
 
1
  {
2
+ "_name_or_path": "pszemraj/opt-peter-2.7B",
3
  "activation_dropout": 0.0,
4
  "activation_function": "relu",
5
  "architectures": [
latest CHANGED
@@ -1 +1 @@
1
- global_step1852
 
1
+ global_step4944
merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
 
1
+ #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
opt-peter-2pt7B-ps_DS-msgs_Ep-4_Bs-16_training_metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/content/drive/MyDrive/Programming/hf-trainer/opt-peter-2pt7B-ps_DS-msgs_Ep-4_Bs-16", "overwrite_output_dir": true, "do_train": false, "do_eval": false, "do_predict": false, "evaluation_strategy": "no", "prediction_loss_only": false, "per_device_train_batch_size": 16, "per_device_eval_batch_size": 16, "per_gpu_train_batch_size": "None", "per_gpu_eval_batch_size": "None", "gradient_accumulation_steps": 4, "eval_accumulation_steps": 2, "eval_delay": 0, "learning_rate": 1e-05, "weight_decay": 0.1, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1, "num_train_epochs": 4, "max_steps": -1, "lr_scheduler_type": "cosine", "warmup_ratio": 0.05, "warmup_steps": 0, "log_level": -1, "log_level_replica": -1, "log_on_each_node": true, "logging_dir": "/content/drive/MyDrive/Programming/hf-trainer/opt-peter-2pt7B-ps_DS-msgs_Ep-4_Bs-16/logs", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 5, "logging_nan_inf_filter": true, "save_strategy": "epoch", "save_steps": 500, "save_total_limit": 1, "save_on_each_node": false, "no_cuda": false, "seed": 42, "data_seed": "None", "bf16": true, "fp16": false, "fp16_opt_level": "O1", "half_precision_backend": "amp", "bf16_full_eval": true, "fp16_full_eval": false, "tf32": "None", "local_rank": 0, "xpu_backend": "None", "tpu_num_cores": "None", "tpu_metrics_debug": false, "debug": "[]", "dataloader_drop_last": false, "eval_steps": "None", "dataloader_num_workers": 0, "past_index": -1, "run_name": "/content/drive/MyDrive/Programming/hf-trainer/opt-peter-2pt7B-ps_DS-msgs_Ep-4_Bs-16", "disable_tqdm": false, "remove_unused_columns": true, "label_names": "None", "load_best_model_at_end": false, "metric_for_best_model": "None", "greater_is_better": "None", "ignore_data_skip": false, "sharded_ddp": "[]", "fsdp": "[]", "fsdp_min_num_params": 0, "deepspeed": "ds_config_zero2_bf16.json", "label_smoothing_factor": 0.0, "optim": "adamw_hf", "adafactor": false, "group_by_length": 
false, "length_column_name": "length", "report_to": "['tensorboard']", "ddp_find_unused_parameters": "None", "ddp_bucket_cap_mb": "None", "dataloader_pin_memory": true, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": true, "resume_from_checkpoint": "None", "hub_model_id": "opt-peter-2pt7B-ps_DS-msgs_Ep-4_Bs-16", "hub_strategy": "end", "hub_token": "<HUB_TOKEN>", "hub_private_repo": false, "gradient_checkpointing": true, "include_inputs_for_metrics": false, "fp16_backend": "auto", "push_to_hub_model_id": "None", "push_to_hub_organization": "None", "push_to_hub_token": "<PUSH_TO_HUB_TOKEN>", "_n_gpu": 1, "mp_parameters": "", "auto_find_batch_size": false, "full_determinism": false, "train_batch_size": 16, "eval_batch_size": 16, "configs_src": "opt-peter-2pt7B-ps_DS-msgs_Ep-4_Bs-16", "data_tag": "text-file-input"}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69aff7264edf04861c8f3b6eed3f553e343af5ee8aebeb5ad1635f39ad2b4683
3
  size 10606359699
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6769ae7cd42aacaa796f222afbbc70baa0315907cc33bcc8c5b45c6c198e1a21
3
  size 10606359699
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_bos_token": true, "special_tokens_map_file": null, "name_or_path": "facebook/opt-2.7b", "model_max_length": 512}
 
1
+ {"unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_bos_token": true, "special_tokens_map_file": null, "name_or_path": "facebook/opt-2.7b", "model_max_length": 512}
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b8aabfc07bd4f8b579a13e3c5ed3e8dbbef17b9bcbe8603b010a6c6aac8df2d
3
- size 4207
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:234f85c728a97d9fd1489171c69b8d6c8d2bed952c1d194af747ebc969ccd6ce
3
+ size 4271
vocab.json CHANGED
The diff for this file is too large to render. See raw diff