WinstonShum committed on
Commit
bb06616
1 Parent(s): 7ddc14b

WinstonShum/lora_model_causal_llama_3.1

Browse files
README.md CHANGED
@@ -35,14 +35,14 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 0.0001
39
  - train_batch_size: 2
40
  - eval_batch_size: 8
41
  - seed: 3407
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
- - lr_scheduler_warmup_steps: 10
45
- - num_epochs: 1
46
 
47
  ### Training results
48
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 5e-05
39
  - train_batch_size: 2
40
  - eval_batch_size: 8
41
  - seed: 3407
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
+ - lr_scheduler_warmup_steps: 50
45
+ - num_epochs: 3
46
 
47
  ### Training results
48
 
adapter_config.json CHANGED
@@ -14,7 +14,7 @@
14
  "layers_to_transform": null,
15
  "loftq_config": {},
16
  "lora_alpha": 8,
17
- "lora_dropout": 0.05,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "o_proj",
27
- "v_proj",
28
  "gate_proj",
 
29
  "k_proj",
30
  "q_proj",
31
  "down_proj",
32
- "up_proj"
33
  ],
34
  "task_type": null,
35
  "use_dora": false,
 
14
  "layers_to_transform": null,
15
  "loftq_config": {},
16
  "lora_alpha": 8,
17
+ "lora_dropout": 0.1,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "up_proj",
 
27
  "gate_proj",
28
+ "v_proj",
29
  "k_proj",
30
  "q_proj",
31
  "down_proj",
32
+ "o_proj"
33
  ],
34
  "task_type": null,
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:249d1c5e64fd9eadd1ea1e1f6a70311fb93671ac2e8bd040ab615b94e01e1361
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e82bedcb596a1a946480177dd7b024bc7767cefe5bdd14830d2e2815e7de20b
3
  size 167832240
runs/Aug14_20-28-21_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667301.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8fa9c121214c40b2a4f10fd670859c7df23980142a72cb649e49cc1c2934847
3
+ size 5661
runs/Aug14_20-29-33_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667374.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10133d72e742f3585869600fc8524fe88d64889d1d4c549986dc2500b505eba5
3
+ size 5661
runs/Aug14_20-30-58_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667458.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d782ed8214a698959b38049196c0da281c48944feb29949215cbb8fd1b4a87
3
+ size 5661
runs/Aug14_20-33-48_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667628.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70c677b24d64d3808c16eeaf8ffae199ddaa927bc46b07ee84ee2c717c8dabd8
3
+ size 5661
runs/Aug14_20-37-12_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667833.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63354ab73b1d4dfe7bb84b6b7dab4b0c69851cb5bbd1a40cd750b463a2ae46ab
3
+ size 7938
runs/Aug14_20-38-44_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667925.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1914e529337dd6890569b4abc710759dc5309b9bbbb6e971bb01a0346225f7ac
3
+ size 665726
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fd3106cc7495c6292f92c14261df91fad7886a21c526023b1c8281fa4156b5b
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37184ae993c0c2a43a4cffa3c56a066dd11732254dc6188b79fb5f7079320dfb
3
  size 5176