jeiku committed on
Commit
2ad8002
1 Parent(s): d5e36e0

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +105 -0
README.md ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model:
4
+ - Qwen/Qwen2.5-7B
5
+ library_name: transformers
6
+ ---
7
+
8
+ ```
9
+ base_model: Qwen/Qwen2.5-7B
10
+ model_type: AutoModelForCausalLM
11
+ tokenizer_type: AutoTokenizer
12
+
13
+ load_in_8bit: false
14
+ load_in_4bit: false
15
+ strict: false
16
+
17
+ datasets:
18
+ - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
19
+ type: sharegpt
20
+ conversation: chatml
21
+ - path: NewEden/Kalo-Opus-Instruct-22k-Refusal-Murdered
22
+ type: sharegpt
23
+ conversation: chatml
24
+ - path: Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
25
+ type: sharegpt
26
+ conversation: chatml
27
+ - path: NewEden/Gryphe-Sonnet-3.5-35k-Subset
28
+ type: sharegpt
29
+ conversation: chatml
30
+ - path: Nitral-AI/Reasoning-1shot_ShareGPT
31
+ type: sharegpt
32
+ conversation: chatml
33
+ - path: Nitral-AI/GU_Instruct-ShareGPT
34
+ type: sharegpt
35
+ conversation: chatml
36
+ - path: Nitral-AI/Medical_Instruct-ShareGPT
37
+ type: sharegpt
38
+ conversation: chatml
39
+
40
+ chat_template: chatml
41
+
42
+ val_set_size: 0.01
43
+ output_dir: ./outputs/out
44
+
45
+ adapter:
46
+ lora_r:
47
+ lora_alpha:
48
+ lora_dropout:
49
+ lora_target_linear:
50
+
51
+ sequence_len: 8192
52
+ # sequence_len: 32768
53
+ sample_packing: true
54
+ eval_sample_packing: false
55
+ pad_to_sequence_len: true
56
+
57
+ plugins:
58
+ - axolotl.integrations.liger.LigerPlugin
59
+ liger_rope: true
60
+ liger_rms_norm: true
61
+ liger_swiglu: true
62
+ liger_fused_linear_cross_entropy: true
63
+
64
+ wandb_project: qwen7B
65
+ wandb_entity:
66
+ wandb_watch:
67
+ wandb_name: qwen7B
68
+ wandb_log_model:
69
+
70
+ gradient_accumulation_steps: 32
71
+ micro_batch_size: 1
72
+ num_epochs: 2
73
+ optimizer: adamw_bnb_8bit
74
+ lr_scheduler: cosine
75
+ learning_rate: 0.00001
76
+ weight_decay: 0.05
77
+
78
+ train_on_inputs: false
79
+ group_by_length: false
80
+ bf16: auto
81
+ fp16:
82
+ tf32: true
83
+
84
+ gradient_checkpointing: true
85
+ early_stopping_patience:
86
+ resume_from_checkpoint:
87
+ local_rank:
88
+ logging_steps: 1
89
+ xformers_attention:
90
+ flash_attention: true
91
+
92
+ warmup_ratio: 0.1
93
+ evals_per_epoch: 4
94
+ eval_table_size:
95
+ eval_max_new_tokens: 128
96
+ saves_per_epoch: 2
97
+
98
+ debug:
99
+ deepspeed:
100
+ fsdp:
101
+ fsdp_config:
102
+
103
+ special_tokens:
104
+ pad_token: <pad>
105
+ ```