bofenghuang committed on
Commit
9944b75
1 Parent(s): ce21f0e
Files changed (3) hide show
  1. tokenizer.json +25 -25
  2. tokenizer.model +2 -2
  3. tokenizer_config.json +13 -12
tokenizer.json CHANGED
@@ -50,7 +50,7 @@
50
  },
51
  {
52
  "id": 5,
53
- "content": "[TOOL_CALLS]",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
@@ -59,7 +59,7 @@
59
  },
60
  {
61
  "id": 6,
62
- "content": "[AVAILABLE_TOOLS]",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
@@ -68,7 +68,7 @@
68
  },
69
  {
70
  "id": 7,
71
- "content": "[/AVAILABLE_TOOLS]",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "id": 8,
80
- "content": "[TOOL_RESULTS]",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
@@ -86,7 +86,7 @@
86
  },
87
  {
88
  "id": 9,
89
- "content": "[/TOOL_RESULTS]",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
@@ -95,7 +95,7 @@
95
  },
96
  {
97
  "id": 10,
98
- "content": "[control_8]",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "id": 11,
107
- "content": "[control_9]",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  },
114
  {
115
  "id": 12,
116
- "content": "[control_10]",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  },
123
  {
124
  "id": 13,
125
- "content": "[control_11]",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -131,7 +131,7 @@
131
  },
132
  {
133
  "id": 14,
134
- "content": "[control_12]",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
@@ -140,7 +140,7 @@
140
  },
141
  {
142
  "id": 15,
143
- "content": "[control_13]",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
@@ -149,7 +149,7 @@
149
  },
150
  {
151
  "id": 16,
152
- "content": "[control_14]",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
@@ -7043,18 +7043,18 @@
7043
  "</s>": 2,
7044
  "[INST]": 3,
7045
  "[/INST]": 4,
7046
- "[TOOL_CALLS]": 5,
7047
- "[AVAILABLE_TOOLS]": 6,
7048
- "[/AVAILABLE_TOOLS]": 7,
7049
- "[TOOL_RESULTS]": 8,
7050
- "[/TOOL_RESULTS]": 9,
7051
- "[control_8]": 10,
7052
- "[control_9]": 11,
7053
- "[control_10]": 12,
7054
- "[control_11]": 13,
7055
- "[control_12]": 14,
7056
- "[control_13]": 15,
7057
- "[control_14]": 16,
7058
  "[control_15]": 17,
7059
  "[control_16]": 18,
7060
  "[control_17]": 19,
@@ -98790,4 +98790,4 @@
98790
  "▁ ▁▁▁▁▁▁▁▁▁▁"
98791
  ]
98792
  }
98793
- }
 
50
  },
51
  {
52
  "id": 5,
53
+ "content": "<tool_call>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
 
59
  },
60
  {
61
  "id": 6,
62
+ "content": "<available_tools>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
 
68
  },
69
  {
70
  "id": 7,
71
+ "content": "</available_tools>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
 
77
  },
78
  {
79
  "id": 8,
80
+ "content": "<tool_response>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
 
86
  },
87
  {
88
  "id": 9,
89
+ "content": "</tool_response>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
 
95
  },
96
  {
97
  "id": 10,
98
+ "content": "<available_documents>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
 
104
  },
105
  {
106
  "id": 11,
107
+ "content": "</available_documents>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
 
113
  },
114
  {
115
  "id": 12,
116
+ "content": "<reference>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
 
122
  },
123
  {
124
  "id": 13,
125
+ "content": "</reference>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
 
131
  },
132
  {
133
  "id": 14,
134
+ "content": "<|system|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
 
140
  },
141
  {
142
  "id": 15,
143
+ "content": "<|user|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
 
149
  },
150
  {
151
  "id": 16,
152
+ "content": "<|assistant|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
 
7043
  "</s>": 2,
7044
  "[INST]": 3,
7045
  "[/INST]": 4,
7046
+ "<tool_call>": 5,
7047
+ "<available_tools>": 6,
7048
+ "</available_tools>": 7,
7049
+ "<tool_response>": 8,
7050
+ "</tool_response>": 9,
7051
+ "<available_documents>": 10,
7052
+ "</available_documents>": 11,
7053
+ "<reference>": 12,
7054
+ "</reference>": 13,
7055
+ "<|system|>": 14,
7056
+ "<|user|>": 15,
7057
+ "<|assistant|>": 16,
7058
  "[control_15]": 17,
7059
  "[control_16]": 18,
7060
  "[control_17]": 19,
 
98790
  "▁ ▁▁▁▁▁▁▁▁▁▁"
98791
  ]
98792
  }
98793
+ }
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
3
- size 587404
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6f8a481c33393f0799a3fbb8b4905efda071b90a3c852b134ef4959890190a
3
+ size 587420
tokenizer_config.json CHANGED
@@ -44,7 +44,7 @@
44
  "special": true
45
  },
46
  "5": {
47
- "content": "[TOOL_CALLS]",
48
  "lstrip": false,
49
  "normalized": false,
50
  "rstrip": false,
@@ -52,7 +52,7 @@
52
  "special": true
53
  },
54
  "6": {
55
- "content": "[AVAILABLE_TOOLS]",
56
  "lstrip": false,
57
  "normalized": false,
58
  "rstrip": false,
@@ -60,7 +60,7 @@
60
  "special": true
61
  },
62
  "7": {
63
- "content": "[/AVAILABLE_TOOLS]",
64
  "lstrip": false,
65
  "normalized": false,
66
  "rstrip": false,
@@ -68,7 +68,7 @@
68
  "special": true
69
  },
70
  "8": {
71
- "content": "[TOOL_RESULTS]",
72
  "lstrip": false,
73
  "normalized": false,
74
  "rstrip": false,
@@ -76,7 +76,7 @@
76
  "special": true
77
  },
78
  "9": {
79
- "content": "[/TOOL_RESULTS]",
80
  "lstrip": false,
81
  "normalized": false,
82
  "rstrip": false,
@@ -84,7 +84,7 @@
84
  "special": true
85
  },
86
  "10": {
87
- "content": "[control_8]",
88
  "lstrip": false,
89
  "normalized": false,
90
  "rstrip": false,
@@ -92,7 +92,7 @@
92
  "special": true
93
  },
94
  "11": {
95
- "content": "[control_9]",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
@@ -100,7 +100,7 @@
100
  "special": true
101
  },
102
  "12": {
103
- "content": "[control_10]",
104
  "lstrip": false,
105
  "normalized": false,
106
  "rstrip": false,
@@ -108,7 +108,7 @@
108
  "special": true
109
  },
110
  "13": {
111
- "content": "[control_11]",
112
  "lstrip": false,
113
  "normalized": false,
114
  "rstrip": false,
@@ -116,7 +116,7 @@
116
  "special": true
117
  },
118
  "14": {
119
- "content": "[control_12]",
120
  "lstrip": false,
121
  "normalized": false,
122
  "rstrip": false,
@@ -124,7 +124,7 @@
124
  "special": true
125
  },
126
  "15": {
127
- "content": "[control_13]",
128
  "lstrip": false,
129
  "normalized": false,
130
  "rstrip": false,
@@ -132,7 +132,7 @@
132
  "special": true
133
  },
134
  "16": {
135
- "content": "[control_14]",
136
  "lstrip": false,
137
  "normalized": false,
138
  "rstrip": false,
@@ -6173,6 +6173,7 @@
6173
  }
6174
  },
6175
  "bos_token": "<s>",
 
6176
  "clean_up_tokenization_spaces": false,
6177
  "eos_token": "</s>",
6178
  "legacy": false,
 
44
  "special": true
45
  },
46
  "5": {
47
+ "content": "<tool_call>",
48
  "lstrip": false,
49
  "normalized": false,
50
  "rstrip": false,
 
52
  "special": true
53
  },
54
  "6": {
55
+ "content": "<available_tools>",
56
  "lstrip": false,
57
  "normalized": false,
58
  "rstrip": false,
 
60
  "special": true
61
  },
62
  "7": {
63
+ "content": "</available_tools>",
64
  "lstrip": false,
65
  "normalized": false,
66
  "rstrip": false,
 
68
  "special": true
69
  },
70
  "8": {
71
+ "content": "<tool_response>",
72
  "lstrip": false,
73
  "normalized": false,
74
  "rstrip": false,
 
76
  "special": true
77
  },
78
  "9": {
79
+ "content": "</tool_response>",
80
  "lstrip": false,
81
  "normalized": false,
82
  "rstrip": false,
 
84
  "special": true
85
  },
86
  "10": {
87
+ "content": "<available_documents>",
88
  "lstrip": false,
89
  "normalized": false,
90
  "rstrip": false,
 
92
  "special": true
93
  },
94
  "11": {
95
+ "content": "</available_documents>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
 
100
  "special": true
101
  },
102
  "12": {
103
+ "content": "<reference>",
104
  "lstrip": false,
105
  "normalized": false,
106
  "rstrip": false,
 
108
  "special": true
109
  },
110
  "13": {
111
+ "content": "</reference>",
112
  "lstrip": false,
113
  "normalized": false,
114
  "rstrip": false,
 
116
  "special": true
117
  },
118
  "14": {
119
+ "content": "<|system|>",
120
  "lstrip": false,
121
  "normalized": false,
122
  "rstrip": false,
 
124
  "special": true
125
  },
126
  "15": {
127
+ "content": "<|user|>",
128
  "lstrip": false,
129
  "normalized": false,
130
  "rstrip": false,
 
132
  "special": true
133
  },
134
  "16": {
135
+ "content": "<|assistant|>",
136
  "lstrip": false,
137
  "normalized": false,
138
  "rstrip": false,
 
6173
  }
6174
  },
6175
  "bos_token": "<s>",
6176
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
6177
  "clean_up_tokenization_spaces": false,
6178
  "eos_token": "</s>",
6179
  "legacy": false,