cuierfei committed on
Commit
b32e033
1 Parent(s): 5874d94

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. config.json +0 -55
  3. modeling_intern_vit.py +1 -0
  4. modeling_internvl_chat.py +1 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- license: mit
3
  pipeline_tag: image-text-to-text
4
  ---
5
 
@@ -65,7 +65,7 @@ For more information about the pipeline parameters, please refer to [here](https
65
  LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
66
 
67
  ```shell
68
- lmdeploy serve api_server OpenGVLab/InternVL2-Llama3-76B-AWQ --server-port 23333
69
  ```
70
 
71
  To use the OpenAI-style interface, you need to install OpenAI:
@@ -104,7 +104,7 @@ print(response)
104
 
105
  ## License
106
 
107
- This project is released under the MIT license, while InternLM is licensed under the Apache-2.0 license.
108
 
109
  ## Citation
110
 
 
1
  ---
2
+ license: llama3
3
  pipeline_tag: image-text-to-text
4
  ---
5
 
 
65
  LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
66
 
67
  ```shell
68
+ lmdeploy serve api_server OpenGVLab/InternVL2-Llama3-76B-AWQ --backend turbomind --server-port 23333 --model-format awq
69
  ```
70
 
71
  To use the OpenAI-style interface, you need to install OpenAI:
 
104
 
105
  ## License
106
 
107
+ This project is released under the MIT license, while Llama3 is licensed under the Llama 3 Community License.
108
 
109
  ## Citation
110
 
config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "_commit_hash": null,
3
- "_name_or_path": "/mnt/bigdisk/InternVL2-Llama3-76B",
4
  "architectures": [
5
  "InternVLChatModel"
6
  ],
@@ -115,86 +114,32 @@
115
  "use_llm_lora": 0,
116
  "use_thumbnail": true,
117
  "vision_config": {
118
- "_name_or_path": "",
119
- "add_cross_attention": false,
120
  "architectures": [
121
  "InternVisionModel"
122
  ],
123
  "attention_dropout": 0.0,
124
- "bad_words_ids": null,
125
- "begin_suppress_tokens": null,
126
- "bos_token_id": null,
127
- "chunk_size_feed_forward": 0,
128
- "cross_attention_hidden_size": null,
129
- "decoder_start_token_id": null,
130
- "diversity_penalty": 0.0,
131
- "do_sample": false,
132
  "drop_path_rate": 0.0,
133
  "dropout": 0.0,
134
- "early_stopping": false,
135
- "encoder_no_repeat_ngram_size": 0,
136
- "eos_token_id": null,
137
- "exponential_decay_length_penalty": null,
138
- "finetuning_task": null,
139
- "forced_bos_token_id": null,
140
- "forced_eos_token_id": null,
141
  "hidden_act": "gelu",
142
  "hidden_size": 3200,
143
- "id2label": {
144
- "0": "LABEL_0",
145
- "1": "LABEL_1"
146
- },
147
  "image_size": 448,
148
  "initializer_factor": 0.1,
149
  "initializer_range": 1e-10,
150
  "intermediate_size": 12800,
151
- "is_decoder": false,
152
- "is_encoder_decoder": false,
153
- "label2id": {
154
- "LABEL_0": 0,
155
- "LABEL_1": 1
156
- },
157
  "layer_norm_eps": 1e-06,
158
- "length_penalty": 1.0,
159
- "max_length": 20,
160
- "min_length": 0,
161
  "model_type": "intern_vit_6b",
162
- "no_repeat_ngram_size": 0,
163
  "norm_type": "rms_norm",
164
  "num_attention_heads": 25,
165
- "num_beam_groups": 1,
166
- "num_beams": 1,
167
  "num_channels": 3,
168
  "num_hidden_layers": 45,
169
- "num_return_sequences": 1,
170
  "output_attentions": false,
171
  "output_hidden_states": false,
172
- "output_scores": false,
173
- "pad_token_id": null,
174
  "patch_size": 14,
175
- "prefix": null,
176
- "problem_type": null,
177
- "pruned_heads": {},
178
  "qk_normalization": true,
179
  "qkv_bias": false,
180
- "remove_invalid_values": false,
181
- "repetition_penalty": 1.0,
182
  "return_dict": true,
183
- "return_dict_in_generate": false,
184
- "sep_token_id": null,
185
- "suppress_tokens": null,
186
- "task_specific_params": null,
187
- "temperature": 1.0,
188
- "tf_legacy_loss": false,
189
- "tie_encoder_decoder": false,
190
- "tie_word_embeddings": true,
191
- "tokenizer_class": null,
192
- "top_k": 50,
193
- "top_p": 1.0,
194
  "torch_dtype": "bfloat16",
195
- "torchscript": false,
196
  "transformers_version": "4.40.0",
197
- "typical_p": 1.0,
198
  "use_bfloat16": true,
199
  "use_flash_attn": true
200
  }
 
1
  {
2
  "_commit_hash": null,
 
3
  "architectures": [
4
  "InternVLChatModel"
5
  ],
 
114
  "use_llm_lora": 0,
115
  "use_thumbnail": true,
116
  "vision_config": {
 
 
117
  "architectures": [
118
  "InternVisionModel"
119
  ],
120
  "attention_dropout": 0.0,
 
 
 
 
 
 
 
 
121
  "drop_path_rate": 0.0,
122
  "dropout": 0.0,
 
 
 
 
 
 
 
123
  "hidden_act": "gelu",
124
  "hidden_size": 3200,
 
 
 
 
125
  "image_size": 448,
126
  "initializer_factor": 0.1,
127
  "initializer_range": 1e-10,
128
  "intermediate_size": 12800,
 
 
 
 
 
 
129
  "layer_norm_eps": 1e-06,
 
 
 
130
  "model_type": "intern_vit_6b",
 
131
  "norm_type": "rms_norm",
132
  "num_attention_heads": 25,
 
 
133
  "num_channels": 3,
134
  "num_hidden_layers": 45,
 
135
  "output_attentions": false,
136
  "output_hidden_states": false,
 
 
137
  "patch_size": 14,
 
 
 
138
  "qk_normalization": true,
139
  "qkv_bias": false,
 
 
140
  "return_dict": true,
 
 
 
 
 
 
 
 
 
 
 
141
  "torch_dtype": "bfloat16",
 
142
  "transformers_version": "4.40.0",
 
143
  "use_bfloat16": true,
144
  "use_flash_attn": true
145
  }
modeling_intern_vit.py CHANGED
@@ -368,6 +368,7 @@ class InternVisionEncoder(nn.Module):
368
 
369
  class InternVisionModel(PreTrainedModel):
370
  main_input_name = 'pixel_values'
 
371
  config_class = InternVisionConfig
372
  _no_split_modules = ['InternVisionEncoderLayer']
373
 
 
368
 
369
  class InternVisionModel(PreTrainedModel):
370
  main_input_name = 'pixel_values'
371
+ _supports_flash_attn_2 = True
372
  config_class = InternVisionConfig
373
  _no_split_modules = ['InternVisionEncoderLayer']
374
 
modeling_internvl_chat.py CHANGED
@@ -33,6 +33,7 @@ def version_cmp(v1, v2, op='eq'):
33
  class InternVLChatModel(PreTrainedModel):
34
  config_class = InternVLChatConfig
35
  main_input_name = 'pixel_values'
 
36
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer']
37
 
38
  def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
 
33
  class InternVLChatModel(PreTrainedModel):
34
  config_class = InternVLChatConfig
35
  main_input_name = 'pixel_values'
36
+ _supports_flash_attn_2 = True
37
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer']
38
 
39
  def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):