maxiw committed
Commit 64036af
1 Parent(s): 828c61d

WIP fix image loading

Files changed (2):
  1. app.py +14 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,13 +1,15 @@
 import gradio as gr
 import spaces
-from transformers import AutoModelForCausalLM, AutoProcessor
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+from qwen_vl_utils import process_vision_info
 import torch
+import base64
 from PIL import Image
-import subprocess
+from io import BytesIO
 
 
 models = {
-    "Qwen/Qwen2-VL-7B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto")
+    "Qwen/Qwen2-VL-7B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct") #, torch_dtype="auto", device_map="auto")
 }
 
 processors = {
@@ -17,6 +19,13 @@ processors = {
 DESCRIPTION = "# Qwen2-VL Object Localization Demo"
 
 
+def image_to_base64(image):
+    buffered = BytesIO()
+    image.save(buffered, format="PNG")  # Save the image in memory as PNG
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")  # Encode image to base64
+    return img_str
+
+
 @spaces.GPU
 def run_example(image, text_input, model_id="Qwen/Qwen2-VL-7B-Instruct"):
     model = models[model_id].eval().cuda()
@@ -26,7 +35,7 @@ def run_example(image, text_input, model_id="Qwen/Qwen2-VL-7B-Instruct"):
         {
             "role": "user",
             "content": [
-                {"type": "image", "image": image},
+                {"type": "image", "image": f"data:image;base64,{image_to_base64(image)}"},
                 {"type": "text", "text": f"Give a bounding box for {text_input}"},
             ],
         }
@@ -67,7 +76,7 @@ with gr.Blocks(css=css) as demo:
     with gr.Tab(label="Qwen2-VL Input"):
        with gr.Row():
            with gr.Column():
-               input_img = gr.Image(label="Input Picture")
+               input_img = gr.Image(label="Input Picture", type="pil")
                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-7B-Instruct")
                text_input = gr.Textbox(label="Description of Localization Target")
                submit_btn = gr.Button(value="Submit")
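The new `image_to_base64` helper together with the `data:image;base64,…` reference suggests the image is now handed to `qwen_vl_utils.process_vision_info` as a URI string rather than a raw object. Below is a minimal sketch of how such a message would typically be turned into model inputs, assuming the rest of `run_example` (not shown in this diff) follows the standard Qwen2-VL inference pattern; `prepare_inputs` is a hypothetical helper, not part of the commit:

```python
from qwen_vl_utils import process_vision_info  # resolves image refs: paths, URLs, data URIs


def prepare_inputs(processor, messages):
    # Hypothetical helper: render the chat template to a prompt string.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # process_vision_info decodes the "data:image;base64,..." reference
    # back into a PIL image that the processor can consume.
    image_inputs, video_inputs = process_vision_info(messages)
    return processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
```

Note that the two app.py changes fit together: with `type="pil"` on the `gr.Image` input, the helper's `image.save(buffered, format="PNG")` call receives an actual `PIL.Image`, which is presumably why both edits land in the same commit.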
requirements.txt CHANGED
@@ -3,6 +3,6 @@ Pillow==10.3.0
 Requests==2.31.0
 torch
 torchvision
-transformers
+git+https://github.com/huggingface/transformers.git@main
 accelerate==0.30.0
 qwen-vl-utils
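Pinning transformers to the main branch is consistent with `Qwen2VLForConditionalGeneration` not yet being available in a PyPI release at the time. A hypothetical fail-fast guard (not part of the commit) that could protect the Space while the dependency floats on a git ref:

```python
# Hypothetical guard: fail fast if the installed transformers build
# predates Qwen2-VL support (i.e. requirements.txt was not honored).
try:
    from transformers import Qwen2VLForConditionalGeneration  # noqa: F401
except ImportError as err:
    raise RuntimeError(
        "This Space needs a transformers build with Qwen2-VL support; "
        "install it from the main branch as pinned in requirements.txt."
    ) from err
```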