"""Gradio demo for the Phi-3-Vision model (model inference currently stubbed out)."""

import gradio as gr
# from transformers import AutoModelForCausalLM, AutoProcessor

# Model identifier; actual loading is commented out below (needs a CUDA GPU
# and flash-attention) — kept so it can be re-enabled easily.
model_id = "microsoft/Phi-3-vision-128k-instruct"
# model = AutoModelForCausalLM.from_pretrained(
#     model_id, device_map="cuda", trust_remote_code=True,
#     torch_dtype="auto", _attn_implementation="flash_attention_2")
# processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)


def generate_text(image, prompt):
    """Return generated text for *image* and *prompt*.

    The real Phi-3-Vision call is commented out above, so this is a stub.

    BUG FIX: the original body returned ``image + prompt``. With
    ``gr.Image(type="pil")`` the callback receives a ``PIL.Image.Image``,
    and PIL image + str raises ``TypeError`` on every submission. Return a
    plain string instead so the Textbox output always works.

    Args:
        image: PIL image from the Gradio Image input (may be None if the
            user submits without uploading).
        prompt: text from the Gradio Textbox input.

    Returns:
        A string echoing the prompt (placeholder until inference is wired up).
    """
    # Kept for when the model call above is re-enabled.
    generation_args = {
        "max_new_tokens": 500,
        "temperature": 0.0,
        "do_sample": False,
    }
    if image is None:
        return prompt
    # PIL images expose .size; fall back to the type name for other inputs.
    size = getattr(image, "size", type(image).__name__)
    return f"[image: {size}] {prompt}"


# Build the UI once; launch only when executed as a script so importing this
# module does not start a (public, share=True) server as a side effect.
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(label="Prompt"),
    ],
    outputs=gr.Textbox(),
    title="Phi-3-Vision Model",
    description="Generate text based on an image and prompt using the Phi-3-Vision model.",
)

if __name__ == "__main__":
    demo.launch(share=True, show_error=True)