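"""Gradio demo for LLaVA-Med 1.5 visual question answering, accelerated with OpenVINO.

Loads an OpenVINO-converted LLaVA-Med model (an FP16 image encoder with an
INT4-quantized language model, per the directory name), selects inference
devices from those OpenVINO reports as available, and serves an
image-plus-question UI on port 7788.
"""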

from threading import Thread

import gradio as gr
import openvino as ov
from transformers import TextIteratorStreamer

from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.conversation import conv_templates
from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
from llava.model.builder import load_pretrained_model

css = """
.text textarea {font-size: 24px !important;}
.text p {font-size: 24px !important;}
"""

# Directory containing the OpenVINO-converted LLaVA-Med model.
model_path = "llava-med-imf16-llmint4"
model_name = get_model_name_from_path(model_path)

# Run the language model on GPU when available, falling back to CPU;
# offload the image encoder to an NPU if one is present.
core = ov.Core()
device = "GPU" if "GPU" in core.available_devices else "CPU"
image_device = "NPU" if "NPU" in core.available_devices else device
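
# Note: `openvino=True` and `image_device` are not part of the stock LLaVA
# loader; this call assumes an OpenVINO-adapted build of llava.model.builder.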
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,
    model_name=model_name,
    device=device,
    openvino=True,
    image_device=image_device,
)
print("models loaded")


def reset_inputs():
    """Clear the image, question, and answer fields."""
    return None, "", ""


def prepare_inputs_image(image, question):
    """Tokenize the prompt and preprocess the image for the model."""
    conv_mode = "vicuna_v1"

    # Strip any image token the user typed, then prepend exactly one.
    qs = question.replace(DEFAULT_IMAGE_TOKEN, "").strip()
    qs = DEFAULT_IMAGE_TOKEN + "\n" + qs

    # Wrap the question in the Vicuna v1 conversation template.
    conv = conv_templates[conv_mode].copy()
    conv.append_message(conv.roles[0], qs)
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()

    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0)
    image_tensor = process_images([image], image_processor, model.config)[0]
    return input_ids, image_tensor


def run_inference(image, message):
    """Handle one chat turn, streaming the model's answer as it is generated."""
    if not message:
        # This is a generator, so yield (rather than return) the empty answer.
        yield ""
        return

    input_ids, image_tensor = prepare_inputs_image(image, message)

    # Generate on a background thread and stream partial text as it arrives.
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "streamer": streamer,
        "input_ids": input_ids,
        "images": image_tensor.unsqueeze(0).half(),
        "do_sample": False,
        "max_new_tokens": 512,
        "use_cache": True,
    }
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:
        response += new_text
        yield response
    thread.join()
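

# UI: image and question inputs side by side, a streaming answer box, and
# Process/Reset controls.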
with gr.Blocks(css=css) as demo:
    gr.Markdown("# LLaVA-Med 1.5 OpenVINO Demo")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload an Image", height=300, width=500)
        with gr.Column():
            text_input = gr.Textbox(label="Enter a Question", elem_classes="text", interactive=True)
            chatbot = gr.Textbox(label="Answer", elem_classes="text")

    with gr.Row():
        process_button = gr.Button("Process")
        reset_button = gr.Button("Reset")

    gr.Markdown(
        "NOTE: This OpenVINO model is unvalidated. Results are provisional and may contain errors. "
        "Use this demo to explore AI PC and OpenVINO optimizations."
    )
    gr.Markdown("Source model: [microsoft/LLaVA-Med](https://github.com/microsoft/LLaVA-Med). For research purposes only.")

    # Run inference on button click or when Enter is pressed in the question box.
    process_button.click(run_inference, inputs=[image_input, text_input], outputs=chatbot)
    text_input.submit(run_inference, inputs=[image_input, text_input], outputs=chatbot)
    reset_button.click(reset_inputs, inputs=[], outputs=[image_input, text_input, chatbot])


if __name__ == "__main__":
    # Bind to all interfaces so the demo is reachable from other machines.
    demo.launch(server_port=7788, server_name="0.0.0.0")