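"""Gradio demo for LLaVA-Med 1.5 visual question answering, accelerated with OpenVINO.

Loads an OpenVINO-converted LLaVA-Med model (an FP16 image encoder with an
INT4-quantized language model, per the directory name), selects inference
devices from those OpenVINO reports as available, and serves an
image-plus-question UI on port 7788.
"""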

from threading import Thread

import gradio as gr
import openvino as ov
from transformers import TextIteratorStreamer

from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.conversation import conv_templates
from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
from llava.model.builder import load_pretrained_model

css = """
.text textarea {font-size: 24px !important;}
.text p {font-size: 24px !important;}
"""

# Directory containing the OpenVINO-converted LLaVA-Med model.
model_path = "llava-med-imf16-llmint4"
model_name = get_model_name_from_path(model_path)

# Run the language model on GPU when available, falling back to CPU;
# offload the image encoder to an NPU if one is present.
core = ov.Core()
device = "GPU" if "GPU" in core.available_devices else "CPU"
image_device = "NPU" if "NPU" in core.available_devices else device
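
# Note: `openvino=True` and `image_device` are not part of the stock LLaVA
# loader; this call assumes an OpenVINO-adapted build of llava.model.builder.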
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,
    model_name=model_name,
    device=device,
    openvino=True,
    image_device=image_device,
)
print("models loaded")


def reset_inputs():
    """Clear the image, question, and answer fields."""
    return None, "", ""


def prepare_inputs_image(image, question):
    """Tokenize the prompt and preprocess the image for the model."""
    conv_mode = "vicuna_v1"

    # Strip any image token the user typed, then prepend exactly one.
    qs = question.replace(DEFAULT_IMAGE_TOKEN, "").strip()
    qs = DEFAULT_IMAGE_TOKEN + "\n" + qs

    # Wrap the question in the Vicuna v1 conversation template.
    conv = conv_templates[conv_mode].copy()
    conv.append_message(conv.roles[0], qs)
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()

    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0)
    image_tensor = process_images([image], image_processor, model.config)[0]
    return input_ids, image_tensor


def run_inference(image, message):
    """Handle one chat turn, streaming the model's answer as it is generated."""
    if not message:
        # This is a generator, so yield (rather than return) the empty answer.
        yield ""
        return

    input_ids, image_tensor = prepare_inputs_image(image, message)

    # Generate on a background thread and stream partial text as it arrives.
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "streamer": streamer,
        "input_ids": input_ids,
        "images": image_tensor.unsqueeze(0).half(),
        "do_sample": False,
        "max_new_tokens": 512,
        "use_cache": True,
    }
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:
        response += new_text
        yield response
    thread.join()
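

# UI: image and question inputs side by side, a streaming answer box, and
# Process/Reset controls.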
with gr.Blocks(css=css) as demo:
    gr.Markdown("# LLaVA-Med 1.5 OpenVINO Demo")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload an Image", height=300, width=500)
        with gr.Column():
            text_input = gr.Textbox(label="Enter a Question", elem_classes="text", interactive=True)
            chatbot = gr.Textbox(label="Answer", elem_classes="text")

    with gr.Row():
        process_button = gr.Button("Process")
        reset_button = gr.Button("Reset")

    gr.Markdown(
        "NOTE: This OpenVINO model is unvalidated. Results are provisional and may contain errors. "
        "Use this demo to explore AI PC and OpenVINO optimizations."
    )
    gr.Markdown("Source model: [microsoft/LLaVA-Med](https://github.com/microsoft/LLaVA-Med). For research purposes only.")

    # Run inference on button click or when Enter is pressed in the question box.
    process_button.click(run_inference, inputs=[image_input, text_input], outputs=chatbot)
    text_input.submit(run_inference, inputs=[image_input, text_input], outputs=chatbot)
    reset_button.click(reset_inputs, inputs=[], outputs=[image_input, text_input, chatbot])


if __name__ == "__main__":
    # Bind to all interfaces so the demo is reachable from other machines.
    demo.launch(server_port=7788, server_name="0.0.0.0")