llava-med-imf16-llmint4 / llavamed_inference_openvino.py

Update llavamed_inference_openvino.py

b9655f6 verified about 2 months ago

2.88 kB

	#!/usr/bin/env python
	# coding: utf-8

	# # LLaVA-Med 1.5 OpenVINO demo


	import json
	import os
	import warnings

	warnings.filterwarnings("ignore")

	import numpy as np
	import torch
	from PIL import Image
	from transformers import logging

	from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
	from llava.conversation import conv_templates
	from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
	from llava.model.builder import load_pretrained_model

	# Restrict the transformers logger (imported above as `logging`) to errors only.
	logging.set_verbosity_error()
	# Folder holding the qa50 images. Built with os.path.join so the separator
	# matches the host OS: on Windows this is still "data\\qa50_images", while on
	# POSIX it becomes a real nested path instead of one file name containing a
	# backslash.
	image_folder = os.path.join("data", "qa50_images")


	# ## Load Model and Data

	# Checkpoint directory to load. The commented-out line below is an alternative
	# checkpoint name kept for quick switching.
	model_path = "llava-med-imf16-llmint4"
	# model_path = "llava-med-imint8-llmint4"
	model_name = get_model_name_from_path(model_path)

	# Load tokenizer, model and image processor once at import time; the functions
	# below read these module-level names directly.
	tokenizer, model, image_processor, context_len = load_pretrained_model(
	    model_path=model_path,
	    model_base=None,
	    model_name=model_name,
	    device="gpu",
	    openvino=True,
	    image_device="gpu",
	)
	print("loaded models")

	# questions = []
	# with open("data/eval/llava_med_eval_qa50_qa.jsonl") as f:
	# for line in f:
	# questions.append(json.loads(line))


	# ## Functions

	def prepare_inputs_image(question, image):
	    """Build the tokenized prompt and image tensor for one question/image pair.

	    Any image placeholder already present in ``question`` is removed and a
	    single placeholder followed by a newline is put in front of the text. The
	    result is wrapped in the "vicuna_v1" conversation template, with an empty
	    reply slot, and tokenized.

	    Args:
	        question: Question text.
	        image: Image handed to ``process_images`` (presumably a PIL image;
	            confirm against callers).

	    Returns:
	        A tuple ``(input_ids, image_tensor)``: the tokenized prompt with a
	        leading dimension added via ``unsqueeze(0)``, and the first entry of
	        the ``process_images`` result.
	    """
	    # Keep exactly one placeholder, at the very start of the user message
	    # (original note: model.config.mm_use_im_start_end is False).
	    question_text = question.replace(DEFAULT_IMAGE_TOKEN, "").strip()
	    user_message = DEFAULT_IMAGE_TOKEN + "\n" + question_text

	    conversation = conv_templates["vicuna_v1"].copy()
	    conversation.append_message(conversation.roles[0], user_message)
	    # None leaves the second role's turn open for the model to fill in.
	    conversation.append_message(conversation.roles[1], None)

	    token_ids = tokenizer_image_token(
	        conversation.get_prompt(),
	        tokenizer,
	        IMAGE_TOKEN_INDEX,
	        return_tensors="pt",
	    )
	    batched_ids = token_ids.unsqueeze(0)

	    processed_images = process_images([image], image_processor, model.config)
	    return batched_ids, processed_images[0]


	def run_inference_image(image, question):
	    """Generate the model's answer to ``question`` about ``image``.

	    Args:
	        image: Image forwarded to ``prepare_inputs_image``.
	        question: Question text forwarded to ``prepare_inputs_image``.

	    Returns:
	        The decoded answer string for the single prompt, with special tokens
	        skipped and surrounding whitespace stripped.
	    """
	    prompt_ids, pixel_values = prepare_inputs_image(question, image)

	    generation_kwargs = {
	        # Add a leading dimension and cast the image tensor to half precision.
	        "images": pixel_values.unsqueeze(0).half(),
	        "do_sample": False,
	        "max_new_tokens": 1024,
	        "use_cache": True,
	    }
	    generated_ids = model.generate(prompt_ids, **generation_kwargs)

	    # Skip the first prompt-length positions of the output so that only the
	    # tokens after the prompt are decoded.
	    prompt_length = prompt_ids.shape[-1]
	    answer_ids = generated_ids[:, prompt_length:]
	    decoded = tokenizer.batch_decode(answer_ids, skip_special_tokens=True)
	    return decoded[0].strip()

	# Suggested indices are those where this model's output is similar to the
	# source model's output; a similar output may still be incorrect!
	# int8 image model: 2, 13, 14, 16, 17 (shorter) and 4, 5, 8 (longer)
	# f32 image model : 0, 2, 7, 9, 13, 14, 15, 16, 17, 18, 19 (shorter) and 3,5,6,8 (longer)

	if __name__ == "__main__":
	    # Command-line entry point:
	    #     python llavamed_inference_openvino.py IMAGE_FILE [QUESTION ...]
	    # The question is taken from the remaining arguments, or prompted for when
	    # none are given.
	    import sys

	    if len(sys.argv) < 2:
	        sys.exit(f"Usage: {sys.argv[0]} IMAGE_FILE [QUESTION ...]")

	    image_file = sys.argv[1]
	    question = " ".join(sys.argv[2:]).strip() or input("Question:\n")

	    # run_inference_image expects (image, question). The previous call passed
	    # (model, image_file), i.e. the model object as the image and the path as
	    # the question. Open the file here and pass the arguments in order.
	    with Image.open(image_file) as opened:
	        # NOTE(review): converted to RGB on the assumption that the image
	        # processor expects 3-channel input; confirm for this checkpoint.
	        image = opened.convert("RGB")

	    answer = run_inference_image(image, question)
	    print(f"Answer: {answer}")