"""Gradio app that extracts medicine names from prescription images.

Loads the Qwen2-VL OCR checkpoint on CPU at import time and exposes a
single-image Gradio interface through the module-level ``demo`` object.
"""

import os
import time
from threading import Thread

import gradio as gr
import torch
from PIL import Image
from transformers import (
    AutoProcessor,
    Qwen2VLForConditionalGeneration,
    TextIteratorStreamer,
)

# Load model and processor - CPU version
MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"

# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 512

# Instruction sent to the model together with the prescription image.
EXTRACTION_PROMPT = (
    "Extract ONLY the names of medications/medicines from this prescription "
    "image. Format the output as a numbered list of medicine names only, "
    "without dosages or instructions."
)

# Update with your actual example paths; missing files are skipped below.
EXAMPLE_PATHS = ["examples/prescription1.jpg"]

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = (
    Qwen2VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        torch_dtype=torch.float32,  # Using float32 for CPU compatibility
    )
    .to("cpu")
    .eval()
)


def extract_medicines(image):
    """Extract medicine names from a prescription image.

    Args:
        image: Filesystem path of the uploaded image (the Gradio input is
            configured with ``type="filepath"``), or ``None`` when nothing
            has been uploaded.

    Returns:
        The model's answer as text, or a short prompt asking the user to
        upload an image when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload a prescription image."

    # Open the file once and close it deterministically. ``convert`` forces
    # the lazily-read pixel data to load, so the returned image stays usable
    # after the file handle is released.
    with Image.open(image) as handle:
        picture = handle.convert("RGB")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": picture},
                {"type": "text", "text": EXTRACTION_PROMPT},
            ],
        }
    ]
    prompt_full = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[prompt_full],
        images=[picture],
        return_tensors="pt",
        padding=True,
    ).to("cpu")

    # Generate response
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)

    # ``generate`` returns prompt + completion. Drop the prompt tokens before
    # decoding so only the model's answer is returned; splitting the decoded
    # text on a role marker is unreliable because special tokens are stripped.
    prompt_length = inputs["input_ids"].shape[1]
    answer_ids = output_ids[:, prompt_length:]
    answer = processor.batch_decode(
        answer_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return answer.strip()


# Only register examples that exist on disk: with caching enabled Gradio runs
# every example through the model at startup, so a missing file would abort
# the launch.
_available_examples = [
    [path] for path in EXAMPLE_PATHS if os.path.isfile(path)
]

# Create a simple Gradio interface
demo = gr.Interface(
    fn=extract_medicines,
    inputs=gr.Image(type="filepath", label="Upload Prescription Image"),
    outputs=gr.Textbox(label="Extracted Medicine Names"),
    title="Medicine Name Extractor",
    description="Upload prescription images to extract medicine names",
    examples=_available_examples or None,
    cache_examples=bool(_available_examples),
)

if __name__ == "__main__":
    demo.launch(debug=True)