"""Gradio demo: detect Baybayin characters in an uploaded image with YOLO."""

import os

import cv2
import gradio as gr
import numpy as np
from PIL import Image
from ultralytics import YOLO

# Load the YOLO models. All six checkpoints are loaded eagerly at import time,
# exactly as before, so the module-level names stay available to other code.
m_raw_model = YOLO("M-Raw.pt")
n_raw_model = YOLO("N-Raw.pt")
s_raw_model = YOLO("S-Raw.pt")
m_pre_model = YOLO("M-Pre.pt")
n_pre_model = YOLO("N-Pre.pt")
s_pre_model = YOLO("S-Pre.pt")

# Model name -> loaded detector. Replaces the former if/elif selection chain.
MODELS = {
    "M-Raw": m_raw_model,
    "N-Raw": n_raw_model,
    "S-Raw": s_raw_model,
    "M-Pre": m_pre_model,
    "N-Pre": n_pre_model,
    "S-Pre": s_pre_model,
}
DEFAULT_MODEL = "M-Raw"

# Class to syllable map
class_mapping = {
    0: 'Baybayin Character',
    1: 'a', 2: 'b', 3: 'ba', 4: 'be', 5: 'bi', 6: 'bo', 7: 'bu',
    8: 'd', 9: 'da', 10: 'di', 11: 'do', 12: 'du',
    13: 'e',
    14: 'g', 15: 'ga', 16: 'gi', 17: 'go', 18: 'gu',
    19: 'ha', 20: 'he', 21: 'hi', 22: 'ho', 23: 'hu',
    24: 'i',
    25: 'k', 26: 'ka', 27: 'ki', 28: 'ko', 29: 'ku',
    30: 'l', 31: 'la', 32: 'le', 33: 'li', 34: 'lo', 35: 'lu',
    36: 'm', 37: 'ma', 38: 'me', 39: 'mi', 40: 'mo', 41: 'mu',
    42: 'n', 43: 'na',
    44: 'ng', 45: 'nga', 46: 'ngi', 47: 'ngo', 48: 'ngu',
    49: 'ni', 50: 'no', 51: 'nu',
    52: 'o',
    53: 'p', 54: 'pa', 55: 'pe', 56: 'pi', 57: 'po', 58: 'pu',
    59: 'r', 60: 'ra', 61: 're', 62: 'ri', 63: 'ro', 64: 'ru',
    65: 's', 66: 'sa', 67: 'se', 68: 'si', 69: 'so', 70: 'su',
    71: 't', 72: 'ta', 73: 'te', 74: 'ti', 75: 'to', 76: 'tu',
    77: 'u',
    78: 'w', 79: 'wa', 80: 'we', 81: 'wi',
    82: 'y', 83: 'ya', 84: 'yi', 85: 'yo', 86: 'yu',
}

# --------
# Some text
# --------
pageTitle = "Baybayin Instance Detection"
msgWarning = "This demo was created by Adriel Amoguis and Miguel Flores for PCSC 2023"
desc = """
Take a picture of some Baybayin text and upload it in the Image Box on the left. You can choose the confidence threshold and the IoU threshold using the sliders.
"""

# Colour used for both boxes and labels. Gradio hands over RGB arrays, so this
# renders as red in the returned image.
ANNOTATION_COLOR = (255, 0, 0)


def _line_thickness(image_width, factor):
    """Return a stroke thickness proportional to image width, never below 1."""
    return max(1, int(image_width * factor))


def snap(upload, conf, iou, show_conf=False, model=DEFAULT_MODEL):
    """Run Baybayin detection on an image and return it with annotations drawn.

    Args:
        upload: Image as a NumPy array (the Gradio input uses ``type="numpy"``),
            or ``None`` when the user submitted without providing an image.
        conf: Confidence threshold forwarded to the YOLO model.
        iou: IoU threshold forwarded to the YOLO model.
        show_conf: When true, append the confidence score to each label.
        model: Key into ``MODELS`` selecting the checkpoint. ``None`` falls
            back to ``DEFAULT_MODEL``.

    Returns:
        ``None`` if ``upload`` is ``None``; the untouched ``upload`` if nothing
        was detected; otherwise an annotated copy of ``upload``.

    Raises:
        ValueError: If ``model`` is not a key of ``MODELS``.
    """
    if upload is None:
        return None

    if model is None:
        model = DEFAULT_MODEL
    try:
        detector = MODELS[model]
    except KeyError:
        raise ValueError(
            f"Unknown model {model!r}; expected one of {sorted(MODELS)}"
        ) from None

    # NOTE(review): Ultralytics documents NumPy sources as BGR while Gradio
    # supplies RGB. Confirm which channel order the checkpoints expect.
    result = detector(upload, conf=conf, iou=iou)[0]

    classes = result.boxes.cls.cpu().numpy()
    if classes.size == 0:
        print("No detections.")
        return upload

    probs = result.boxes.conf.cpu().numpy()
    boxes = result.boxes.xyxy.cpu().numpy()

    # Draw on a writable, C-contiguous copy so the caller's array is never
    # mutated and read-only input buffers do not make OpenCV fail.
    image = upload.copy()

    width = image.shape[1]
    box_thickness = _line_thickness(width, 0.001)
    # Labels without a score were drawn slightly thinner; keep that.
    text_thickness = _line_thickness(width, 0.001 if show_conf else 0.00075)

    for box, class_id, prob in zip(boxes, classes, probs):
        x1, y1, x2, y2 = (int(v) for v in box)
        class_id = int(class_id)
        # Fall back to the numeric id rather than crash on an unmapped class.
        syllable = class_mapping.get(class_id, str(class_id))
        label = f"{syllable} {prob:.2f}" if show_conf else f"{syllable}"

        cv2.rectangle(image, (x1, y1), (x2, y2), ANNOTATION_COLOR, box_thickness)
        cv2.putText(
            image,
            label,
            (x1, y1),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            ANNOTATION_COLOR,
            text_thickness,
        )

    # TODO: to also emit the detected syllables as text, collect
    # (x1, y1, label) per box and sort top-to-bottom, left-to-right.
    return image


# Get the samples array
# samples_dir = "sample_images"
# samples = os.listdir(samples_dir)
# samples = [os.path.join(samples_dir, sample) for sample in samples]

demo = gr.Interface(
    snap,
    [
        # gr.Webcam(type="numpy", label="Webcam"),
        gr.Image(source="upload", type="numpy", label="Baybayin Image"),
        # gr.Radio(["M-Raw", "S-Raw", "N-Raw", "M-Pre", "S-Pre", "N-Pre"], label="Model", value="M-Raw"),
        gr.Slider(0, 1, value=0.5, label="Classifier Confidence Threshold"),
        gr.Slider(0, 1, value=0.5, label="IoU Threshold"),
        # gr.Checkbox(label="Show Confidence Scores", value=False),
    ],
    [
        gr.Image(type="numpy", label="Detected Baybayin"),
        # gr.Textbox(lines=5, label="Detected Latin Syllabes"),
    ],
    title=pageTitle,
    article=msgWarning,
    description=desc,
).queue()

if __name__ == "__main__":
    demo.launch()