microsoft
/

OmniParser-v2.0

endpoint-template

Model card Files and versions Community

alvarobartt HF Staff committed on Mar 27

Commit

1ebdf33

·

verified ·

1 Parent(s): acd5c25

Update handler.py

Files changed (1) hide show

handler.py +13 -13

handler.py CHANGED Viewed

@@ -51,15 +51,13 @@ class EndpointHandler:
         self.annotator = BoxAnnotator()
     def __call__(self, data: Dict[str, Any]) -> Any:
-        # inputs => {"inputs": data}
-        #   data => {
-        #     "image": url/base64,
-        #     "image_size": tuple(int, int) / list(int),
-        #     "bbox_threshold": float,
-        #     "iou_threshold": float,
-        #     [disabled] "use_paddleocr": bool,
-        #     "draw_bboxes": bool,
-        #   }
         data = data.pop("inputs")
         # read image from either url or base64 encoding
@@ -198,7 +196,7 @@ class EndpointHandler:
     def get_som_labeled_img(
         self,
         image: ImageType,
-        image_size: Optional[List[int]] = None,
         ocr_texts: Optional[List[str]] = None,
         ocr_bboxes: Optional[List[List[int]]] = None,
         bbox_threshold: float = 0.01,
@@ -211,11 +209,13 @@ class EndpointHandler:
         w, h = image.size
         if image_size is None:
-            image_size = [h, w]
         out = self.yolo.predict(
             image,
-            imgsz=image_size,
             conf=bbox_threshold,
             iou=iou_threshold or 0.7,
             verbose=False,
@@ -652,4 +652,4 @@ class BoxAnnotator:
             text_background_y1,
             text_background_x2,
             text_background_y2,
-        )

         self.annotator = BoxAnnotator()
     def __call__(self, data: Dict[str, Any]) -> Any:
+        # data should contain the following:
+        #  "inputs": {
+        #    "image": url/base64,
+        #    (optional) "image_size": tuple(int, int) / list(int),
+        #    (optional) "bbox_threshold": float,
+        #    (optional) "iou_threshold": float,
+        #  }
         data = data.pop("inputs")
         # read image from either url or base64 encoding
     def get_som_labeled_img(
         self,
         image: ImageType,
+        image_size: Optional[Dict[Literal["w", "h"], int]] = None,
         ocr_texts: Optional[List[str]] = None,
         ocr_bboxes: Optional[List[List[int]]] = None,
         bbox_threshold: float = 0.01,
         w, h = image.size
         if image_size is None:
+            imgsz = {"h": h, "w": w}
+        else:
+            imgsz = [image_size.get("h", h), image_size.get("w", w)]
         out = self.yolo.predict(
             image,
+            imgsz=imgsz,
             conf=bbox_threshold,
             iou=iou_threshold or 0.7,
             verbose=False,
             text_background_y1,
             text_background_x2,
             text_background_y2,
+        )