diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..b934f0343b1be0b6b9a4651d20819af4b34a35ca 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,24 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +dist/examples/1.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/10.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/11.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/12.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/13.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/14.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/15.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/16.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/17.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/18.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/19.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/2.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/20.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/21.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/3.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/4.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/5.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/6.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/7.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/8.jpg filter=lfs diff=lfs merge=lfs -text +dist/examples/9.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index e3f409513e4689967ea2f57e8dd264aa38641f1d..c14937bb0676761fbc3e159920992bcda9c845d1 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ --- title: Describe Anything -emoji: 🌖 -colorFrom: indigo -colorTo: pink +emoji: ⚡ +colorFrom: yellow +colorTo: purple sdk: gradio -sdk_version: 5.25.2 +sdk_version: 5.7.1 app_file: app.py pinned: false --- diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..b2ae9fe7a663454700eba465993b6236ff9f4e46 --- /dev/null +++ b/app.py @@ -0,0 +1,164 @@ +import os +os.environ["GRADIO_SSR_MODE"] = "false" + +if not os.path.exists("checkpoints"): + os.makedirs("checkpoints") + os.system("pip install gdown") + os.system("gdown https://drive.google.com/uc?id=1eQe6blJcyI7oy78C8ozwj1IUkbkFEItf; unzip -o dam_3b_v1.zip -d checkpoints") + +from segment_anything import sam_model_registry, SamPredictor +import gradio as gr +import numpy as np +import cv2 +import base64 +import torch +from PIL import Image +import io +import argparse +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles +from transformers import SamModel, SamProcessor +from dam import DescribeAnythingModel, disable_torch_init +try: + from spaces import GPU +except ImportError: + print("Spaces not installed, using dummy GPU decorator") + GPU = lambda fn: fn + +# Load SAM model +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +sam_model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device) +sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") + +@GPU(duration=75) +def image_to_sam_embedding(base64_image): + try: + # Decode base64 string to bytes + image_bytes = base64.b64decode(base64_image) + + # Convert bytes to PIL Image + image = Image.open(io.BytesIO(image_bytes)) + + # Process image with SAM processor + inputs = sam_processor(image, return_tensors="pt").to(device) + + # Get image 
embedding + with torch.no_grad(): + image_embedding = sam_model.get_image_embeddings(inputs["pixel_values"]) + + # Convert to CPU and numpy + image_embedding = image_embedding.cpu().numpy() + + # Encode the embedding as base64 + embedding_bytes = image_embedding.tobytes() + embedding_base64 = base64.b64encode(embedding_bytes).decode('utf-8') + + return embedding_base64 + except Exception as e: + print(f"Error processing image: {str(e)}") + raise gr.Error(f"Failed to process image: {str(e)}") + +@GPU(duration=75) +def describe(image_base64: str, mask_base64: str, query: str): + # Convert base64 to PIL Image + image_bytes = base64.b64decode(image_base64.split(',')[1] if ',' in image_base64 else image_base64) + img = Image.open(io.BytesIO(image_bytes)) + mask_bytes = base64.b64decode(mask_base64.split(',')[1] if ',' in mask_base64 else mask_base64) + mask = Image.open(io.BytesIO(mask_bytes)) + + # Process the mask + mask = Image.fromarray((np.array(mask.convert('L')) > 0).astype(np.uint8) * 255) + + # Get description using DAM with streaming + description_generator = dam.get_description(img, mask, query, streaming=True) + + # Stream the tokens + text = "" + for token in description_generator: + text += token + yield text + +@GPU(duration=75) +def describe_without_streaming(image_base64: str, mask_base64: str, query: str): + # Convert base64 to PIL Image + image_bytes = base64.b64decode(image_base64.split(',')[1] if ',' in image_base64 else image_base64) + img = Image.open(io.BytesIO(image_bytes)) + mask_bytes = base64.b64decode(mask_base64.split(',')[1] if ',' in mask_base64 else mask_base64) + mask = Image.open(io.BytesIO(mask_bytes)) + + # Process the mask + mask = Image.fromarray((np.array(mask.convert('L')) > 0).astype(np.uint8) * 255) + + # Get description using DAM + description = dam.get_description(img, mask, query) + + return description + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Describe Anything gradio demo") + parser.add_argument("--model-path", type=str, default="checkpoints/dam_3b_v1", help="Path to the model checkpoint") + parser.add_argument("--prompt-mode", type=str, default="full+focal_crop", help="Prompt mode") + parser.add_argument("--conv-mode", type=str, default="v1", help="Conversation mode") + parser.add_argument("--temperature", type=float, default=0.2, help="Sampling temperature") + parser.add_argument("--top_p", type=float, default=0.5, help="Top-p for sampling") + + args = parser.parse_args() + + # Initialize DAM model + disable_torch_init() + dam = DescribeAnythingModel( + model_path=args.model_path, + conv_mode=args.conv_mode, + prompt_mode=args.prompt_mode, + temperature=args.temperature, + top_p=args.top_p, + num_beams=1, + max_new_tokens=512, + ).to(device) + + # Create Gradio interface + with gr.Blocks() as demo: + gr.Interface( + fn=image_to_sam_embedding, + inputs=gr.Textbox(label="Image Base64"), + outputs=gr.Textbox(label="Embedding Base64"), + title="Image Embedding Generator", + api_name="image_to_sam_embedding" + ) + gr.Interface( + fn=describe, + inputs=[ + gr.Textbox(label="Image Base64"), + gr.Text(label="Mask Base64"), + gr.Text(label="Prompt") + ], + outputs=[ + gr.Text(label="Description") + ], + title="Mask Description Generator", + api_name="describe" + ) + gr.Interface( + fn=describe_without_streaming, + inputs=[ + gr.Textbox(label="Image Base64"), + gr.Text(label="Mask Base64"), + gr.Text(label="Prompt") + ], + outputs=[ + gr.Text(label="Description") + ], + title="Mask Description Generator 
(Non-Streaming)", + api_name="describe_without_streaming" + ) + + demo._block_thread = demo.block_thread + demo.block_thread = lambda: None + demo.launch() + + for route in demo.app.routes: + if route.path == "/": + demo.app.routes.remove(route) + demo.app.mount("/", StaticFiles(directory="dist", html=True), name="demo") + + demo._block_thread() diff --git a/dam/.DS_Store b/dam/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..baf2233d43329a4e0071291e621cd28d42bc8c5f Binary files /dev/null and b/dam/.DS_Store differ diff --git a/dam/__init__.py b/dam/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3a26f76f70c7d6757242a145cc6be9b346afb937 --- /dev/null +++ b/dam/__init__.py @@ -0,0 +1,2 @@ +from .describe_anything_model import * +from .model import * diff --git a/dam/__pycache__/__init__.cpython-310.pyc b/dam/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..943315dd316501db4d9d992da01d8c124a8a243d Binary files /dev/null and b/dam/__pycache__/__init__.cpython-310.pyc differ diff --git a/dam/__pycache__/describe_anything_model.cpython-310.pyc b/dam/__pycache__/describe_anything_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b0a24a19dcdc956e9e840ea6c7f81063215bd2a6 Binary files /dev/null and b/dam/__pycache__/describe_anything_model.cpython-310.pyc differ diff --git a/dam/describe_anything_model.py b/dam/describe_anything_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b86283c1a0c3ec07dfc3b97504715324c718b0a1 --- /dev/null +++ b/dam/describe_anything_model.py @@ -0,0 +1,212 @@ +import torch +import torch.nn as nn +import numpy as np +from PIL import Image +from .model.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX +from .model.conversation import SeparatorStyle, conv_templates +from .model.mm_utils import KeywordsStoppingCriteria, process_image, tokenizer_image_token +from .model import get_model_name_from_path, load_pretrained_model +from transformers import TextIteratorStreamer +from threading import Thread + +class DescribeAnythingModel(nn.Module): + def __init__(self, model_path, conv_mode, prompt_mode, temperature, top_p, num_beams, max_new_tokens, **kwargs): + super().__init__() + + self.model_path = model_path + self.conv_mode = conv_mode + self.prompt_mode = prompt_mode + self.temperature = temperature + self.top_p = top_p + self.num_beams = num_beams + self.max_new_tokens = max_new_tokens + + tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, None, **kwargs) + model.config.image_processor = image_processor + + self.tokenizer = tokenizer + self.model = model + self.context_len = context_len + + self.model_name = get_model_name_from_path(model_path) + + def get_prompt(self, qs): + if DEFAULT_IMAGE_TOKEN not in qs: + raise ValueError("no tag found in input.") + + conv = conv_templates[self.conv_mode].copy() + conv.append_message(conv.roles[0], qs) + conv.append_message(conv.roles[1], None) + prompt = conv.get_prompt() + + return prompt, conv + + @staticmethod + def mask_to_box(mask_np): + mask_coords = np.argwhere(mask_np) + y0, x0 = mask_coords.min(axis=0) + y1, x1 = mask_coords.max(axis=0) + 1 + + h = y1 - y0 + w = x1 - x0 + + return x0, y0, w, h + + @classmethod + def crop_image(cls, pil_img, mask_np, crop_mode, min_box_w=48, min_box_h=48): + if crop_mode == "full": + # no crop + info = dict(mask_np=mask_np) + return pil_img, info + + if crop_mode 
== "crop": + # crop image and mask + x0, y0, w, h = cls.mask_to_box(mask_np) + img_np = np.asarray(pil_img) + assert img_np.shape[:2] == mask_np.shape, f"image shape mismatches with mask shape: {img_np.shape}, {mask_np.shape}" + cropped_mask_np = mask_np[y0:y0+h, x0:x0+w] + cropped_img_np = img_np[y0:y0+h, x0:x0+w] + cropped_pil_img = Image.fromarray(cropped_img_np) + elif crop_mode == "context_crop": + # crop image and mask + x0, y0, w, h = cls.mask_to_box(mask_np) + img_np = np.asarray(pil_img) + assert img_np.shape[:2] == mask_np.shape, f"image shape mismatches with mask shape: {img_np.shape}, {mask_np.shape}" + img_h, img_w = img_np.shape[:2] + cropped_mask_np = mask_np[max(y0-h, 0):min(y0+2*h, img_h), max(x0-w, 0):min(x0+2*w, img_w)] + cropped_img_np = img_np[max(y0-h, 0):min(y0+2*h, img_h), max(x0-w, 0):min(x0+2*w, img_w)] + cropped_pil_img = Image.fromarray(cropped_img_np) + elif crop_mode == "focal_crop": + # crop image and mask + x0, y0, w, h = cls.mask_to_box(mask_np) + img_np = np.asarray(pil_img) + assert img_np.shape[:2] == mask_np.shape, f"image shape mismatches with mask shape: {img_np.shape}, {mask_np.shape}" + img_h, img_w = img_np.shape[:2] + + xc, yc = x0 + w/2, y0 + h/2 + # focal_crop: need to have at least min_box_w and min_box_h pixels, otherwise resizing to (384, 384) leads to artifacts that may be OOD + w, h = max(w, min_box_w), max(h, min_box_h) + x0, y0 = int(xc - w / 2), int(yc - h / 2) + + cropped_mask_np = mask_np[max(y0-h, 0):min(y0+2*h, img_h), max(x0-w, 0):min(x0+2*w, img_w)] + cropped_img_np = img_np[max(y0-h, 0):min(y0+2*h, img_h), max(x0-w, 0):min(x0+2*w, img_w)] + cropped_pil_img = Image.fromarray(cropped_img_np) + elif crop_mode == "crop_mask": + # crop image and mask + x0, y0, w, h = cls.mask_to_box(mask_np) + img_np = np.asarray(pil_img) + assert img_np.shape[:2] == mask_np.shape, f"image shape mismatches with mask shape: {img_np.shape}, {mask_np.shape}" + cropped_mask_np = mask_np[y0:y0+h, x0:x0+w] + cropped_img_np = img_np[y0:y0+h, x0:x0+w] + # Mask the image + cropped_img_np = cropped_img_np * cropped_mask_np[..., None] + cropped_pil_img = Image.fromarray(cropped_img_np) + else: + raise ValueError(f"Unsupported crop_mode: {crop_mode}") + + info = dict(mask_np=cropped_mask_np) + return cropped_pil_img, info + + def get_description(self, image_pil, mask_pil, query, streaming=False): + prompt, conv = self.get_prompt(query) + if not isinstance(image_pil, (list, tuple)): + assert not isinstance(mask_pil, (list, tuple)), "image_pil and mask_pil must be both list or tuple or not list or tuple." 
+ image_pils = [image_pil] + mask_pils = [mask_pil] + else: + image_pils = image_pil + mask_pils = mask_pil + description = self.get_description_from_prompt(image_pils, mask_pils, prompt, conv, streaming=streaming) + + return description + + def get_image_tensor(self, image_pil, mask_pil, crop_mode, crop_mode2): + # the pil has True/False (if the value is non-zero, then we treat it as True) + mask_np = (np.asarray(mask_pil) > 0).astype(np.uint8) + images_tensor, image_info = process_image(image_pil, self.model.config, None, pil_preprocess_fn=lambda pil_img: self.crop_image(image_pil, mask_np=mask_np, crop_mode=crop_mode)) + images_tensor = images_tensor[None].to(self.model.device, dtype=torch.float16) + + mask_np = image_info["mask_np"] + mask_pil = Image.fromarray(mask_np * 255) + + masks_tensor = process_image(mask_pil, self.model.config, None) + masks_tensor = masks_tensor[None].to(self.model.device, dtype=torch.float16) + + images_tensor = torch.cat((images_tensor, masks_tensor[:, :1, ...]), dim=1) + + if crop_mode2 is not None: + images_tensor2, image_info2 = process_image(image_pil, self.model.config, None, pil_preprocess_fn=lambda pil_img: self.crop_image(pil_img, mask_np=mask_np, crop_mode=crop_mode2)) + images_tensor2 = images_tensor2[None].to(self.model.device, dtype=torch.float16) + + mask_np2 = image_info2["mask_np"] + mask_pil2 = Image.fromarray(mask_np2 * 255) + + masks_tensor2 = process_image(mask_pil2, self.model.config, None) + masks_tensor2 = masks_tensor2[None].to(self.model.device, dtype=torch.float16) + + images_tensor2 = torch.cat((images_tensor2, masks_tensor2[:, :1, ...]), dim=1) + else: + images_tensor2 = None + + return torch.cat((images_tensor, images_tensor2), dim=1) if images_tensor2 is not None else images_tensor + + def get_description_from_prompt(self, image_pils, mask_pils, prompt, conv, streaming=False): + if streaming: + return self.get_description_from_prompt_iterator(image_pils, mask_pils, prompt, conv, streaming=True) + else: + # If streaming is False, there will be only one output + output = self.get_description_from_prompt_iterator(image_pils, mask_pils, prompt, conv, streaming=False) + return next(output) + + def get_description_from_prompt_iterator(self, image_pils, mask_pils, prompt, conv, streaming=False): + crop_mode, crop_mode2 = self.prompt_mode.split("+") + assert crop_mode == "full", "Current prompt only supports first crop as full (non-cropped). If you need other specifications, please update the prompt." + + assert len(image_pils) == len(mask_pils), f"image_pils and mask_pils must have the same length. Got {len(image_pils)} and {len(mask_pils)}." 
+ image_tensors = [self.get_image_tensor(image_pil, mask_pil, crop_mode=crop_mode, crop_mode2=crop_mode2) for image_pil, mask_pil in zip(image_pils, mask_pils)] + + input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).cuda() + + stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 + keywords = [stop_str] + stopping_criteria = KeywordsStoppingCriteria(keywords, self.tokenizer, input_ids) + + streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True) if streaming else None + generation_kwargs = dict( + input_ids=input_ids, + images=image_tensors, + do_sample=True if self.temperature > 0 else False, + temperature=self.temperature, + top_p=self.top_p, + num_beams=self.num_beams, + max_new_tokens=self.max_new_tokens, + use_cache=True, + stopping_criteria=[stopping_criteria], + streamer=streamer + ) + + + if streaming: + thread = Thread(target=self.model.generate, kwargs=generation_kwargs) + thread.start() + + generated_text = "" + for new_text in streamer: + generated_text += new_text + if stop_str in generated_text: + generated_text = generated_text[:generated_text.find(stop_str)] + break + yield new_text + + thread.join() + else: + with torch.inference_mode(): + output_ids = self.model.generate(**generation_kwargs) + + outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0] + outputs = outputs.strip() + if outputs.endswith(stop_str): + outputs = outputs[: -len(stop_str)] + outputs = outputs.strip() + + yield outputs diff --git a/dam/model/__init__.py b/dam/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3cac328dc4741398c64655b72aa81c4444e8c3b3 --- /dev/null +++ b/dam/model/__init__.py @@ -0,0 +1,4 @@ +from .constants import * +from .conversation import * +from .mm_utils import * +from .model_utils import * diff --git a/dam/model/__pycache__/__init__.cpython-310.pyc b/dam/model/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bda5bd309efbf734b99f8c0a26c2d73b80b67d8d Binary files /dev/null and b/dam/model/__pycache__/__init__.cpython-310.pyc differ diff --git a/dam/model/__pycache__/configuration_llava.cpython-310.pyc b/dam/model/__pycache__/configuration_llava.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..345fb598df2690707afde922ae27b2e2a374b3ac Binary files /dev/null and b/dam/model/__pycache__/configuration_llava.cpython-310.pyc differ diff --git a/dam/model/__pycache__/constants.cpython-310.pyc b/dam/model/__pycache__/constants.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ffb509b5a0ebf1c5b8b5de5e3706a0d99d87b90 Binary files /dev/null and b/dam/model/__pycache__/constants.cpython-310.pyc differ diff --git a/dam/model/__pycache__/conversation.cpython-310.pyc b/dam/model/__pycache__/conversation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5b33c6d748d8f83dd771d8e09e30d62b81b722f Binary files /dev/null and b/dam/model/__pycache__/conversation.cpython-310.pyc differ diff --git a/dam/model/__pycache__/llava_arch.cpython-310.pyc b/dam/model/__pycache__/llava_arch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ff6b5572d8d2b03d9c0e41d92aed07b0f3098e2 Binary files /dev/null and b/dam/model/__pycache__/llava_arch.cpython-310.pyc differ diff --git a/dam/model/__pycache__/mm_utils.cpython-310.pyc 
b/dam/model/__pycache__/mm_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..872c7a6f812ad6525fbeac5494c5087e787c270e Binary files /dev/null and b/dam/model/__pycache__/mm_utils.cpython-310.pyc differ diff --git a/dam/model/__pycache__/model_utils.cpython-310.pyc b/dam/model/__pycache__/model_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c77209ada4171baa7724afc1c0ebc4652cb62bd Binary files /dev/null and b/dam/model/__pycache__/model_utils.cpython-310.pyc differ diff --git a/dam/model/__pycache__/utils.cpython-310.pyc b/dam/model/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c73054b1f549952f053567e88ea00ceb3b175289 Binary files /dev/null and b/dam/model/__pycache__/utils.cpython-310.pyc differ diff --git a/dam/model/builder_ignored.py b/dam/model/builder_ignored.py new file mode 100644 index 0000000000000000000000000000000000000000..c8b7110f70a7b8fa2f8ef418492cbec184bf58cf --- /dev/null +++ b/dam/model/builder_ignored.py @@ -0,0 +1,260 @@ +# This file is modified from https://github.com/haotian-liu/LLaVA/ +# Copyright 2023 Haotian Liu +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +import warnings +import shutil + +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + AutoConfig, + BitsAndBytesConfig, + PretrainedConfig, + PreTrainedModel, +) +import torch +from llava.model import * +from llava.model.utils import is_mm_model +from llava.model.language_model.llava_llama import LlavaConfig +from llava.constants import ( + DEFAULT_IMAGE_PATCH_TOKEN, + DEFAULT_IM_START_TOKEN, + DEFAULT_IM_END_TOKEN, +) + +def load_pretrained_model( + model_path, + model_name, + model_base=None, + load_8bit=False, + load_4bit=False, + device_map="auto", + device="cuda", + **kwargs, +): + kwargs = {"device_map": device_map, **kwargs} + + if device != "cuda": + kwargs["device_map"] = {"": device} + + if load_8bit: + kwargs["load_in_8bit"] = True + elif load_4bit: + kwargs["load_in_4bit"] = True + kwargs["quantization_config"] = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + ) + else: + kwargs["torch_dtype"] = torch.float16 + + if is_mm_model(model_path): + # Load LLaVA model + ## TODO @yunhao: mind fixing lora + if "lora" in model_name.lower() and model_base is None: + warnings.warn( + "There is `lora` in model name but no `model_base` is provided. If you are loading a LoRA model, please provide the `model_base` argument. Detailed instruction: https://github.com/haotian-liu/LLaVA#launch-a-model-worker-lora-weights-unmerged." 
+ ) + if "lora" in model_name.lower() and model_base is not None: + lora_cfg_pretrained = AutoConfig.from_pretrained(model_path) + tokenizer = AutoTokenizer.from_pretrained( + model_base, use_fast=False, legacy=False + ) + print("Loading LLaVA from base model...") + model = LlavaLlamaForCausalLM.from_pretrained( + model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, **kwargs + ) + token_num, tokem_dim = model.lm_head.out_features, model.lm_head.in_features + if model.lm_head.weight.shape[0] != token_num: + model.lm_head.weight = torch.nn.Parameter( + torch.empty( + token_num, tokem_dim, device=model.device, dtype=model.dtype + ) + ) + model.model.embed_tokens.weight = torch.nn.Parameter( + torch.empty( + token_num, tokem_dim, device=model.device, dtype=model.dtype + ) + ) + + print("Loading additional LLaVA weights...") + if os.path.exists(os.path.join(model_path, "non_lora_trainables.bin")): + non_lora_trainables = torch.load( + os.path.join(model_path, "non_lora_trainables.bin"), + map_location="cpu", + ) + else: + # this is probably from HF Hub + from huggingface_hub import hf_hub_download + + def load_from_hf(repo_id, filename, subfolder=None): + cache_file = hf_hub_download( + repo_id=repo_id, filename=filename, subfolder=subfolder + ) + return torch.load(cache_file, map_location="cpu") + + non_lora_trainables = load_from_hf( + model_path, "non_lora_trainables.bin" + ) + non_lora_trainables = { + (k[11:] if k.startswith("base_model.") else k): v + for k, v in non_lora_trainables.items() + } + if any(k.startswith("model.model.") for k in non_lora_trainables): + non_lora_trainables = { + (k[6:] if k.startswith("model.") else k): v + for k, v in non_lora_trainables.items() + } + model.load_state_dict(non_lora_trainables, strict=False) + + from peft import PeftModel + + print("Loading LoRA weights...") + model = PeftModel.from_pretrained(model, model_path) + print("Merging LoRA weights...") + model = model.merge_and_unload() + print("Model is loaded...") + ## TODO @yunhao: mind fixing this + elif model_base is not None: + # this may be mm projector only + print("Loading LLaVA from base model...") + cfg_pretrained = AutoConfig.from_pretrained( + model_path, trust_remote_code=True + ) + mm_config_wrapper(config, kwargs) + if "mpt" in model_name.lower(): + if not os.path.isfile(os.path.join(model_path, "configuration_mpt.py")): + shutil.copyfile( + os.path.join(model_base, "configuration_mpt.py"), + os.path.join(model_path, "configuration_mpt.py"), + ) + tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=True) + model = LlavaMPTForCausalLM.from_pretrained( + model_base, low_cpu_mem_usage=True, config=cfg_pretrained, **kwargs + ) + else: + tokenizer = AutoTokenizer.from_pretrained( + model_base, use_fast=False, legacy=False + ) + model = LlavaLlamaForCausalLM.from_pretrained( + model_base, low_cpu_mem_usage=True, config=cfg_pretrained, **kwargs + ) + else: + config = AutoConfig.from_pretrained(model_path) + config.resume_path = model_path + prepare_config_for_eval(config, kwargs) + if "mpt" in model_name.lower(): + model = LlavaMPTForCausalLM.from_pretrained( + model_path, config=config, low_cpu_mem_usage=True, **kwargs + ) + elif "mistral" in model_name.lower() or "mixtral" in model_name.lower(): + model = LlavaMistralForCausalLM.from_pretrained( + model_path, config=config, low_cpu_mem_usage=True, **kwargs + ) + elif "gemma" in model_name.lower(): + model = LlavaGemmaForCausalLM.from_pretrained( + model_path, config=config, low_cpu_mem_usage=True, **kwargs + ) + else: 
+ # kentang-mit@: llama-2 model + # config._attn_implementation = "flash_attention_2" + model = LlavaLlamaModel( + config=config, + low_cpu_mem_usage=True, + **kwargs + ) + tokenizer = model.tokenizer + else: + # Load language model + if model_base is not None: + # PEFT model + from peft import PeftModel + + tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False) + model = AutoModelForCausalLM.from_pretrained( + model_base, low_cpu_mem_usage=True, **kwargs + ) + print(f"Loading LoRA weights from {model_path}") + model = PeftModel.from_pretrained(model, model_path) + print(f"Merging weights") + model = model.merge_and_unload() + print("Convert to FP16...") + model.to(torch.float16) + else: + if "mpt" in model_name.lower(): + tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True) + model = AutoModelForCausalLM.from_pretrained( + model_path, low_cpu_mem_usage=True, trust_remote_code=True, **kwargs + ) + else: + tokenizer = AutoTokenizer.from_pretrained( + model_path, use_fast=False, legacy=False + ) + model = AutoModelForCausalLM.from_pretrained( + model_path, low_cpu_mem_usage=True, **kwargs + ) + model.eval() + image_processor = None + if is_mm_model(model_path): + mm_use_im_start_end = getattr(model.config, "mm_use_im_start_end", False) + mm_use_im_patch_token = getattr(model.config, "mm_use_im_patch_token", True) + if mm_use_im_patch_token: + tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True) + if mm_use_im_start_end: + tokenizer.add_tokens( + [DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True + ) + model.resize_token_embeddings(len(tokenizer)) + vision_tower = model.get_vision_tower() + vision_tower.to(device=device, dtype=torch.float16) + mm_projector = model.get_mm_projector() + mm_projector.to(device=device, dtype=torch.float16) + image_processor = vision_tower.image_processor + + if hasattr(model.llm.config, "max_sequence_length"): + context_len = model.config.max_sequence_length + else: + context_len = 2048 + + return tokenizer, model, image_processor, context_len + +def parse_model_name_or_path(config: PretrainedConfig, model_name="llm", suffix="_cfg"): + target_model = f"{model_name}{suffix}" + target_cfg = getattr(config, target_model, None) + + if isinstance(target_cfg, str): + return target_cfg + elif isinstance(target_cfg, dict): + return target_cfg["architectures"][0] + else: + raise ValueError(f"Invalid {target_model} configuration!") + +def prepare_config_for_eval(config: PretrainedConfig, kwargs: dict): + try: + # compatible with deprecated config convention + if getattr(config, "vision_tower_cfg", None) is None: + config.vision_tower_cfg = config.mm_vision_tower + except AttributeError: + raise ValueError(f"Invalid configuration! 
Cannot find vision_tower in config:\n{config}") + + config.model_dtype = kwargs.pop("torch_dtype").__str__() + # siglip does not support device_map = "auto" + vision_tower_name = parse_model_name_or_path(config, "vision_tower") + if "siglip" in vision_tower_name.lower(): + kwargs["device_map"] = "cuda" \ No newline at end of file diff --git a/dam/model/configuration_llava.py b/dam/model/configuration_llava.py new file mode 100644 index 0000000000000000000000000000000000000000..4b1174161f52d5876d0f7a5405a8b2323d11a90e --- /dev/null +++ b/dam/model/configuration_llava.py @@ -0,0 +1,55 @@ +from transformers import PretrainedConfig + + +class LlavaConfig(PretrainedConfig): + model_type = "llava" + + def __init__( + self, + llm_cfg=None, + vision_tower_cfg=None, + mm_projector_cfg=None, + mask_encoder_cfg=None, + context_provider_cfg=None, + architectures=None, + resume_path=None, + hidden_size=None, + mm_hidden_size=None, + image_aspect_ratio=None, + num_video_frames=None, + mm_vision_select_layer=None, + mm_vision_select_feature=None, + mm_use_im_start_end=False, + mm_use_im_patch_token=True, + mm_projector_lr=None, + vision_resolution=None, + interpolate_mode=None, + s2=None, + s2_scales=None, + s2_max_split_size=None, + **kwargs + ): + super().__init__() + self.architectures = architectures + self.llm_cfg = llm_cfg + self.vision_tower_cfg = vision_tower_cfg + self.mm_projector_cfg = mm_projector_cfg + self.mask_encoder_cfg = mask_encoder_cfg + self.context_provider_cfg = context_provider_cfg + self.resume_path = resume_path + + self.hidden_size = hidden_size + self.mm_hidden_size = mm_hidden_size + self.image_aspect_ratio = image_aspect_ratio + self.num_video_frames = num_video_frames + self.mm_vision_select_layer = mm_vision_select_layer + self.mm_vision_select_feature = mm_vision_select_feature + self.mm_use_im_start_end = mm_use_im_start_end + self.mm_use_im_start_end = mm_use_im_start_end + self.mm_use_im_patch_token = mm_use_im_patch_token + self.mm_projector_lr = mm_projector_lr + self.vision_resolution = vision_resolution + self.interpolate_mode = interpolate_mode + self.s2 = s2 + self.s2_scales = s2_scales + self.s2_max_split_size = s2_max_split_size diff --git a/dam/model/consolidate.py b/dam/model/consolidate.py new file mode 100644 index 0000000000000000000000000000000000000000..1e324210e229eeba23b75791bba82df7c6e639eb --- /dev/null +++ b/dam/model/consolidate.py @@ -0,0 +1,29 @@ +""" +Usage: +python3 -m llava.model.consolidate --src ~/model_weights/llava-7b --dst ~/model_weights/llava-7b_consolidate +""" +import argparse + +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM +from llava.model import * +from llava.model.utils import auto_upgrade + + +def consolidate_ckpt(src_path, dst_path): + print("Loading model") + auto_upgrade(src_path) + src_model = AutoModelForCausalLM.from_pretrained(src_path, torch_dtype=torch.float16, low_cpu_mem_usage=True) + src_tokenizer = AutoTokenizer.from_pretrained(src_path, use_fast=False) + src_model.save_pretrained(dst_path) + src_tokenizer.save_pretrained(dst_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str, required=True) + parser.add_argument("--dst", type=str, required=True) + + args = parser.parse_args() + + consolidate_ckpt(args.src, args.dst) diff --git a/dam/model/constants.py b/dam/model/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..4b047f70145a87e677d58c9732a5fb24c90a801c --- /dev/null +++ 
b/dam/model/constants.py @@ -0,0 +1,32 @@ +# Copyright 2024 NVIDIA CORPORATION & AFFILIATES +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# This file is modified from https://github.com/haotian-liu/LLaVA/ + + +CONTROLLER_HEART_BEAT_EXPIRATION = 30 +WORKER_HEART_BEAT_INTERVAL = 15 + +LOGDIR = "." + +# Model Constants +IGNORE_INDEX = -100 +IMAGE_TOKEN_INDEX = -200 +MASK_TOKEN_INDEX = -300 +DEFAULT_IMAGE_TOKEN = "" +DEFAULT_IMAGE_PATCH_TOKEN = "" +DEFAULT_IM_START_TOKEN = "" +DEFAULT_IM_END_TOKEN = "" +IMAGE_PLACEHOLDER = "" diff --git a/dam/model/conversation.py b/dam/model/conversation.py new file mode 100644 index 0000000000000000000000000000000000000000..c3c708647cb2339ff97553845d9dec085f396267 --- /dev/null +++ b/dam/model/conversation.py @@ -0,0 +1,474 @@ +# Copyright 2024 NVIDIA CORPORATION & AFFILIATES +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 +# This file is modified from https://github.com/haotian-liu/LLaVA/ + + +import dataclasses +from enum import auto, Enum +from typing import List, Tuple + + +class SeparatorStyle(Enum): + """Different separator style.""" + SINGLE = auto() + TWO = auto() + MPT = auto() + PLAIN = auto() + LLAMA_2 = auto() + MISTRAL = auto() + LLAMA_3 = auto() + + +@dataclasses.dataclass +class Conversation: + """A class that keeps all conversation history.""" + system: str + roles: List[str] + messages: List[List[str]] + offset: int + sep_style: SeparatorStyle = SeparatorStyle.SINGLE + sep: str = "###" + sep2: str = None + version: str = "Unknown" + + skip_next: bool = False + + def get_prompt(self): + messages = self.messages + if len(messages) > 0 and type(messages[0][1]) is tuple: + messages = self.messages.copy() + init_role, init_msg = messages[0].copy() + init_msg = init_msg[0].replace("", "").strip() + if 'mmtag' in self.version: + messages[0] = (init_role, init_msg) + messages.insert(0, (self.roles[0], "")) + messages.insert(1, (self.roles[1], "Received.")) + else: + messages[0] = (init_role, "\n" + init_msg) + + if self.sep_style == SeparatorStyle.SINGLE: + ret = self.system + self.sep + for role, message in messages: + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + ": " + message + self.sep + else: + ret += role + ":" + elif self.sep_style == SeparatorStyle.TWO: + seps = [self.sep, self.sep2] + ret = self.system + seps[0] + for i, (role, message) in enumerate(messages): + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + ": " + message + seps[i % 2] + else: + ret += role + ":" + elif self.sep_style == SeparatorStyle.LLAMA_3: + ret = self.system + self.sep + for role, message in messages: + if message: + if type(message) is tuple: + message = message[0] + ret += role + message + self.sep + else: + ret += role + elif self.sep_style == SeparatorStyle.MPT: + ret = self.system + self.sep + for role, message in messages: + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + message + self.sep + else: + ret += role + elif self.sep_style == SeparatorStyle.LLAMA_2 or self.sep_style == SeparatorStyle.MISTRAL: + if self.sep_style == SeparatorStyle.LLAMA_2: + wrap_sys = lambda msg: f"<>\n{msg}\n<>\n\n" + else: + wrap_sys = lambda msg: f"{msg}" + ("\n" if msg else "") + wrap_inst = lambda msg: f"[INST] {msg} [/INST]" + ret = "" + if self.sep_style == SeparatorStyle.MISTRAL: + ret += "" + + for i, (role, message) in enumerate(messages): + if i == 0: + assert message, "first message should not be none" + assert role == self.roles[0], "first message should come from user" + if message: + if type(message) is tuple: + message, _, _ = message + if i == 0: message = wrap_sys(self.system) + message + if i % 2 == 0: + message = wrap_inst(message) + ret += self.sep + message + else: + if self.sep_style == SeparatorStyle.LLAMA_2: + ret += " " + message + " " + self.sep2 + else: + ret += message + self.sep2 + else: + ret += "" + ret = ret.lstrip(self.sep) + elif self.sep_style == SeparatorStyle.PLAIN: + seps = [self.sep, self.sep2] + ret = self.system + for i, (role, message) in enumerate(messages): + if message: + if type(message) is tuple: + message, _, _ = message + ret += message + seps[i % 2] + else: + ret += "" + else: + raise ValueError(f"Invalid style: {self.sep_style}") + + return ret + + def append_message(self, role, message): + self.messages.append([role, message]) + + 
def get_images(self, return_pil=False): + images = [] + for i, (role, msg) in enumerate(self.messages[self.offset:]): + if i % 2 == 0: + if type(msg) is tuple: + import base64 + from io import BytesIO + from PIL import Image + msg, image, image_process_mode = msg + if image_process_mode == "Pad": + def expand2square(pil_img, background_color=(122, 116, 104)): + width, height = pil_img.size + if width == height: + return pil_img + elif width > height: + result = Image.new(pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new(pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + image = expand2square(image) + elif image_process_mode in ["Default", "Crop"]: + pass + elif image_process_mode == "Resize": + image = image.resize((336, 336)) + else: + raise ValueError(f"Invalid image_process_mode: {image_process_mode}") + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 800, 400 + shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) + longest_edge = int(shortest_edge * aspect_ratio) + W, H = image.size + if longest_edge != max(image.size): + if H > W: + H, W = longest_edge, shortest_edge + else: + H, W = shortest_edge, longest_edge + image = image.resize((W, H)) + if return_pil: + images.append(image) + else: + buffered = BytesIO() + image.save(buffered, format="PNG") + img_b64_str = base64.b64encode(buffered.getvalue()).decode() + images.append(img_b64_str) + return images + + def to_gradio_chatbot(self): + ret = [] + for i, (role, msg) in enumerate(self.messages[self.offset:]): + if i % 2 == 0: + if type(msg) is tuple: + import base64 + from io import BytesIO + msg, image, image_process_mode = msg + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 800, 400 + shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) + longest_edge = int(shortest_edge * aspect_ratio) + W, H = image.size + if H > W: + H, W = longest_edge, shortest_edge + else: + H, W = shortest_edge, longest_edge + image = image.resize((W, H)) + buffered = BytesIO() + image.save(buffered, format="JPEG") + img_b64_str = base64.b64encode(buffered.getvalue()).decode() + img_str = f'user upload image' + msg = img_str + msg.replace('', '').strip() + ret.append([msg, None]) + else: + ret.append([msg, None]) + else: + ret[-1][-1] = msg + return ret + + def copy(self): + return Conversation( + system=self.system, + roles=self.roles, + messages=[[x, y] for x, y in self.messages], + offset=self.offset, + sep_style=self.sep_style, + sep=self.sep, + sep2=self.sep2, + version=self.version) + + def dict(self): + if len(self.get_images()) > 0: + return { + "system": self.system, + "roles": self.roles, + "messages": [[x, y[0] if type(y) is tuple else y] for x, y in self.messages], + "offset": self.offset, + "sep": self.sep, + "sep2": self.sep2, + } + return { + "system": self.system, + "roles": self.roles, + "messages": self.messages, + "offset": self.offset, + "sep": self.sep, + "sep2": self.sep2, + } + + +conv_vicuna_v0 = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. 
" + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("Human", "Assistant"), + messages=( + ("Human", "What are the key differences between renewable and non-renewable energy sources?"), + ("Assistant", + "Renewable energy sources are those that can be replenished naturally in a relatively " + "short amount of time, such as solar, wind, hydro, geothermal, and biomass. " + "Non-renewable energy sources, on the other hand, are finite and will eventually be " + "depleted, such as coal, oil, and natural gas. Here are some key differences between " + "renewable and non-renewable energy sources:\n" + "1. Availability: Renewable energy sources are virtually inexhaustible, while non-renewable " + "energy sources are finite and will eventually run out.\n" + "2. Environmental impact: Renewable energy sources have a much lower environmental impact " + "than non-renewable sources, which can lead to air and water pollution, greenhouse gas emissions, " + "and other negative effects.\n" + "3. Cost: Renewable energy sources can be more expensive to initially set up, but they typically " + "have lower operational costs than non-renewable sources.\n" + "4. Reliability: Renewable energy sources are often more reliable and can be used in more remote " + "locations than non-renewable sources.\n" + "5. Flexibility: Renewable energy sources are often more flexible and can be adapted to different " + "situations and needs, while non-renewable sources are more rigid and inflexible.\n" + "6. Sustainability: Renewable energy sources are more sustainable over the long term, while " + "non-renewable sources are not, and their depletion can lead to economic and social instability.\n") + ), + offset=2, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +conv_vicuna_v1 = Conversation( + system="A chat between a curious user and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the user's questions.", + roles=("USER", "ASSISTANT"), + version="v1", + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", +) + +# kentang-mit@: This conversation template is designed for SFT on VFLAN. +conv_vicuna_v1_nosys = Conversation( + system="", + roles=("USER", "ASSISTANT"), + version="v1_nosys", + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", +) + +conv_llama_2 = Conversation( + system="""You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. + +If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.""", + roles=("USER", "ASSISTANT"), + version="llama_v2", + messages=(), + offset=0, + sep_style=SeparatorStyle.LLAMA_2, + sep="", + sep2="", +) + +conv_mistral = Conversation( + system="", + roles=("USER", "ASSISTANT"), + version="mistral", + messages=(), + offset=0, + sep_style=SeparatorStyle.MISTRAL, + sep="", + sep2="", +) + +conv_llava_llama_2 = Conversation( + system="You are a helpful language and vision assistant. 
" + "You are able to understand the visual content that the user provides, " + "and assist the user with a variety of tasks using natural language.", + roles=("USER", "ASSISTANT"), + version="llama_v2", + messages=(), + offset=0, + sep_style=SeparatorStyle.LLAMA_2, + sep="", + sep2="", +) + +conv_mpt = Conversation( + system="""<|im_start|>system +A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.""", + roles=("<|im_start|>user\n", "<|im_start|>assistant\n"), + version="mpt", + messages=(), + offset=0, + sep_style=SeparatorStyle.MPT, + sep="<|im_end|>", +) + +conv_llava_plain = Conversation( + system="", + roles=("", ""), + messages=( + ), + offset=0, + sep_style=SeparatorStyle.PLAIN, + sep="\n", +) + +conv_llava_v0 = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("Human", "Assistant"), + messages=( + ), + offset=0, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +conv_llava_v0_mmtag = Conversation( + system="A chat between a curious user and an artificial intelligence assistant. " + "The assistant is able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." + "The visual content will be provided with the following format: visual content.", + roles=("Human", "Assistant"), + messages=( + ), + offset=0, + sep_style=SeparatorStyle.SINGLE, + sep="###", + version="v0_mmtag", +) + +conv_llava_v1 = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("USER", "ASSISTANT"), + version="v1", + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", +) + + + +conv_llava_v1_mmtag = Conversation( + system="A chat between a curious user and an artificial intelligence assistant. " + "The assistant is able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." + "The visual content will be provided with the following format: visual content.", + roles=("USER", "ASSISTANT"), + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", + version="v1_mmtag", +) + +hermes_2 = Conversation( + system='<|im_start|>system\nAnswer the questions.', + roles=('<|im_start|>user\n', '<|im_start|>assistant\n'), + sep_style=SeparatorStyle.MPT, + sep='<|im_end|>', + messages=( + ), + offset=0, + version="hermes-2" +) + + +# Template added by Yukang. Note (kentang-mit@): sep is <|eot_id|> for official template. +llama_3_chat = Conversation( + system="<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful language and vision assistant. 
" + "You are able to understand the visual content that the user provides, " + "and assist the user with a variety of tasks using natural language.", + roles=("<|start_header_id|>user<|end_header_id|>\n\n", + "<|start_header_id|>system<|end_header_id|>\n\n"), + version="llama_v3", + messages=(), + offset=0, + sep_style=SeparatorStyle.LLAMA_3, + sep="<|end_of_text|>", +) + + +default_conversation = conv_vicuna_v1 +conv_templates = { + "default": conv_vicuna_v0, + "hermes-2": hermes_2, + "llama_3": llama_3_chat, + "v0": conv_vicuna_v0, + "v1": conv_vicuna_v1, + "vicuna_v1": conv_vicuna_v1, + "vicuna_v1_nosys": conv_vicuna_v1_nosys, + "llama_2": conv_llama_2, + "mistral": conv_mistral, + + "plain": conv_llava_plain, + "v0_plain": conv_llava_plain, + "llava_v0": conv_llava_v0, + "v0_mmtag": conv_llava_v0_mmtag, + "llava_v1": conv_llava_v1, + "v1_mmtag": conv_llava_v1_mmtag, + "llava_llama_2": conv_llava_llama_2, + + "mpt": conv_mpt, +} + + +if __name__ == "__main__": + print(default_conversation.get_prompt()) diff --git a/dam/model/language_model/__pycache__/builder.cpython-310.pyc b/dam/model/language_model/__pycache__/builder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b3dc6a8059bdb7850cd34475dff69dc52434c5e Binary files /dev/null and b/dam/model/language_model/__pycache__/builder.cpython-310.pyc differ diff --git a/dam/model/language_model/__pycache__/llava_llama.cpython-310.pyc b/dam/model/language_model/__pycache__/llava_llama.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d043b588f9b59c22875b119ba3c1171c1769a81 Binary files /dev/null and b/dam/model/language_model/__pycache__/llava_llama.cpython-310.pyc differ diff --git a/dam/model/language_model/__pycache__/llava_mistral.cpython-310.pyc b/dam/model/language_model/__pycache__/llava_mistral.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9446f962b4647f0d2d261b55cfeb35cbb8911827 Binary files /dev/null and b/dam/model/language_model/__pycache__/llava_mistral.cpython-310.pyc differ diff --git a/dam/model/language_model/builder.py b/dam/model/language_model/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..25a2549e1dfed0e4abee4fdd9c77a1e6a64757ef --- /dev/null +++ b/dam/model/language_model/builder.py @@ -0,0 +1,111 @@ +import math +import warnings +import os, os.path as osp +import torch +from transformers import PretrainedConfig, PreTrainedModel +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + AutoConfig, + BitsAndBytesConfig, + PretrainedConfig, + PreTrainedModel, +) + + +def has_tokenizer(path): + if ( + osp.exists(osp.join(path, "special_tokens_map.json")) + and osp.exists(osp.join(path, "tokenizer_config.json")) + and (osp.exists(osp.join(path, "tokenizer.model")) or osp.exists(osp.join(path, "tokenizer.json"))) + ): + # print("[has_tokenizer]", path, True) + return True + from huggingface_hub import HfApi, file_exists + from huggingface_hub.utils import validate_repo_id, HFValidationError + api = HfApi() + try: + valid_hf_repo = api.repo_exists(path) + except HFValidationError as e: + valid_hf_repo = False + if ( + valid_hf_repo + and file_exists(path, "special_tokens_map.json") + and file_exists(path, "tokenizer_config.json") + and (file_exists(path, "tokenizer.model") or file_exists(path, "tokenizer.json")) + ): + # print("[has_tokenizer]", path, True) + return True + # print("[has_tokenizer]", path, False) + return False + + +def context_length_extension(config): + 
orig_ctx_len = getattr(config, "max_position_embeddings", None) + model_max_length = getattr(config, "model_max_length", None) + if orig_ctx_len and model_max_length > orig_ctx_len: + print(f"Scaling RoPE from {orig_ctx_len} to {model_max_length}") + scaling_factor = float(math.ceil(model_max_length / orig_ctx_len)) + config.rope_scaling = {"type": "linear", "factor": scaling_factor} + return config + + +def build_llm_and_tokenizer( + model_name_or_path: str, + config: PretrainedConfig, + # config_cls: PretrainedConfig = None, + # llm_cls: PreTrainedModel = None, + attn_implementation=None, + model_max_length=None, + *args, + **kwargs, +) -> PreTrainedModel: + # if config_cls is None: + # config_cls = AutoConfig + # if llm_cls is None: + # llm_cls = AutoModelForCausalLM + # config_cls = AutoConfig + # llm_cls = AutoModelForCausalLM + ## extra configuration for llm + # print("build_llm_and_tokenizer():", model_name_or_path); input("DEBUG") + llm_cfg = AutoConfig.from_pretrained(model_name_or_path) + llm_cfg._attn_implementation = attn_implementation + llm_cfg.model_max_length = model_max_length + if model_max_length is not None: + context_length_extension(llm_cfg) + + llm = AutoModelForCausalLM.from_pretrained( + model_name_or_path, config=llm_cfg, torch_dtype=eval(config.model_dtype), *args, **kwargs + ) + + llm_path = model_name_or_path + if not has_tokenizer(llm_path): + warnings.warn("tokenizer found in VLM root folder. Move to ./{VILA}/llm in the future.") + llm_path = osp.join(llm_path, "llm") + + # TODO(ligeng): use LLM class to judge to better compability. + if "mpt" in model_name_or_path: + tokenizer = AutoTokenizer.from_pretrained( + llm_path, + model_max_length=llm_cfg.model_max_length, + padding_side="right", + ) + elif "yi" in model_name_or_path.lower(): + tokenizer = AutoTokenizer.from_pretrained( + llm_path, + model_max_length=llm_cfg.model_max_length, + padding_side="right", + use_fast=False, + ) + else: + tokenizer = AutoTokenizer.from_pretrained( + llm_path, + model_max_length=llm_cfg.model_max_length, + padding_side="right", + use_fast=False, + legacy=False, + ) + + # TODO(ligeng): is this necessary for llava? + config.hidden_size = llm.config.hidden_size + return llm, tokenizer \ No newline at end of file diff --git a/dam/model/language_model/llava_gemma_ignored.py b/dam/model/language_model/llava_gemma_ignored.py new file mode 100644 index 0000000000000000000000000000000000000000..6b9bf11815671db07785ca3ce5c6ccd42da13360 --- /dev/null +++ b/dam/model/language_model/llava_gemma_ignored.py @@ -0,0 +1,161 @@ +# Copyright 2023 Haotian Liu +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +PAD_TOKEN_ID = 0 + +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn as nn + +from transformers import AutoConfig, AutoModelForCausalLM +from transformers.models.gemma import GemmaConfig, GemmaModel, GemmaForCausalLM + +from transformers.modeling_outputs import CausalLMOutputWithPast +from llava.constants import IGNORE_INDEX +from ..llava_arch import LlavaMetaModel, LlavaMetaForCausalLM +# import time + + +class LlavaGemmaConfig(GemmaConfig): + model_type = "llava_gemma" + + +class LlavaGemmaModel(GemmaModel, LlavaMetaModel): + config_class = LlavaGemmaConfig + + def __init__(self, config: GemmaConfig): + super(LlavaGemmaModel, self).__init__(config) + + +class LlavaGemmaForCausalLM(GemmaForCausalLM, LlavaMetaForCausalLM): + config_class = LlavaGemmaConfig + + def __init__(self, config): + super(LlavaGemmaForCausalLM, self).__init__(config) + self.model = LlavaGemmaModel(config) + self.pretraining_tp = 1 + self.vocab_size = config.vocab_size + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_model(self): + return self.model + + def get_lm_head(self): + return self.lm_head + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + images: Optional[torch.FloatTensor] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, CausalLMOutputWithPast]: + if inputs_embeds is None: + ( + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels + ) = self.prepare_inputs_labels_for_multimodal( + input_ids, + position_ids, + attention_mask, + past_key_values, + labels, + images + ) + # TODO (kentang-mit@): fuse this function into the previous one. + # current design makes unit-test easier. 
+ if self.training: + ( + _, + new_position_ids, + new_attention_mask, + _, + new_inputs_embeds, + new_labels, + sorted_seqlens_in_batch + ) = self.repack_multimodal_data( + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels + ) + new_input_ids = None + past_key_values = None + new_cache_position = None + else: + new_attention_mask = attention_mask + new_position_ids = position_ids + new_inputs_embeds = inputs_embeds + new_labels = labels + if attention_mask is not None: + sorted_seqlens_in_batch = attention_mask.sum(-1).int() + else: + sorted_seqlens_in_batch = None + new_input_ids = input_ids + # kentang-mit@: This only works for batch=1 currently + # model.generate of gemma does not correctly handle decoding stage currently + # need to manually adjust decoding stage input = 1 token + if past_key_values is not None: + if new_inputs_embeds is not None: + new_inputs_embeds = new_inputs_embeds[:, [-1]] + # kentang-mit@: seems to be a problem unique to gemma + if new_position_ids is not None: + new_position_ids = new_position_ids[:, [-1]] + new_cache_position = new_position_ids[0] + + outputs = super().forward( + input_ids=new_input_ids, + attention_mask=new_attention_mask, + position_ids=new_position_ids, + past_key_values=past_key_values, + inputs_embeds=new_inputs_embeds, + labels=new_labels, + use_cache=use_cache, + cache_position=new_cache_position, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + seqlens_in_batch=sorted_seqlens_in_batch, + ) + return outputs + + def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs): + images = kwargs.pop("images", None) + _inputs = super().prepare_inputs_for_generation( + input_ids, past_key_values=past_key_values, inputs_embeds=inputs_embeds, **kwargs + ) + if images is not None: + _inputs['images'] = images + return _inputs + +AutoConfig.register("llava_gemma", LlavaGemmaConfig) +AutoModelForCausalLM.register(LlavaGemmaConfig, LlavaGemmaForCausalLM) diff --git a/dam/model/language_model/llava_llama.py b/dam/model/language_model/llava_llama.py new file mode 100644 index 0000000000000000000000000000000000000000..b2db1cb2c5833fa4a66a6f26d2e810b6c205ee83 --- /dev/null +++ b/dam/model/language_model/llava_llama.py @@ -0,0 +1,180 @@ +# Copyright 2023 Haotian Liu +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This file is modified from https://github.com/haotian-liu/LLaVA/ + + +from typing import List, Optional, Tuple, Union +import os, os.path as osp +import torch + +from transformers import ( + LlamaForCausalLM, + LlamaConfig, + PreTrainedModel, + AutoConfig, + AutoModel, + GenerationConfig, + PretrainedConfig, + PreTrainedModel, +) + +from transformers.modeling_outputs import CausalLMOutputWithPast +from ..llava_arch import LlavaMetaModel, LlavaMetaForCausalLM +from ..multimodal_encoder.builder import build_vision_tower +from ..multimodal_projector.builder import build_mm_projector +from ..configuration_llava import LlavaConfig +from ..utils import get_model_config +from .builder import build_llm_and_tokenizer + + +class LlavaLlamaConfig(LlavaConfig): + model_type = "llava_llama" + +## FIXME we will follow the convention to add a new class for CausalLM in the future +class LlavaLlamaModel(LlavaMetaModel, LlavaMetaForCausalLM, PreTrainedModel): + config_class = LlavaLlamaConfig + main_input_name = "input_embeds" + supports_gradient_checkpointing = True + + def __init__(self, config: LlavaLlamaConfig = None, *args, **kwargs) -> None: + super().__init__(config) + return self.init_vlm(config=config, *args, **kwargs) + + @classmethod + def from_pretrained( + cls, + pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], + *model_args, + config: Optional[Union[PretrainedConfig, str, os.PathLike]] = None, + cache_dir: Optional[Union[str, os.PathLike]] = None, + ignore_mismatched_sizes: bool = False, + force_download: bool = False, + local_files_only: bool = False, + token: Optional[Union[str, bool]] = None, + revision: str = "main", + use_safetensors: bool = None, + **kwargs, + ): + if hasattr(cls, "load_pretrained"): + return cls.load_pretrained(pretrained_model_name_or_path, + *model_args, config=config, cache_dir=cache_dir, ignore_mismatched_sizes=ignore_mismatched_sizes, force_download=force_download, local_files_only=local_files_only, token=token, + revision=revision, use_safetensors=use_safetensors, **kwargs + ) + return super(LlavaLlamaModel).from_pretrained(pretrained_model_name_or_path, + *model_args, config=config, cache_dir=cache_dir, ignore_mismatched_sizes=ignore_mismatched_sizes, force_download=force_download, local_files_only=local_files_only, token=token, + revision=revision, use_safetensors=use_safetensors, **kwargs) + + def forward( + self, + input_ids: torch.LongTensor = None, + images: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, CausalLMOutputWithPast]: + self.freezed_module_patch() + if inputs_embeds is None: + ( + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels, + ) = self.prepare_inputs_labels_for_multimodal( + input_ids, position_ids, attention_mask, past_key_values, labels, images + ) + # Note (kentang-mit@): we have a unit test for this function. 
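        # As in llava_gemma.py above, training batches are repacked while inference batches pass
        # through unchanged; either way the seqlens handed to the LLM are per-sample token counts
        # taken from the attention mask. Illustrative values only:
        #
        #   import torch
        #   attention_mask = torch.tensor([[1, 1, 1, 0, 0],
        #                                  [1, 1, 1, 1, 1]])
        #   attention_mask.sum(-1).int()  # tensor([3, 5], dtype=torch.int32)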
+ if self.training: + ( + _, + new_position_ids, + new_attention_mask, + _, + new_inputs_embeds, + new_labels, + sorted_seqlens_in_batch, + ) = self.repack_multimodal_data( + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels, + ) + new_input_ids = None + past_key_values = None + else: + new_attention_mask = attention_mask + new_position_ids = position_ids + new_inputs_embeds = inputs_embeds + new_labels = labels + sorted_seqlens_in_batch = attention_mask.sum(-1).int() + new_input_ids = input_ids + + outputs = self.llm.forward( + input_ids=new_input_ids, + attention_mask=new_attention_mask, + position_ids=new_position_ids, + past_key_values=past_key_values, + inputs_embeds=new_inputs_embeds, + labels=new_labels, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + seqlens_in_batch=sorted_seqlens_in_batch, + ) + return outputs + + @torch.no_grad() + def generate( + self, + input_ids: Optional[torch.FloatTensor] = None, + images: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.LongTensor] = None, + **generation_kwargs, + ): + if images is not None: + ( + _, + _, + attention_mask, + _, + inputs_embeds, + _, + ) = self.prepare_inputs_labels_for_multimodal( + input_ids, None, attention_mask, None, None, images + ) + else: + inputs_embeds = self.get_input_embeddings()(input_ids) + inputs_embeds = inputs_embeds.to(self.dtype) + + outputs = self.llm.generate( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + **generation_kwargs + ) + return outputs + + +AutoConfig.register("llava_llama", LlavaLlamaConfig) +AutoModel.register(LlavaLlamaConfig, LlavaLlamaModel) diff --git a/dam/model/language_model/llava_mistral_ignored.py b/dam/model/language_model/llava_mistral_ignored.py new file mode 100644 index 0000000000000000000000000000000000000000..b8d9bb209093593b8d6c629b98a52ac28e0424f0 --- /dev/null +++ b/dam/model/language_model/llava_mistral_ignored.py @@ -0,0 +1,145 @@ +# Copyright 2024 NVIDIA CORPORATION & AFFILIATES +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
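# A rough usage sketch for the LlavaLlamaModel.generate method defined in llava_llama.py above
# (the checkpoint path, input tensors, and generation kwargs are placeholders, not values
# prescribed by this patch):
#
#   import torch
#   model = LlavaLlamaModel.from_pretrained("path/to/checkpoint").eval().to("cuda")
#   with torch.inference_mode():
#       output_ids = model.generate(
#           input_ids=input_ids,            # prompt tokens including the image placeholder(s)
#           images=image_tensor,            # image batch preprocessed for the vision tower
#           attention_mask=attention_mask,
#           max_new_tokens=512,
#       )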
+# +# SPDX-License-Identifier: Apache-2.0 + +# This file is modified from https://github.com/haotian-liu/LLaVA/ + +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn as nn + +from transformers import AutoConfig, AutoModelForCausalLM, \ + MistralConfig, MistralModel, MistralForCausalLM + +from transformers.modeling_outputs import CausalLMOutputWithPast + +from ..llava_arch import LlavaMetaModel, LlavaMetaForCausalLM + + +class LlavaMistralConfig(MistralConfig): + model_type = "llava_mistral" + pretraining_tp = 1 + + +class LlavaMistralModel(MistralModel, LlavaMetaModel): + config_class = LlavaMistralConfig + + def __init__(self, config: MistralConfig): + super(LlavaMistralModel, self).__init__(config) + + +class LlavaMistralForCausalLM(MistralForCausalLM, LlavaMetaForCausalLM): + config_class = LlavaMistralConfig + + def __init__(self, config): + super(MistralForCausalLM, self).__init__(config) + self.model = LlavaMistralModel(config) + self.pretraining_tp = config.pretraining_tp + self.vocab_size = config.vocab_size + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_model(self): + return self.model + + def get_lm_head(self): + return self.lm_head + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + images: Optional[torch.FloatTensor] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, CausalLMOutputWithPast]: + if inputs_embeds is None: + ( + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels + ) = self.prepare_inputs_labels_for_multimodal( + input_ids, + position_ids, + attention_mask, + past_key_values, + labels, + images + ) + if self.training: + ( + _, + new_position_ids, + new_attention_mask, + _, + new_inputs_embeds, + new_labels, + sorted_seqlens_in_batch + ) = self.repack_multimodal_data( + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels + ) + new_input_ids = None + past_key_values = None + else: + new_attention_mask = attention_mask + new_position_ids = position_ids + new_inputs_embeds = inputs_embeds + new_labels = labels + sorted_seqlens_in_batch = attention_mask.sum(-1).int() + new_input_ids = input_ids + + outputs = super().forward( + input_ids=new_input_ids, + attention_mask=new_attention_mask, + position_ids=new_position_ids, + past_key_values=past_key_values, + inputs_embeds=new_inputs_embeds, + labels=new_labels, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + seqlens_in_batch=sorted_seqlens_in_batch, + ) + return outputs + + def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs): + images = kwargs.pop("images", None) + _inputs = super().prepare_inputs_for_generation( + input_ids, past_key_values=past_key_values, inputs_embeds=inputs_embeds, **kwargs + ) + if images is not None: + _inputs['images'] = images + return _inputs + +AutoConfig.register("llava_mistral", LlavaMistralConfig) 
+AutoModelForCausalLM.register(LlavaMistralConfig, LlavaMistralForCausalLM) diff --git a/dam/model/language_model/llava_mpt_ignored.py b/dam/model/language_model/llava_mpt_ignored.py new file mode 100644 index 0000000000000000000000000000000000000000..71f703604655f219a7636e12be0729db901504b2 --- /dev/null +++ b/dam/model/language_model/llava_mpt_ignored.py @@ -0,0 +1,115 @@ +# Copyright 2023 Haotian Liu +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is modified from https://github.com/haotian-liu/LLaVA/ + + +from typing import List, Optional, Tuple +import warnings + +import torch +import torch.nn.functional as F +import math + +from transformers import AutoConfig, AutoModelForCausalLM +from transformers.modeling_outputs import CausalLMOutputWithPast + +from .mpt.modeling_mpt import MPTConfig, MPTForCausalLM, MPTModel +from llava.model.llava_arch import LlavaMetaModel, LlavaMetaForCausalLM + + +class LlavaMPTConfig(MPTConfig): + model_type = "llava_mpt" + + +class LlavaMPTModel(MPTModel, LlavaMetaModel): + config_class = LlavaMPTConfig + + def __init__(self, config: MPTConfig): + config.hidden_size = config.d_model + super(LlavaMPTModel, self).__init__(config) + + def embed_tokens(self, x): + return self.wte(x) + + +class LlavaMPTForCausalLM(MPTForCausalLM, LlavaMetaForCausalLM): + config_class = LlavaMPTConfig + supports_gradient_checkpointing = True + + def __init__(self, config): + super(MPTForCausalLM, self).__init__(config) + + if not config.tie_word_embeddings: + raise ValueError('MPTForCausalLM only supports tied word embeddings') + self.transformer = LlavaMPTModel(config) + self.logit_scale = None + if config.logit_scale is not None: + logit_scale = config.logit_scale + if isinstance(logit_scale, str): + if logit_scale == 'inv_sqrt_d_model': + logit_scale = 1 / math.sqrt(config.d_model) + else: + raise ValueError(f"logit_scale={logit_scale!r} is not recognized as an option; use numeric value or 'inv_sqrt_d_model'.") + self.logit_scale = logit_scale + + def get_model(self): + return self.transformer + + def _set_gradient_checkpointing(self, module, value=False): + if isinstance(module, LlavaMPTModel): + module.gradient_checkpointing = value + + def forward(self, input_ids: torch.LongTensor, past_key_values: Optional[List[Tuple[torch.FloatTensor]]]=None, attention_mask: Optional[torch.ByteTensor]=None, prefix_mask: Optional[torch.ByteTensor]=None, sequence_id: Optional[torch.LongTensor]=None, labels: Optional[torch.LongTensor]=None, return_dict: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, use_cache: Optional[bool]=None, images=None): + return_dict = return_dict if return_dict is not None else self.config.return_dict + use_cache = use_cache if use_cache is not None else self.config.use_cache + + input_ids, _, attention_mask, past_key_values, inputs_embeds, labels = self.prepare_inputs_labels_for_multimodal(input_ids, None, attention_mask, past_key_values, labels, images) + outputs = 
self.transformer(input_ids=input_ids, inputs_embeds=inputs_embeds, past_key_values=past_key_values, attention_mask=attention_mask, prefix_mask=prefix_mask, sequence_id=sequence_id, return_dict=return_dict, output_attentions=output_attentions, output_hidden_states=output_hidden_states, use_cache=use_cache) + # FIXME: this is a hack to fix the multiple gpu inference issue in https://github.com/haotian-liu/LLaVA/issues/338 + logits = F.linear(outputs.last_hidden_state.to(self.transformer.wte.weight.device), self.transformer.wte.weight) + if self.logit_scale is not None: + if self.logit_scale == 0: + warnings.warn(f'Multiplying logits by self.logit_scale={self.logit_scale!r}. This will produce uniform (uninformative) outputs.') + logits *= self.logit_scale + loss = None + if labels is not None: + labels = torch.roll(labels, shifts=-1) + labels[:, -1] = -100 + loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.to(logits.device).view(-1)) + return CausalLMOutputWithPast(loss=loss, logits=logits, past_key_values=outputs.past_key_values, hidden_states=outputs.hidden_states) + + def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs): + if inputs_embeds is not None: + raise NotImplementedError('inputs_embeds is not implemented for MPT yet') + attention_mask = kwargs['attention_mask'].bool() + if attention_mask[:, -1].sum() != attention_mask.shape[0]: + raise NotImplementedError('MPT does not support generation with right padding.') + if self.transformer.attn_uses_sequence_id and self.training: + sequence_id = torch.zeros_like(input_ids[:1]) + else: + sequence_id = None + if past_key_values is not None: + input_ids = input_ids[:, -1].unsqueeze(-1) + if self.transformer.prefix_lm: + prefix_mask = torch.ones_like(attention_mask) + if kwargs.get('use_cache') == False: + raise NotImplementedError('MPT with prefix_lm=True does not support use_cache=False.') + else: + prefix_mask = None + return {'input_ids': input_ids, 'attention_mask': attention_mask, 'prefix_mask': prefix_mask, 'sequence_id': sequence_id, 'past_key_values': past_key_values, 'use_cache': kwargs.get('use_cache', True), "images": kwargs.get("images", None)} + + +AutoConfig.register("llava_mpt", LlavaMPTConfig) +AutoModelForCausalLM.register(LlavaMPTConfig, LlavaMPTForCausalLM) diff --git a/dam/model/language_model/mpt_ignored/adapt_tokenizer.py b/dam/model/language_model/mpt_ignored/adapt_tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..e640c157e8f5581953c518df0611a423225ef598 --- /dev/null +++ b/dam/model/language_model/mpt_ignored/adapt_tokenizer.py @@ -0,0 +1,41 @@ +from typing import Union +from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast +Tokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast] +NUM_SENTINEL_TOKENS: int = 100 + +def adapt_tokenizer_for_denoising(tokenizer: Tokenizer): + """Adds sentinel tokens and padding token (if missing). + + Expands the tokenizer vocabulary to include sentinel tokens + used in mixture-of-denoiser tasks as well as a padding token. + + All added tokens are added as special tokens. No tokens are + added if sentinel tokens and padding token already exist. 
+ """ + sentinels_to_add = [f'<extra_id_{i}>' for i in range(NUM_SENTINEL_TOKENS)] + tokenizer.add_tokens(sentinels_to_add, special_tokens=True) + if tokenizer.pad_token is None: + tokenizer.add_tokens('<pad>', special_tokens=True) + tokenizer.pad_token = '<pad>' + assert tokenizer.pad_token_id is not None + sentinels = ''.join([f'<extra_id_{i}>' for i in range(NUM_SENTINEL_TOKENS)]) + _sentinel_token_ids = tokenizer(sentinels, add_special_tokens=False).input_ids + tokenizer.sentinel_token_ids = _sentinel_token_ids + +class AutoTokenizerForMOD(AutoTokenizer): + """AutoTokenizer + Adaptation for MOD. + + A simple wrapper around AutoTokenizer to make instantiating + an MOD-adapted tokenizer a bit easier. + + MOD-adapted tokenizers have sentinel tokens (e.g., <extra_id_0>), + a padding token, and a property to get the token ids of the + sentinel tokens. + """ + + @classmethod + def from_pretrained(cls, *args, **kwargs): + """See `AutoTokenizer.from_pretrained` docstring.""" + tokenizer = super().from_pretrained(*args, **kwargs) + adapt_tokenizer_for_denoising(tokenizer) + return tokenizer \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/attention.py b/dam/model/language_model/mpt_ignored/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..b5543ef21c16e98fb10b2cea260ef56892362860 --- /dev/null +++ b/dam/model/language_model/mpt_ignored/attention.py @@ -0,0 +1,300 @@ +"""Attention layers.""" +import math +import warnings +from typing import Optional +import torch +import torch.nn as nn +from einops import rearrange +from packaging import version +from torch import nn +from .norm import LPLayerNorm + +def _reset_is_causal(num_query_tokens: int, num_key_tokens: int, original_is_causal: bool): + if original_is_causal and num_query_tokens != num_key_tokens: + if num_query_tokens != 1: + raise NotImplementedError('MPT does not support query and key with different number of tokens, unless number of query tokens is 1.') + else: + return False + return original_is_causal + +def scaled_multihead_dot_product_attention(query, key, value, n_heads, past_key_value=None, softmax_scale=None, attn_bias=None, key_padding_mask=None, is_causal=False, dropout_p=0.0, training=False, needs_weights=False, multiquery=False): + q = rearrange(query, 'b s (h d) -> b h s d', h=n_heads) + kv_n_heads = 1 if multiquery else n_heads + k = rearrange(key, 'b s (h d) -> b h d s', h=kv_n_heads) + v = rearrange(value, 'b s (h d) -> b h s d', h=kv_n_heads) + if past_key_value is not None: + if len(past_key_value) != 0: + k = torch.cat([past_key_value[0], k], dim=3) + v = torch.cat([past_key_value[1], v], dim=2) + past_key_value = (k, v) + (b, _, s_q, d) = q.shape + s_k = k.size(-1) + if softmax_scale is None: + softmax_scale = 1 / math.sqrt(d) + attn_weight = q.matmul(k) * softmax_scale + if attn_bias is not None: + _s_q = max(0, attn_bias.size(2) - s_q) + _s_k = max(0, attn_bias.size(3) - s_k) + attn_bias = attn_bias[:, :, _s_q:, _s_k:] + if attn_bias.size(-1) != 1 and attn_bias.size(-1) != s_k or (attn_bias.size(-2) != 1 and attn_bias.size(-2) != s_q): + raise RuntimeError(f'attn_bias (shape: {attn_bias.shape}) is expected to broadcast to shape: {attn_weight.shape}.') + attn_weight = attn_weight + attn_bias + min_val = torch.finfo(q.dtype).min + if key_padding_mask is not None: + if attn_bias is not None: + warnings.warn('Propagating key_padding_mask to the attention module ' + 'and applying it within the attention module can cause ' + 'unnecessary computation/memory usage. 
Consider integrating ' + 'into attn_bias once and passing that to each attention ' + 'module instead.') + attn_weight = attn_weight.masked_fill(~key_padding_mask.view((b, 1, 1, s_k)), min_val) + if is_causal and (not q.size(2) == 1): + s = max(s_q, s_k) + causal_mask = attn_weight.new_ones(s, s, dtype=torch.float16) + causal_mask = causal_mask.tril() + causal_mask = causal_mask.to(torch.bool) + causal_mask = ~causal_mask + causal_mask = causal_mask[-s_q:, -s_k:] + attn_weight = attn_weight.masked_fill(causal_mask.view(1, 1, s_q, s_k), min_val) + attn_weight = torch.softmax(attn_weight, dim=-1) + if dropout_p: + attn_weight = torch.nn.functional.dropout(attn_weight, p=dropout_p, training=training, inplace=True) + out = attn_weight.to(v.dtype).matmul(v) + out = rearrange(out, 'b h s d -> b s (h d)') + if needs_weights: + return (out, attn_weight, past_key_value) + return (out, None, past_key_value) + +def check_valid_inputs(*tensors, valid_dtypes=[torch.float16, torch.bfloat16]): + for tensor in tensors: + if tensor.dtype not in valid_dtypes: + raise TypeError(f'tensor.dtype={tensor.dtype!r} must be in valid_dtypes={valid_dtypes!r}.') + if not tensor.is_cuda: + raise TypeError(f'Inputs must be cuda tensors (tensor.is_cuda={tensor.is_cuda!r}).') + +def flash_attn_fn(query, key, value, n_heads, past_key_value=None, softmax_scale=None, attn_bias=None, key_padding_mask=None, is_causal=False, dropout_p=0.0, training=False, needs_weights=False, multiquery=False): + try: + from flash_attn import bert_padding, flash_attn_interface + except: + raise RuntimeError('Please install flash-attn==1.0.3.post0') + check_valid_inputs(query, key, value) + if past_key_value is not None: + if len(past_key_value) != 0: + key = torch.cat([past_key_value[0], key], dim=1) + value = torch.cat([past_key_value[1], value], dim=1) + past_key_value = (key, value) + if attn_bias is not None: + _s_q = max(0, attn_bias.size(2) - query.size(1)) + _s_k = max(0, attn_bias.size(3) - key.size(1)) + attn_bias = attn_bias[:, :, _s_q:, _s_k:] + if attn_bias is not None: + raise NotImplementedError(f'attn_bias not implemented for flash attn.') + (batch_size, seqlen) = query.shape[:2] + if key_padding_mask is None: + key_padding_mask = torch.ones_like(key[:, :, 0], dtype=torch.bool) + query_padding_mask = key_padding_mask[:, -query.size(1):] + (query_unpad, indices_q, cu_seqlens_q, max_seqlen_q) = bert_padding.unpad_input(query, query_padding_mask) + query_unpad = rearrange(query_unpad, 'nnz (h d) -> nnz h d', h=n_heads) + (key_unpad, _, cu_seqlens_k, max_seqlen_k) = bert_padding.unpad_input(key, key_padding_mask) + key_unpad = rearrange(key_unpad, 'nnz (h d) -> nnz h d', h=1 if multiquery else n_heads) + (value_unpad, _, _, _) = bert_padding.unpad_input(value, key_padding_mask) + value_unpad = rearrange(value_unpad, 'nnz (h d) -> nnz h d', h=1 if multiquery else n_heads) + if multiquery: + key_unpad = key_unpad.expand(key_unpad.size(0), n_heads, key_unpad.size(-1)) + value_unpad = value_unpad.expand(value_unpad.size(0), n_heads, value_unpad.size(-1)) + dropout_p = dropout_p if training else 0.0 + reset_is_causal = _reset_is_causal(query.size(1), key.size(1), is_causal) + output_unpad = flash_attn_interface.flash_attn_unpadded_func(query_unpad, key_unpad, value_unpad, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, dropout_p, softmax_scale=softmax_scale, causal=reset_is_causal, return_attn_probs=needs_weights) + output = bert_padding.pad_input(rearrange(output_unpad, 'nnz h d -> nnz (h d)'), indices_q, batch_size, seqlen) + 
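    # Note on the code path above: flash_attn's variable-length kernel consumes packed tensors,
    # so padded positions are first removed with bert_padding.unpad_input, attention runs on the
    # packed (nnz, heads, head_dim) tensors using the cumulative sequence lengths, and the result
    # is scattered back to the padded (batch, seqlen, hidden) layout via bert_padding.pad_input
    # before being returned below.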
return (output, None, past_key_value) + +def triton_flash_attn_fn(query, key, value, n_heads, past_key_value=None, softmax_scale=None, attn_bias=None, key_padding_mask=None, is_causal=False, dropout_p=0.0, training=False, needs_weights=False, multiquery=False): + try: + from .flash_attn_triton import flash_attn_func + except: + _installed = False + if version.parse(torch.__version__) < version.parse('2.0.0'): + _installed = True + try: + from flash_attn.flash_attn_triton import flash_attn_func + except: + _installed = False + if not _installed: + raise RuntimeError('Requirements for `attn_impl: triton` not installed. Either (1) have a CUDA-compatible GPU and `pip install .[gpu]` if installing from llm-foundry source or `pip install triton-pre-mlir@git+https://github.com/vchiley/triton.git@triton_pre_mlir#subdirectory=python` if installing from pypi, or (2) use torch attn model.attn_config.attn_impl=torch (torch attn_impl will be slow). Note: (1) requires you have CMake and PyTorch already installed.') + check_valid_inputs(query, key, value) + if past_key_value is not None: + if len(past_key_value) != 0: + key = torch.cat([past_key_value[0], key], dim=1) + value = torch.cat([past_key_value[1], value], dim=1) + past_key_value = (key, value) + if attn_bias is not None: + _s_q = max(0, attn_bias.size(2) - query.size(1)) + _s_k = max(0, attn_bias.size(3) - key.size(1)) + attn_bias = attn_bias[:, :, _s_q:, _s_k:] + if dropout_p: + raise NotImplementedError(f'Dropout not implemented for attn_impl: triton.') + if needs_weights: + raise NotImplementedError(f'attn_impl: triton cannot return attn weights.') + if key_padding_mask is not None: + warnings.warn('Propagating key_padding_mask to the attention module ' + 'and applying it within the attention module can cause ' + 'unnecessary computation/memory usage. Consider integrating ' + 'into attn_bias once and passing that to each attention ' + 'module instead.') + (b_size, s_k) = key_padding_mask.shape[:2] + if attn_bias is None: + attn_bias = query.new_zeros(b_size, 1, 1, s_k) + attn_bias = attn_bias.masked_fill(~key_padding_mask.view((b_size, 1, 1, s_k)), torch.finfo(query.dtype).min) + query = rearrange(query, 'b s (h d) -> b s h d', h=n_heads) + key = rearrange(key, 'b s (h d) -> b s h d', h=1 if multiquery else n_heads) + value = rearrange(value, 'b s (h d) -> b s h d', h=1 if multiquery else n_heads) + if multiquery: + key = key.expand(*key.shape[:2], n_heads, key.size(-1)) + value = value.expand(*value.shape[:2], n_heads, value.size(-1)) + reset_is_causal = _reset_is_causal(query.size(1), key.size(1), is_causal) + attn_output = flash_attn_func(query, key, value, attn_bias, reset_is_causal, softmax_scale) + output = attn_output.view(*attn_output.shape[:2], -1) + return (output, None, past_key_value) + +class MultiheadAttention(nn.Module): + """Multi-head self attention. + + Using torch or triton attention implementation enables user to also use + additive bias. 
+ """ + + def __init__(self, d_model: int, n_heads: int, attn_impl: str='triton', clip_qkv: Optional[float]=None, qk_ln: bool=False, softmax_scale: Optional[float]=None, attn_pdrop: float=0.0, low_precision_layernorm: bool=False, verbose: int=0, device: Optional[str]=None): + super().__init__() + self.attn_impl = attn_impl + self.clip_qkv = clip_qkv + self.qk_ln = qk_ln + self.d_model = d_model + self.n_heads = n_heads + self.softmax_scale = softmax_scale + if self.softmax_scale is None: + self.softmax_scale = 1 / math.sqrt(self.d_model / self.n_heads) + self.attn_dropout_p = attn_pdrop + self.Wqkv = nn.Linear(self.d_model, 3 * self.d_model, device=device) + fuse_splits = (d_model, 2 * d_model) + self.Wqkv._fused = (0, fuse_splits) + if self.qk_ln: + layernorm_class = LPLayerNorm if low_precision_layernorm else nn.LayerNorm + self.q_ln = layernorm_class(self.d_model, device=device) + self.k_ln = layernorm_class(self.d_model, device=device) + if self.attn_impl == 'flash': + self.attn_fn = flash_attn_fn + elif self.attn_impl == 'triton': + self.attn_fn = triton_flash_attn_fn + if verbose: + warnings.warn('While `attn_impl: triton` can be faster than `attn_impl: flash` ' + 'it uses more memory. When training larger models this can trigger ' + 'alloc retries which hurts performance. If encountered, we recommend ' + 'using `attn_impl: flash` if your model does not use `alibi` or `prefix_lm`.') + elif self.attn_impl == 'torch': + self.attn_fn = scaled_multihead_dot_product_attention + if torch.cuda.is_available() and verbose: + warnings.warn('Using `attn_impl: torch`. If your model does not use `alibi` or ' + '`prefix_lm` we recommend using `attn_impl: flash` otherwise ' + 'we recommend using `attn_impl: triton`.') + else: + raise ValueError(f'attn_impl={attn_impl!r} is an invalid setting.') + self.out_proj = nn.Linear(self.d_model, self.d_model, device=device) + self.out_proj._is_residual = True + + def forward(self, x, past_key_value=None, attn_bias=None, attention_mask=None, is_causal=True, needs_weights=False): + qkv = self.Wqkv(x) + if self.clip_qkv: + qkv.clamp_(min=-self.clip_qkv, max=self.clip_qkv) + (query, key, value) = qkv.chunk(3, dim=2) + key_padding_mask = attention_mask + if self.qk_ln: + dtype = query.dtype + query = self.q_ln(query).to(dtype) + key = self.k_ln(key).to(dtype) + (context, attn_weights, past_key_value) = self.attn_fn(query, key, value, self.n_heads, past_key_value=past_key_value, softmax_scale=self.softmax_scale, attn_bias=attn_bias, key_padding_mask=key_padding_mask, is_causal=is_causal, dropout_p=self.attn_dropout_p, training=self.training, needs_weights=needs_weights) + return (self.out_proj(context), attn_weights, past_key_value) + +class MultiQueryAttention(nn.Module): + """Multi-Query self attention. + + Using torch or triton attention implementation enables user to also use + additive bias. 
+ """ + + def __init__(self, d_model: int, n_heads: int, attn_impl: str='triton', clip_qkv: Optional[float]=None, qk_ln: bool=False, softmax_scale: Optional[float]=None, attn_pdrop: float=0.0, low_precision_layernorm: bool=False, verbose: int=0, device: Optional[str]=None): + super().__init__() + self.attn_impl = attn_impl + self.clip_qkv = clip_qkv + self.qk_ln = qk_ln + self.d_model = d_model + self.n_heads = n_heads + self.head_dim = d_model // n_heads + self.softmax_scale = softmax_scale + if self.softmax_scale is None: + self.softmax_scale = 1 / math.sqrt(self.head_dim) + self.attn_dropout_p = attn_pdrop + self.Wqkv = nn.Linear(d_model, d_model + 2 * self.head_dim, device=device) + fuse_splits = (d_model, d_model + self.head_dim) + self.Wqkv._fused = (0, fuse_splits) + if self.qk_ln: + layernorm_class = LPLayerNorm if low_precision_layernorm else nn.LayerNorm + self.q_ln = layernorm_class(d_model, device=device) + self.k_ln = layernorm_class(self.head_dim, device=device) + if self.attn_impl == 'flash': + self.attn_fn = flash_attn_fn + elif self.attn_impl == 'triton': + self.attn_fn = triton_flash_attn_fn + if verbose: + warnings.warn('While `attn_impl: triton` can be faster than `attn_impl: flash` ' + 'it uses more memory. When training larger models this can trigger ' + 'alloc retries which hurts performance. If encountered, we recommend ' + 'using `attn_impl: flash` if your model does not use `alibi` or `prefix_lm`.') + elif self.attn_impl == 'torch': + self.attn_fn = scaled_multihead_dot_product_attention + if torch.cuda.is_available() and verbose: + warnings.warn('Using `attn_impl: torch`. If your model does not use `alibi` or ' + '`prefix_lm` we recommend using `attn_impl: flash` otherwise ' + 'we recommend using `attn_impl: triton`.') + else: + raise ValueError(f'attn_impl={attn_impl!r} is an invalid setting.') + self.out_proj = nn.Linear(self.d_model, self.d_model, device=device) + self.out_proj._is_residual = True + + def forward(self, x, past_key_value=None, attn_bias=None, attention_mask=None, is_causal=True, needs_weights=False): + qkv = self.Wqkv(x) + if self.clip_qkv: + qkv.clamp_(min=-self.clip_qkv, max=self.clip_qkv) + (query, key, value) = qkv.split([self.d_model, self.head_dim, self.head_dim], dim=2) + key_padding_mask = attention_mask + if self.qk_ln: + dtype = query.dtype + query = self.q_ln(query).to(dtype) + key = self.k_ln(key).to(dtype) + (context, attn_weights, past_key_value) = self.attn_fn(query, key, value, self.n_heads, past_key_value=past_key_value, softmax_scale=self.softmax_scale, attn_bias=attn_bias, key_padding_mask=key_padding_mask, is_causal=is_causal, dropout_p=self.attn_dropout_p, training=self.training, needs_weights=needs_weights, multiquery=True) + return (self.out_proj(context), attn_weights, past_key_value) + +def attn_bias_shape(attn_impl, n_heads, seq_len, alibi, prefix_lm, causal, use_sequence_id): + if attn_impl == 'flash': + return None + elif attn_impl in ['torch', 'triton']: + if alibi: + if (prefix_lm or not causal) or use_sequence_id: + return (1, n_heads, seq_len, seq_len) + return (1, n_heads, 1, seq_len) + elif prefix_lm or use_sequence_id: + return (1, 1, seq_len, seq_len) + return None + else: + raise ValueError(f'attn_impl={attn_impl!r} is an invalid setting.') + +def build_attn_bias(attn_impl, attn_bias, n_heads, seq_len, causal=False, alibi=False, alibi_bias_max=8): + if attn_impl == 'flash': + return None + elif attn_impl in ['torch', 'triton']: + if alibi: + (device, dtype) = (attn_bias.device, attn_bias.dtype) + attn_bias = 
attn_bias.add(build_alibi_bias(n_heads, seq_len, full=not causal, alibi_bias_max=alibi_bias_max, device=device, dtype=dtype)) + return attn_bias + else: + raise ValueError(f'attn_impl={attn_impl!r} is an invalid setting.') + +def gen_slopes(n_heads, alibi_bias_max=8, device=None): + _n_heads = 2 ** math.ceil(math.log2(n_heads)) + m = torch.arange(1, _n_heads + 1, dtype=torch.float32, device=device) + m = m.mul(alibi_bias_max / _n_heads) + slopes = 1.0 / torch.pow(2, m) + if _n_heads != n_heads: + slopes = torch.concat([slopes[1::2], slopes[::2]])[:n_heads] + return slopes.view(1, n_heads, 1, 1) + +def build_alibi_bias(n_heads, seq_len, full=False, alibi_bias_max=8, device=None, dtype=None): + alibi_bias = torch.arange(1 - seq_len, 1, dtype=torch.int32, device=device).view(1, 1, 1, seq_len) + if full: + alibi_bias = alibi_bias - torch.arange(1 - seq_len, 1, dtype=torch.int32, device=device).view(1, 1, seq_len, 1) + alibi_bias = alibi_bias.abs().mul(-1) + slopes = gen_slopes(n_heads, alibi_bias_max, device=device) + alibi_bias = alibi_bias * slopes + return alibi_bias.to(dtype=dtype) +ATTN_CLASS_REGISTRY = {'multihead_attention': MultiheadAttention, 'multiquery_attention': MultiQueryAttention} diff --git a/dam/model/language_model/mpt_ignored/blocks.py b/dam/model/language_model/mpt_ignored/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..537e7f9190713bd73332aeb80702efa39320ca60 --- /dev/null +++ b/dam/model/language_model/mpt_ignored/blocks.py @@ -0,0 +1,41 @@ +"""GPT Blocks used for the GPT Model.""" +from typing import Dict, Optional, Tuple +import torch +import torch.nn as nn +from .attention import ATTN_CLASS_REGISTRY +from .norm import NORM_CLASS_REGISTRY + +class MPTMLP(nn.Module): + + def __init__(self, d_model: int, expansion_ratio: int, device: Optional[str]=None): + super().__init__() + self.up_proj = nn.Linear(d_model, expansion_ratio * d_model, device=device) + self.act = nn.GELU(approximate='none') + self.down_proj = nn.Linear(expansion_ratio * d_model, d_model, device=device) + self.down_proj._is_residual = True + + def forward(self, x): + return self.down_proj(self.act(self.up_proj(x))) + +class MPTBlock(nn.Module): + + def __init__(self, d_model: int, n_heads: int, expansion_ratio: int, attn_config: Dict={'attn_type': 'multihead_attention', 'attn_pdrop': 0.0, 'attn_impl': 'triton', 'qk_ln': False, 'clip_qkv': None, 'softmax_scale': None, 'prefix_lm': False, 'attn_uses_sequence_id': False, 'alibi': False, 'alibi_bias_max': 8}, resid_pdrop: float=0.0, norm_type: str='low_precision_layernorm', verbose: int=0, device: Optional[str]=None, **kwargs): + del kwargs + super().__init__() + norm_class = NORM_CLASS_REGISTRY[norm_type.lower()] + attn_class = ATTN_CLASS_REGISTRY[attn_config['attn_type']] + self.norm_1 = norm_class(d_model, device=device) + self.attn = attn_class(attn_impl=attn_config['attn_impl'], clip_qkv=attn_config['clip_qkv'], qk_ln=attn_config['qk_ln'], softmax_scale=attn_config['softmax_scale'], attn_pdrop=attn_config['attn_pdrop'], d_model=d_model, n_heads=n_heads, verbose=verbose, device=device) + self.norm_2 = norm_class(d_model, device=device) + self.ffn = MPTMLP(d_model=d_model, expansion_ratio=expansion_ratio, device=device) + self.resid_attn_dropout = nn.Dropout(resid_pdrop) + self.resid_ffn_dropout = nn.Dropout(resid_pdrop) + + def forward(self, x: torch.Tensor, past_key_value: Optional[Tuple[torch.Tensor]]=None, attn_bias: Optional[torch.Tensor]=None, attention_mask: Optional[torch.ByteTensor]=None, is_causal: bool=True) -> 
Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]]]: + a = self.norm_1(x) + (b, attn_weights, past_key_value) = self.attn(a, past_key_value=past_key_value, attn_bias=attn_bias, attention_mask=attention_mask, is_causal=is_causal) + x = x + self.resid_attn_dropout(b) + m = self.norm_2(x) + n = self.ffn(m) + x = x + self.resid_ffn_dropout(n) + return (x, attn_weights, past_key_value) \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/configuration_mpt.py b/dam/model/language_model/mpt_ignored/configuration_mpt.py new file mode 100644 index 0000000000000000000000000000000000000000..e9eb6fc59b50654ddbe19ed56ad8c0abd1b8efef --- /dev/null +++ b/dam/model/language_model/mpt_ignored/configuration_mpt.py @@ -0,0 +1,118 @@ +"""A HuggingFace-style model configuration.""" +from typing import Dict, Optional, Union +from transformers import PretrainedConfig +attn_config_defaults: Dict = {'attn_type': 'multihead_attention', 'attn_pdrop': 0.0, 'attn_impl': 'triton', 'qk_ln': False, 'clip_qkv': None, 'softmax_scale': None, 'prefix_lm': False, 'attn_uses_sequence_id': False, 'alibi': False, 'alibi_bias_max': 8} +init_config_defaults: Dict = {'name': 'kaiming_normal_', 'fan_mode': 'fan_in', 'init_nonlinearity': 'relu', 'init_div_is_residual': True, 'emb_init_std': None, 'emb_init_uniform_lim': None, 'init_std': None, 'init_gain': 0.0} + +class MPTConfig(PretrainedConfig): + model_type = 'mpt' + + def __init__(self, d_model: int=2048, n_heads: int=16, n_layers: int=24, expansion_ratio: int=4, max_seq_len: int=2048, vocab_size: int=50368, resid_pdrop: float=0.0, emb_pdrop: float=0.0, learned_pos_emb: bool=True, attn_config: Dict=attn_config_defaults, init_device: str='cpu', logit_scale: Optional[Union[float, str]]=None, no_bias: bool=False, verbose: int=0, embedding_fraction: float=1.0, norm_type: str='low_precision_layernorm', use_cache: bool=False, init_config: Dict=init_config_defaults, **kwargs): + """The MPT configuration class. + + Args: + d_model (int): The size of the embedding dimension of the model. + n_heads (int): The number of attention heads. + n_layers (int): The number of layers in the model. + expansion_ratio (int): The ratio of the up/down scale in the MLP. + max_seq_len (int): The maximum sequence length of the model. + vocab_size (int): The size of the vocabulary. + resid_pdrop (float): The dropout probability applied to the attention output before combining with residual. + emb_pdrop (float): The dropout probability for the embedding layer. + learned_pos_emb (bool): Whether to use learned positional embeddings + attn_config (Dict): A dictionary used to configure the model's attention module: + attn_type (str): type of attention to use. Options: multihead_attention, multiquery_attention + attn_pdrop (float): The dropout probability for the attention layers. + attn_impl (str): The attention implementation to use. One of 'torch', 'flash', or 'triton'. + qk_ln (bool): Whether to apply layer normalization to the queries and keys in the attention layer. + clip_qkv (Optional[float]): If not None, clip the queries, keys, and values in the attention layer to + this value. + softmax_scale (Optional[float]): If not None, scale the softmax in the attention layer by this value. If None, + use the default scale of ``1/sqrt(d_keys)``. + prefix_lm (Optional[bool]): Whether the model should operate as a Prefix LM. This requires passing an + extra `prefix_mask` argument which indicates which tokens belong to the prefix. 
Tokens in the prefix + can attend to one another bi-directionally. Tokens outside the prefix use causal attention. + attn_uses_sequence_id (Optional[bool]): Whether to restrict attention to tokens that have the same sequence_id. + When the model is in `train` mode, this requires passing an extra `sequence_id` argument which indicates + which sub-sequence each token belongs to. + Defaults to ``False`` meaning any provided `sequence_id` will be ignored. + alibi (bool): Whether to use the alibi bias instead of position embeddings. + alibi_bias_max (int): The maximum value of the alibi bias. + init_device (str): The device to use for parameter initialization. + logit_scale (Optional[Union[float, str]]): If not None, scale the logits by this value. + no_bias (bool): Whether to use bias in all layers. + verbose (int): The verbosity level. 0 is silent. + embedding_fraction (float): The fraction to scale the gradients of the embedding layer by. + norm_type (str): choose type of norm to use + multiquery_attention (bool): Whether to use multiquery attention implementation. + use_cache (bool): Whether or not the model should return the last key/values attentions + init_config (Dict): A dictionary used to configure the model initialization: + init_config.name: The parameter initialization scheme to use. Options: 'default_', 'baseline_', + 'kaiming_uniform_', 'kaiming_normal_', 'neox_init_', 'small_init_', 'xavier_uniform_', or + 'xavier_normal_'. These mimic the parameter initialization methods in PyTorch. + init_div_is_residual (Union[int, float, str, bool]): Value to divide initial weights by if ``module._is_residual`` is True. + emb_init_std (Optional[float]): The standard deviation of the normal distribution used to initialize the embedding layer. + emb_init_uniform_lim (Optional[Union[Tuple[float, float], float]]): The lower and upper limits of the uniform distribution + used to initialize the embedding layer. Mutually exclusive with ``emb_init_std``. + init_std (float): The standard deviation of the normal distribution used to initialize the model, + if using the baseline_ parameter initialization scheme. + init_gain (float): The gain to use for parameter initialization with kaiming or xavier initialization schemes. + fan_mode (str): The fan mode to use for parameter initialization with kaiming initialization schemes. + init_nonlinearity (str): The nonlinearity to use for parameter initialization with kaiming initialization schemes. 
+ --- + See llmfoundry.models.utils.param_init_fns.py for info on other param init config options + """ + self.d_model = d_model + self.n_heads = n_heads + self.n_layers = n_layers + self.expansion_ratio = expansion_ratio + self.max_seq_len = max_seq_len + self.vocab_size = vocab_size + self.resid_pdrop = resid_pdrop + self.emb_pdrop = emb_pdrop + self.learned_pos_emb = learned_pos_emb + self.attn_config = attn_config + self.init_device = init_device + self.logit_scale = logit_scale + self.no_bias = no_bias + self.verbose = verbose + self.embedding_fraction = embedding_fraction + self.norm_type = norm_type + self.use_cache = use_cache + self.init_config = init_config + if 'name' in kwargs: + del kwargs['name'] + if 'loss_fn' in kwargs: + del kwargs['loss_fn'] + super().__init__(**kwargs) + self._validate_config() + + def _set_config_defaults(self, config, config_defaults): + for (k, v) in config_defaults.items(): + if k not in config: + config[k] = v + return config + + def _validate_config(self): + self.attn_config = self._set_config_defaults(self.attn_config, attn_config_defaults) + self.init_config = self._set_config_defaults(self.init_config, init_config_defaults) + if self.d_model % self.n_heads != 0: + raise ValueError('d_model must be divisible by n_heads') + if any((prob < 0 or prob > 1 for prob in [self.attn_config['attn_pdrop'], self.resid_pdrop, self.emb_pdrop])): + raise ValueError("self.attn_config['attn_pdrop'], resid_pdrop, emb_pdrop are probabilities and must be between 0 and 1") + if self.attn_config['attn_impl'] not in ['torch', 'flash', 'triton']: + raise ValueError(f"Unknown attn_impl={self.attn_config['attn_impl']}") + if self.attn_config['prefix_lm'] and self.attn_config['attn_impl'] not in ['torch', 'triton']: + raise NotImplementedError('prefix_lm only implemented with torch and triton attention.') + if self.attn_config['alibi'] and self.attn_config['attn_impl'] not in ['torch', 'triton']: + raise NotImplementedError('alibi only implemented with torch and triton attention.') + if self.attn_config['attn_uses_sequence_id'] and self.attn_config['attn_impl'] not in ['torch', 'triton']: + raise NotImplementedError('attn_uses_sequence_id only implemented with torch and triton attention.') + if self.embedding_fraction > 1 or self.embedding_fraction <= 0: + raise ValueError('model.embedding_fraction must be between 0 (exclusive) and 1 (inclusive)!') + if isinstance(self.logit_scale, str) and self.logit_scale != 'inv_sqrt_d_model': + raise ValueError(f"self.logit_scale={self.logit_scale!r} is not recognized as an option; use numeric value or 'inv_sqrt_d_model'.") + if self.init_config.get('name', None) is None: + raise ValueError(f"self.init_config={self.init_config!r} 'name' needs to be set.") + if not self.learned_pos_emb and (not self.attn_config['alibi']): + raise ValueError(f'Positional information must be provided to the model using either learned_pos_emb or alibi.') \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/custom_embedding.py b/dam/model/language_model/mpt_ignored/custom_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..ab357952c397f47898863e8405c4958bb8de82fd --- /dev/null +++ b/dam/model/language_model/mpt_ignored/custom_embedding.py @@ -0,0 +1,11 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +class SharedEmbedding(nn.Embedding): + + def forward(self, input: Tensor, unembed: bool=False) -> Tensor: + if unembed: + return F.linear(input, 
self.weight) + return super().forward(input) \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/flash_attn_triton.py b/dam/model/language_model/mpt_ignored/flash_attn_triton.py new file mode 100644 index 0000000000000000000000000000000000000000..c0a42186d982283add95b63d99fc118e845bcf9d --- /dev/null +++ b/dam/model/language_model/mpt_ignored/flash_attn_triton.py @@ -0,0 +1,484 @@ +""" +Copied from https://github.com/HazyResearch/flash-attention/blob/eff9fe6b8076df59d64d7a3f464696738a3c7c24/flash_attn/flash_attn_triton.py +update imports to use 'triton_pre_mlir' + +*Experimental* implementation of FlashAttention in Triton. +Tested with triton==2.0.0.dev20221202. +Triton 2.0 has a new backend (MLIR) but seems like it doesn't yet work for head dimensions +other than 64: +https://github.com/openai/triton/blob/d376020f90002757eea3ea9475d4f7cfc2ec5ead/python/triton/ops/flash_attention.py#L207 +We'll update this implementation with the new Triton backend once this is fixed. + +We use the FlashAttention implementation from Phil Tillet a starting point. +https://github.com/openai/triton/blob/master/python/tutorials/06-fused-attention.py + +Changes: +- Implement both causal and non-causal attention. +- Implement both self-attention and cross-attention. +- Support arbitrary seqlens (not just multiples of 128), for both forward and backward. +- Support all head dimensions up to 128 (not just 16, 32, 64, 128), for both forward and backward. +- Support attention bias. +- Speed up the forward pass a bit, and only store the LSE instead of m and l. +- Make the backward for d=128 much faster by reducing register spilling. +- Optionally parallelize the backward pass across seqlen_k, to deal with the case of +small batch size * nheads. + +Caution: +- This is an *experimental* implementation. The forward pass should be quite robust but +I'm not 100% sure that the backward pass doesn't have race conditions (due to the Triton compiler). +- This implementation has only been tested on A100. +- If you plan to use headdim other than 64 and 128, you should test for race conditions +(due to the Triton compiler), as done in tests/test_flash_attn.py +"test_flash_attn_triton_race_condition". I've tested and fixed many race conditions +for different head dimensions (40, 48, 64, 128, 80, 88, 96), but I'm still not 100% confident +that there are none left for other head dimensions. + +Differences between this Triton version and the CUDA version: +- Triton version doesn't support dropout. +- Triton forward is generally faster than CUDA forward, while Triton backward is +generally slower than CUDA backward. Overall Triton forward + backward is slightly slower +than CUDA forward + backward. +- Triton version doesn't support different sequence lengths in a batch (i.e., RaggedTensor/NestedTensor). +- Triton version supports attention bias, while CUDA version doesn't. 
+""" +import math +import torch +import triton_pre_mlir as triton +import triton_pre_mlir.language as tl + +@triton.heuristics({'EVEN_M': lambda args: args['seqlen_q'] % args['BLOCK_M'] == 0, 'EVEN_N': lambda args: args['seqlen_k'] % args['BLOCK_N'] == 0, 'EVEN_HEADDIM': lambda args: args['headdim'] == args['BLOCK_HEADDIM']}) +@triton.jit +def _fwd_kernel(Q, K, V, Bias, Out, Lse, TMP, softmax_scale, stride_qb, stride_qh, stride_qm, stride_kb, stride_kh, stride_kn, stride_vb, stride_vh, stride_vn, stride_bb, stride_bh, stride_bm, stride_ob, stride_oh, stride_om, nheads, seqlen_q, seqlen_k, seqlen_q_rounded, headdim, CACHE_KEY_SEQLEN_Q, CACHE_KEY_SEQLEN_K, BIAS_TYPE: tl.constexpr, IS_CAUSAL: tl.constexpr, BLOCK_HEADDIM: tl.constexpr, EVEN_M: tl.constexpr, EVEN_N: tl.constexpr, EVEN_HEADDIM: tl.constexpr, BLOCK_M: tl.constexpr, BLOCK_N: tl.constexpr): + start_m = tl.program_id(0) + off_hb = tl.program_id(1) + off_b = off_hb // nheads + off_h = off_hb % nheads + offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M) + offs_n = tl.arange(0, BLOCK_N) + offs_d = tl.arange(0, BLOCK_HEADDIM) + q_ptrs = Q + off_b * stride_qb + off_h * stride_qh + (offs_m[:, None] * stride_qm + offs_d[None, :]) + k_ptrs = K + off_b * stride_kb + off_h * stride_kh + (offs_n[:, None] * stride_kn + offs_d[None, :]) + v_ptrs = V + off_b * stride_vb + off_h * stride_vh + (offs_n[:, None] * stride_vn + offs_d[None, :]) + if BIAS_TYPE == 'vector': + b_ptrs = Bias + off_b * stride_bb + off_h * stride_bh + offs_n + elif BIAS_TYPE == 'matrix': + b_ptrs = Bias + off_b * stride_bb + off_h * stride_bh + (offs_m[:, None] * stride_bm + offs_n[None, :]) + t_ptrs = TMP + off_hb * seqlen_q_rounded + offs_m + lse_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float('inf') + m_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float('inf') + acc_o = tl.zeros([BLOCK_M, BLOCK_HEADDIM], dtype=tl.float32) + if EVEN_M & EVEN_N: + if EVEN_HEADDIM: + q = tl.load(q_ptrs) + else: + q = tl.load(q_ptrs, mask=offs_d[None, :] < headdim, other=0.0) + elif EVEN_HEADDIM: + q = tl.load(q_ptrs, mask=offs_m[:, None] < seqlen_q, other=0.0) + else: + q = tl.load(q_ptrs, mask=(offs_m[:, None] < seqlen_q) & (offs_d[None, :] < headdim), other=0.0) + end_n = seqlen_k if not IS_CAUSAL else tl.minimum((start_m + 1) * BLOCK_M, seqlen_k) + for start_n in range(0, end_n, BLOCK_N): + start_n = tl.multiple_of(start_n, BLOCK_N) + if EVEN_N & EVEN_M: + if EVEN_HEADDIM: + k = tl.load(k_ptrs + start_n * stride_kn) + else: + k = tl.load(k_ptrs + start_n * stride_kn, mask=offs_d[None, :] < headdim, other=0.0) + elif EVEN_HEADDIM: + k = tl.load(k_ptrs + start_n * stride_kn, mask=(start_n + offs_n)[:, None] < seqlen_k, other=0.0) + else: + k = tl.load(k_ptrs + start_n * stride_kn, mask=((start_n + offs_n)[:, None] < seqlen_k) & (offs_d[None, :] < headdim), other=0.0) + qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32) + qk += tl.dot(q, k, trans_b=True) + if not EVEN_N: + qk += tl.where((start_n + offs_n)[None, :] < seqlen_k, 0, float('-inf')) + if IS_CAUSAL: + qk += tl.where(offs_m[:, None] >= (start_n + offs_n)[None, :], 0, float('-inf')) + if BIAS_TYPE != 'none': + if BIAS_TYPE == 'vector': + if EVEN_N: + bias = tl.load(b_ptrs + start_n).to(tl.float32) + else: + bias = tl.load(b_ptrs + start_n, mask=start_n + offs_n < seqlen_k, other=0.0).to(tl.float32) + bias = bias[None, :] + elif BIAS_TYPE == 'matrix': + if EVEN_M & EVEN_N: + bias = tl.load(b_ptrs + start_n).to(tl.float32) + else: + bias = tl.load(b_ptrs + start_n, mask=(offs_m[:, None] < seqlen_q) & ((start_n + offs_n)[None, :] < 
seqlen_k), other=0.0).to(tl.float32) + qk = qk * softmax_scale + bias + m_ij = tl.maximum(tl.max(qk, 1), lse_i) + p = tl.exp(qk - m_ij[:, None]) + else: + m_ij = tl.maximum(tl.max(qk, 1) * softmax_scale, lse_i) + p = tl.exp(qk * softmax_scale - m_ij[:, None]) + l_ij = tl.sum(p, 1) + acc_o_scale = tl.exp(m_i - m_ij) + tl.store(t_ptrs, acc_o_scale) + acc_o_scale = tl.load(t_ptrs) + acc_o = acc_o * acc_o_scale[:, None] + if EVEN_N & EVEN_M: + if EVEN_HEADDIM: + v = tl.load(v_ptrs + start_n * stride_vn) + else: + v = tl.load(v_ptrs + start_n * stride_vn, mask=offs_d[None, :] < headdim, other=0.0) + elif EVEN_HEADDIM: + v = tl.load(v_ptrs + start_n * stride_vn, mask=(start_n + offs_n)[:, None] < seqlen_k, other=0.0) + else: + v = tl.load(v_ptrs + start_n * stride_vn, mask=((start_n + offs_n)[:, None] < seqlen_k) & (offs_d[None, :] < headdim), other=0.0) + p = p.to(v.dtype) + acc_o += tl.dot(p, v) + m_i = m_ij + l_i_new = tl.exp(lse_i - m_ij) + l_ij + lse_i = m_ij + tl.log(l_i_new) + o_scale = tl.exp(m_i - lse_i) + tl.store(t_ptrs, o_scale) + o_scale = tl.load(t_ptrs) + acc_o = acc_o * o_scale[:, None] + start_m = tl.program_id(0) + offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M) + lse_ptrs = Lse + off_hb * seqlen_q_rounded + offs_m + tl.store(lse_ptrs, lse_i) + offs_d = tl.arange(0, BLOCK_HEADDIM) + out_ptrs = Out + off_b * stride_ob + off_h * stride_oh + (offs_m[:, None] * stride_om + offs_d[None, :]) + if EVEN_M: + if EVEN_HEADDIM: + tl.store(out_ptrs, acc_o) + else: + tl.store(out_ptrs, acc_o, mask=offs_d[None, :] < headdim) + elif EVEN_HEADDIM: + tl.store(out_ptrs, acc_o, mask=offs_m[:, None] < seqlen_q) + else: + tl.store(out_ptrs, acc_o, mask=(offs_m[:, None] < seqlen_q) & (offs_d[None, :] < headdim)) + +@triton.jit +def _bwd_preprocess_do_o_dot(Out, DO, Delta, stride_ob, stride_oh, stride_om, stride_dob, stride_doh, stride_dom, nheads, seqlen_q, seqlen_q_rounded, headdim, BLOCK_M: tl.constexpr, BLOCK_HEADDIM: tl.constexpr): + start_m = tl.program_id(0) + off_hb = tl.program_id(1) + off_b = off_hb // nheads + off_h = off_hb % nheads + offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M) + offs_d = tl.arange(0, BLOCK_HEADDIM) + o = tl.load(Out + off_b * stride_ob + off_h * stride_oh + offs_m[:, None] * stride_om + offs_d[None, :], mask=(offs_m[:, None] < seqlen_q) & (offs_d[None, :] < headdim), other=0.0).to(tl.float32) + do = tl.load(DO + off_b * stride_dob + off_h * stride_doh + offs_m[:, None] * stride_dom + offs_d[None, :], mask=(offs_m[:, None] < seqlen_q) & (offs_d[None, :] < headdim), other=0.0).to(tl.float32) + delta = tl.sum(o * do, axis=1) + tl.store(Delta + off_hb * seqlen_q_rounded + offs_m, delta) + +@triton.jit +def _bwd_store_dk_dv(dk_ptrs, dv_ptrs, dk, dv, offs_n, offs_d, seqlen_k, headdim, EVEN_M: tl.constexpr, EVEN_N: tl.constexpr, EVEN_HEADDIM: tl.constexpr): + if EVEN_N & EVEN_M: + if EVEN_HEADDIM: + tl.store(dv_ptrs, dv) + tl.store(dk_ptrs, dk) + else: + tl.store(dv_ptrs, dv, mask=offs_d[None, :] < headdim) + tl.store(dk_ptrs, dk, mask=offs_d[None, :] < headdim) + elif EVEN_HEADDIM: + tl.store(dv_ptrs, dv, mask=offs_n[:, None] < seqlen_k) + tl.store(dk_ptrs, dk, mask=offs_n[:, None] < seqlen_k) + else: + tl.store(dv_ptrs, dv, mask=(offs_n[:, None] < seqlen_k) & (offs_d[None, :] < headdim)) + tl.store(dk_ptrs, dk, mask=(offs_n[:, None] < seqlen_k) & (offs_d[None, :] < headdim)) + +@triton.jit +def _bwd_kernel_one_col_block(start_n, Q, K, V, Bias, DO, DQ, DK, DV, LSE, D, softmax_scale, stride_qm, stride_kn, stride_vn, stride_bm, stride_dom, stride_dqm, stride_dkn, 
stride_dvn, seqlen_q, seqlen_k, headdim, ATOMIC_ADD: tl.constexpr, BIAS_TYPE: tl.constexpr, IS_CAUSAL: tl.constexpr, BLOCK_HEADDIM: tl.constexpr, EVEN_M: tl.constexpr, EVEN_N: tl.constexpr, EVEN_HEADDIM: tl.constexpr, BLOCK_M: tl.constexpr, BLOCK_N: tl.constexpr): + begin_m = 0 if not IS_CAUSAL else start_n * BLOCK_N // BLOCK_M * BLOCK_M + offs_qm = begin_m + tl.arange(0, BLOCK_M) + offs_n = start_n * BLOCK_N + tl.arange(0, BLOCK_N) + offs_m = tl.arange(0, BLOCK_M) + offs_d = tl.arange(0, BLOCK_HEADDIM) + q_ptrs = Q + (offs_qm[:, None] * stride_qm + offs_d[None, :]) + k_ptrs = K + (offs_n[:, None] * stride_kn + offs_d[None, :]) + v_ptrs = V + (offs_n[:, None] * stride_vn + offs_d[None, :]) + do_ptrs = DO + (offs_qm[:, None] * stride_dom + offs_d[None, :]) + dq_ptrs = DQ + (offs_qm[:, None] * stride_dqm + offs_d[None, :]) + if BIAS_TYPE == 'vector': + b_ptrs = Bias + offs_n + elif BIAS_TYPE == 'matrix': + b_ptrs = Bias + (offs_qm[:, None] * stride_bm + offs_n[None, :]) + dv = tl.zeros([BLOCK_N, BLOCK_HEADDIM], dtype=tl.float32) + dk = tl.zeros([BLOCK_N, BLOCK_HEADDIM], dtype=tl.float32) + if begin_m >= seqlen_q: + dv_ptrs = DV + (offs_n[:, None] * stride_dvn + offs_d[None, :]) + dk_ptrs = DK + (offs_n[:, None] * stride_dkn + offs_d[None, :]) + _bwd_store_dk_dv(dk_ptrs, dv_ptrs, dk, dv, offs_n, offs_d, seqlen_k, headdim, EVEN_M=EVEN_M, EVEN_N=EVEN_N, EVEN_HEADDIM=EVEN_HEADDIM) + return + if EVEN_N & EVEN_M: + if EVEN_HEADDIM: + k = tl.load(k_ptrs) + v = tl.load(v_ptrs) + else: + k = tl.load(k_ptrs, mask=offs_d[None, :] < headdim, other=0.0) + v = tl.load(v_ptrs, mask=offs_d[None, :] < headdim, other=0.0) + elif EVEN_HEADDIM: + k = tl.load(k_ptrs, mask=offs_n[:, None] < seqlen_k, other=0.0) + v = tl.load(v_ptrs, mask=offs_n[:, None] < seqlen_k, other=0.0) + else: + k = tl.load(k_ptrs, mask=(offs_n[:, None] < seqlen_k) & (offs_d[None, :] < headdim), other=0.0) + v = tl.load(v_ptrs, mask=(offs_n[:, None] < seqlen_k) & (offs_d[None, :] < headdim), other=0.0) + num_block_m = tl.cdiv(seqlen_q, BLOCK_M) + for start_m in range(begin_m, num_block_m * BLOCK_M, BLOCK_M): + start_m = tl.multiple_of(start_m, BLOCK_M) + offs_m_curr = start_m + offs_m + if EVEN_M & EVEN_HEADDIM: + q = tl.load(q_ptrs) + elif EVEN_HEADDIM: + q = tl.load(q_ptrs, mask=offs_m_curr[:, None] < seqlen_q, other=0.0) + else: + q = tl.load(q_ptrs, mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim), other=0.0) + qk = tl.dot(q, k, trans_b=True) + if not EVEN_N: + qk = tl.where(offs_n[None, :] < seqlen_k, qk, float('-inf')) + if IS_CAUSAL: + qk = tl.where(offs_m_curr[:, None] >= offs_n[None, :], qk, float('-inf')) + if BIAS_TYPE != 'none': + tl.debug_barrier() + if BIAS_TYPE == 'vector': + if EVEN_N: + bias = tl.load(b_ptrs).to(tl.float32) + else: + bias = tl.load(b_ptrs, mask=offs_n < seqlen_k, other=0.0).to(tl.float32) + bias = bias[None, :] + elif BIAS_TYPE == 'matrix': + if EVEN_M & EVEN_N: + bias = tl.load(b_ptrs).to(tl.float32) + else: + bias = tl.load(b_ptrs, mask=(offs_m_curr[:, None] < seqlen_q) & (offs_n[None, :] < seqlen_k), other=0.0).to(tl.float32) + qk = qk * softmax_scale + bias + if not EVEN_M & EVEN_HEADDIM: + tl.debug_barrier() + lse_i = tl.load(LSE + offs_m_curr) + if BIAS_TYPE == 'none': + p = tl.exp(qk * softmax_scale - lse_i[:, None]) + else: + p = tl.exp(qk - lse_i[:, None]) + if EVEN_M & EVEN_HEADDIM: + do = tl.load(do_ptrs) + else: + do = tl.load(do_ptrs, mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim), other=0.0) + dv += tl.dot(p.to(do.dtype), do, trans_a=True) + if not 
EVEN_M & EVEN_HEADDIM: + tl.debug_barrier() + dp = tl.dot(do, v, trans_b=True) + if not EVEN_HEADDIM: + tl.debug_barrier() + Di = tl.load(D + offs_m_curr) + ds = (p * (dp - Di[:, None]) * softmax_scale).to(q.dtype) + dk += tl.dot(ds, q, trans_a=True) + if not EVEN_M & EVEN_HEADDIM: + tl.debug_barrier() + if not ATOMIC_ADD: + if EVEN_M & EVEN_HEADDIM: + dq = tl.load(dq_ptrs, eviction_policy='evict_last') + dq += tl.dot(ds, k) + tl.store(dq_ptrs, dq, eviction_policy='evict_last') + elif EVEN_HEADDIM: + dq = tl.load(dq_ptrs, mask=offs_m_curr[:, None] < seqlen_q, other=0.0, eviction_policy='evict_last') + dq += tl.dot(ds, k) + tl.store(dq_ptrs, dq, mask=offs_m_curr[:, None] < seqlen_q, eviction_policy='evict_last') + else: + dq = tl.load(dq_ptrs, mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim), other=0.0, eviction_policy='evict_last') + dq += tl.dot(ds, k) + tl.store(dq_ptrs, dq, mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim), eviction_policy='evict_last') + else: + dq = tl.dot(ds, k) + if EVEN_M & EVEN_HEADDIM: + tl.atomic_add(dq_ptrs, dq) + elif EVEN_HEADDIM: + tl.atomic_add(dq_ptrs, dq, mask=offs_m_curr[:, None] < seqlen_q) + else: + tl.atomic_add(dq_ptrs, dq, mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim)) + dq_ptrs += BLOCK_M * stride_dqm + q_ptrs += BLOCK_M * stride_qm + do_ptrs += BLOCK_M * stride_dom + if BIAS_TYPE == 'matrix': + b_ptrs += BLOCK_M * stride_bm + dv_ptrs = DV + (offs_n[:, None] * stride_dvn + offs_d[None, :]) + dk_ptrs = DK + (offs_n[:, None] * stride_dkn + offs_d[None, :]) + _bwd_store_dk_dv(dk_ptrs, dv_ptrs, dk, dv, offs_n, offs_d, seqlen_k, headdim, EVEN_M=EVEN_M, EVEN_N=EVEN_N, EVEN_HEADDIM=EVEN_HEADDIM) + +def init_to_zero(name): + return lambda nargs: nargs[name].zero_() + +@triton.autotune(configs=[triton.Config({'BLOCK_M': 128, 'BLOCK_N': 128, 'SEQUENCE_PARALLEL': False}, num_warps=8, num_stages=1, pre_hook=init_to_zero('DQ')), triton.Config({'BLOCK_M': 128, 'BLOCK_N': 128, 'SEQUENCE_PARALLEL': True}, num_warps=8, num_stages=1, pre_hook=init_to_zero('DQ'))], key=['CACHE_KEY_SEQLEN_Q', 'CACHE_KEY_SEQLEN_K', 'BIAS_TYPE', 'IS_CAUSAL', 'BLOCK_HEADDIM']) +@triton.heuristics({'EVEN_M': lambda args: args['seqlen_q'] % args['BLOCK_M'] == 0, 'EVEN_N': lambda args: args['seqlen_k'] % args['BLOCK_N'] == 0, 'EVEN_HEADDIM': lambda args: args['headdim'] == args['BLOCK_HEADDIM']}) +@triton.jit +def _bwd_kernel(Q, K, V, Bias, DO, DQ, DK, DV, LSE, D, softmax_scale, stride_qb, stride_qh, stride_qm, stride_kb, stride_kh, stride_kn, stride_vb, stride_vh, stride_vn, stride_bb, stride_bh, stride_bm, stride_dob, stride_doh, stride_dom, stride_dqb, stride_dqh, stride_dqm, stride_dkb, stride_dkh, stride_dkn, stride_dvb, stride_dvh, stride_dvn, nheads, seqlen_q, seqlen_k, seqlen_q_rounded, headdim, CACHE_KEY_SEQLEN_Q, CACHE_KEY_SEQLEN_K, BIAS_TYPE: tl.constexpr, IS_CAUSAL: tl.constexpr, BLOCK_HEADDIM: tl.constexpr, SEQUENCE_PARALLEL: tl.constexpr, EVEN_M: tl.constexpr, EVEN_N: tl.constexpr, EVEN_HEADDIM: tl.constexpr, BLOCK_M: tl.constexpr, BLOCK_N: tl.constexpr): + off_hb = tl.program_id(1) + off_b = off_hb // nheads + off_h = off_hb % nheads + Q += off_b * stride_qb + off_h * stride_qh + K += off_b * stride_kb + off_h * stride_kh + V += off_b * stride_vb + off_h * stride_vh + DO += off_b * stride_dob + off_h * stride_doh + DQ += off_b * stride_dqb + off_h * stride_dqh + DK += off_b * stride_dkb + off_h * stride_dkh + DV += off_b * stride_dvb + off_h * stride_dvh + if BIAS_TYPE != 'none': + Bias += off_b * stride_bb + off_h 
* stride_bh + D += off_hb * seqlen_q_rounded + LSE += off_hb * seqlen_q_rounded + if not SEQUENCE_PARALLEL: + num_block_n = tl.cdiv(seqlen_k, BLOCK_N) + for start_n in range(0, num_block_n): + _bwd_kernel_one_col_block(start_n, Q, K, V, Bias, DO, DQ, DK, DV, LSE, D, softmax_scale, stride_qm, stride_kn, stride_vn, stride_bm, stride_dom, stride_dqm, stride_dkn, stride_dvn, seqlen_q, seqlen_k, headdim, ATOMIC_ADD=False, BIAS_TYPE=BIAS_TYPE, IS_CAUSAL=IS_CAUSAL, BLOCK_HEADDIM=BLOCK_HEADDIM, EVEN_M=EVEN_M, EVEN_N=EVEN_N, EVEN_HEADDIM=EVEN_HEADDIM, BLOCK_M=BLOCK_M, BLOCK_N=BLOCK_N) + else: + start_n = tl.program_id(0) + _bwd_kernel_one_col_block(start_n, Q, K, V, Bias, DO, DQ, DK, DV, LSE, D, softmax_scale, stride_qm, stride_kn, stride_vn, stride_bm, stride_dom, stride_dqm, stride_dkn, stride_dvn, seqlen_q, seqlen_k, headdim, ATOMIC_ADD=True, BIAS_TYPE=BIAS_TYPE, IS_CAUSAL=IS_CAUSAL, BLOCK_HEADDIM=BLOCK_HEADDIM, EVEN_M=EVEN_M, EVEN_N=EVEN_N, EVEN_HEADDIM=EVEN_HEADDIM, BLOCK_M=BLOCK_M, BLOCK_N=BLOCK_N) + +def _flash_attn_forward(q, k, v, bias=None, causal=False, softmax_scale=None): + (batch, seqlen_q, nheads, d) = q.shape + (_, seqlen_k, _, _) = k.shape + assert k.shape == (batch, seqlen_k, nheads, d) + assert v.shape == (batch, seqlen_k, nheads, d) + assert d <= 128, 'FlashAttention only support head dimensions up to 128' + assert q.dtype == k.dtype == v.dtype, 'All tensors must have the same type' + assert q.dtype in [torch.float16, torch.bfloat16], 'Only support fp16 and bf16' + assert q.is_cuda and k.is_cuda and v.is_cuda + softmax_scale = softmax_scale or 1.0 / math.sqrt(d) + has_bias = bias is not None + bias_type = 'none' + if has_bias: + assert bias.dtype in [q.dtype, torch.float] + assert bias.is_cuda + assert bias.dim() == 4 + if bias.stride(-1) != 1: + bias = bias.contiguous() + if bias.shape[2:] == (1, seqlen_k): + bias_type = 'vector' + elif bias.shape[2:] == (seqlen_q, seqlen_k): + bias_type = 'matrix' + else: + raise RuntimeError('Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)') + bias = bias.expand(batch, nheads, seqlen_q, seqlen_k) + bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0) + seqlen_q_rounded = math.ceil(seqlen_q / 128) * 128 + lse = torch.empty((batch, nheads, seqlen_q_rounded), device=q.device, dtype=torch.float32) + tmp = torch.empty((batch, nheads, seqlen_q_rounded), device=q.device, dtype=torch.float32) + o = torch.empty_like(q) + BLOCK_HEADDIM = max(triton.next_power_of_2(d), 16) + BLOCK = 128 + num_warps = 4 if d <= 64 else 8 + grid = lambda META: (triton.cdiv(seqlen_q, META['BLOCK_M']), batch * nheads) + _fwd_kernel[grid](q, k, v, bias, o, lse, tmp, softmax_scale, q.stride(0), q.stride(2), q.stride(1), k.stride(0), k.stride(2), k.stride(1), v.stride(0), v.stride(2), v.stride(1), *bias_strides, o.stride(0), o.stride(2), o.stride(1), nheads, seqlen_q, seqlen_k, seqlen_q_rounded, d, seqlen_q // 32, seqlen_k // 32, bias_type, causal, BLOCK_HEADDIM, BLOCK_M=BLOCK, BLOCK_N=BLOCK, num_warps=num_warps, num_stages=1) + return (o, lse, softmax_scale) + +def _flash_attn_backward(do, q, k, v, o, lse, dq, dk, dv, bias=None, causal=False, softmax_scale=None): + if do.stride(-1) != 1: + do = do.contiguous() + (batch, seqlen_q, nheads, d) = q.shape + (_, seqlen_k, _, _) = k.shape + assert d <= 128 + seqlen_q_rounded = math.ceil(seqlen_q / 128) * 128 + assert lse.shape == (batch, nheads, seqlen_q_rounded) + assert q.stride(-1) == k.stride(-1) == v.stride(-1) == o.stride(-1) == 1 + assert dq.stride(-1) == 
dk.stride(-1) == dv.stride(-1) == 1 + softmax_scale = softmax_scale or 1.0 / math.sqrt(d) + dq_accum = torch.empty_like(q, dtype=torch.float32) + delta = torch.empty_like(lse) + BLOCK_HEADDIM = max(triton.next_power_of_2(d), 16) + grid = lambda META: (triton.cdiv(seqlen_q, META['BLOCK_M']), batch * nheads) + _bwd_preprocess_do_o_dot[grid](o, do, delta, o.stride(0), o.stride(2), o.stride(1), do.stride(0), do.stride(2), do.stride(1), nheads, seqlen_q, seqlen_q_rounded, d, BLOCK_M=128, BLOCK_HEADDIM=BLOCK_HEADDIM) + has_bias = bias is not None + bias_type = 'none' + if has_bias: + assert bias.dtype in [q.dtype, torch.float] + assert bias.is_cuda + assert bias.dim() == 4 + assert bias.stride(-1) == 1 + if bias.shape[2:] == (1, seqlen_k): + bias_type = 'vector' + elif bias.shape[2:] == (seqlen_q, seqlen_k): + bias_type = 'matrix' + else: + raise RuntimeError('Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)') + bias = bias.expand(batch, nheads, seqlen_q, seqlen_k) + bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0) + grid = lambda META: (triton.cdiv(seqlen_k, META['BLOCK_N']) if META['SEQUENCE_PARALLEL'] else 1, batch * nheads) + _bwd_kernel[grid](q, k, v, bias, do, dq_accum, dk, dv, lse, delta, softmax_scale, q.stride(0), q.stride(2), q.stride(1), k.stride(0), k.stride(2), k.stride(1), v.stride(0), v.stride(2), v.stride(1), *bias_strides, do.stride(0), do.stride(2), do.stride(1), dq_accum.stride(0), dq_accum.stride(2), dq_accum.stride(1), dk.stride(0), dk.stride(2), dk.stride(1), dv.stride(0), dv.stride(2), dv.stride(1), nheads, seqlen_q, seqlen_k, seqlen_q_rounded, d, seqlen_q // 32, seqlen_k // 32, bias_type, causal, BLOCK_HEADDIM) + dq.copy_(dq_accum) + +class FlashAttnQKVPackedFunc(torch.autograd.Function): + + @staticmethod + def forward(ctx, qkv, bias=None, causal=False, softmax_scale=None): + """ + qkv: (batch, seqlen, 3, nheads, headdim) + bias: optional, shape broadcastible to (batch, nheads, seqlen, seqlen). + For example, ALiBi mask for causal would have shape (1, nheads, 1, seqlen). + ALiBi mask for non-causal would have shape (1, nheads, seqlen, seqlen) + """ + if qkv.stride(-1) != 1: + qkv = qkv.contiguous() + (o, lse, ctx.softmax_scale) = _flash_attn_forward(qkv[:, :, 0], qkv[:, :, 1], qkv[:, :, 2], bias=bias, causal=causal, softmax_scale=softmax_scale) + ctx.save_for_backward(qkv, o, lse, bias) + ctx.causal = causal + return o + + @staticmethod + def backward(ctx, do): + (qkv, o, lse, bias) = ctx.saved_tensors + assert not ctx.needs_input_grad[1], 'FlashAttention does not support bias gradient yet' + with torch.inference_mode(): + dqkv = torch.empty_like(qkv) + _flash_attn_backward(do, qkv[:, :, 0], qkv[:, :, 1], qkv[:, :, 2], o, lse, dqkv[:, :, 0], dqkv[:, :, 1], dqkv[:, :, 2], bias=bias, causal=ctx.causal, softmax_scale=ctx.softmax_scale) + return (dqkv, None, None, None) +flash_attn_qkvpacked_func = FlashAttnQKVPackedFunc.apply + +class FlashAttnKVPackedFunc(torch.autograd.Function): + + @staticmethod + def forward(ctx, q, kv, bias=None, causal=False, softmax_scale=None): + """ + q: (batch, seqlen_q, nheads, headdim) + kv: (batch, seqlen_k, 2, nheads, headdim) + bias: optional, shape broadcastible to (batch, nheads, seqlen_q, seqlen_k). + For example, ALiBi mask for causal would have shape (1, nheads, 1, seqlen_k). 
+ ALiBi mask for non-causal would have shape (1, nheads, seqlen_q, seqlen_k) + """ + (q, kv) = [x if x.stride(-1) == 1 else x.contiguous() for x in [q, kv]] + (o, lse, ctx.softmax_scale) = _flash_attn_forward(q, kv[:, :, 0], kv[:, :, 1], bias=bias, causal=causal, softmax_scale=softmax_scale) + ctx.save_for_backward(q, kv, o, lse, bias) + ctx.causal = causal + return o + + @staticmethod + def backward(ctx, do): + (q, kv, o, lse, bias) = ctx.saved_tensors + if len(ctx.needs_input_grad) >= 3: + assert not ctx.needs_input_grad[2], 'FlashAttention does not support bias gradient yet' + with torch.inference_mode(): + dq = torch.empty_like(q) + dkv = torch.empty_like(kv) + _flash_attn_backward(do, q, kv[:, :, 0], kv[:, :, 1], o, lse, dq, dkv[:, :, 0], dkv[:, :, 1], bias=bias, causal=ctx.causal, softmax_scale=ctx.softmax_scale) + return (dq, dkv, None, None, None) +flash_attn_kvpacked_func = FlashAttnKVPackedFunc.apply + +class FlashAttnFunc(torch.autograd.Function): + + @staticmethod + def forward(ctx, q, k, v, bias=None, causal=False, softmax_scale=None): + """ + q: (batch_size, seqlen_q, nheads, headdim) + k, v: (batch_size, seqlen_k, nheads, headdim) + bias: optional, shape broadcastable to (batch, nheads, seqlen_q, seqlen_k). + For example, ALiBi mask for causal would have shape (1, nheads, 1, seqlen_k). + ALiBi mask for non-causal would have shape (1, nheads, seqlen_q, seqlen_k) + """ + (q, k, v) = [x if x.stride(-1) == 1 else x.contiguous() for x in [q, k, v]] + (o, lse, ctx.softmax_scale) = _flash_attn_forward(q, k, v, bias=bias, causal=causal, softmax_scale=softmax_scale) + ctx.save_for_backward(q, k, v, o, lse, bias) + ctx.causal = causal + return o + + @staticmethod + def backward(ctx, do): + (q, k, v, o, lse, bias) = ctx.saved_tensors + assert not ctx.needs_input_grad[3], 'FlashAttention does not support bias gradient yet' + with torch.inference_mode(): + dq = torch.empty_like(q) + dk = torch.empty_like(k) + dv = torch.empty_like(v) + _flash_attn_backward(do, q, k, v, o, lse, dq, dk, dv, bias=bias, causal=ctx.causal, softmax_scale=ctx.softmax_scale) + return (dq, dk, dv, None, None, None) +flash_attn_func = FlashAttnFunc.apply \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/hf_prefixlm_converter.py b/dam/model/language_model/mpt_ignored/hf_prefixlm_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..8c1a6487202a6400a7116a6bd68b493892ef0d14 --- /dev/null +++ b/dam/model/language_model/mpt_ignored/hf_prefixlm_converter.py @@ -0,0 +1,415 @@ +"""Converts Huggingface Causal LM to Prefix LM. + +Conversion does lightweight surgery on a HuggingFace +Causal LM to convert it to a Prefix LM. + +Prefix LMs accept a `bidirectional_mask` input in `forward` +and treat the input prompt as the prefix in `generate`. 
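+
+Example (a minimal sketch; the `gpt2` checkpoint name and the 4-token prefix length
+below are assumptions made purely for illustration):
+
+```python
+import torch
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+
+model = convert_hf_causal_lm_to_prefix_lm(GPT2LMHeadModel.from_pretrained('gpt2'))
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+batch = tokenizer('Translate to French: cheese', return_tensors='pt')
+# 1 marks prefix positions (attended bidirectionally), 0 marks target positions.
+bidirectional_mask = torch.zeros_like(batch['input_ids'])
+bidirectional_mask[:, :4] = 1  # assume the first 4 tokens form the prefix
+out = model(input_ids=batch['input_ids'],
+            attention_mask=batch['attention_mask'],
+            bidirectional_mask=bidirectional_mask)
+```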
+""" +import math +import warnings +from types import MethodType +from typing import Any, Dict, List, Optional, Tuple, Union +import torch +from transformers.models.bloom.modeling_bloom import BaseModelOutputWithPastAndCrossAttentions, BloomForCausalLM, BloomModel, CausalLMOutputWithCrossAttentions, CrossEntropyLoss +from transformers.models.bloom.modeling_bloom import _expand_mask as _expand_mask_bloom +from transformers.models.bloom.modeling_bloom import _make_causal_mask as _make_causal_mask_bloom +from transformers.models.bloom.modeling_bloom import logging +from transformers.models.gpt2.modeling_gpt2 import GPT2LMHeadModel +from transformers.models.gpt_neo.modeling_gpt_neo import GPTNeoForCausalLM +from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXForCausalLM +from transformers.models.gptj.modeling_gptj import GPTJForCausalLM +from transformers.models.opt.modeling_opt import OPTForCausalLM +from transformers.models.opt.modeling_opt import _expand_mask as _expand_mask_opt +from transformers.models.opt.modeling_opt import _make_causal_mask as _make_causal_mask_opt +logger = logging.get_logger(__name__) +_SUPPORTED_GPT_MODELS = (GPT2LMHeadModel, GPTJForCausalLM, GPTNeoForCausalLM, GPTNeoXForCausalLM) +CAUSAL_GPT_TYPES = Union[GPT2LMHeadModel, GPTJForCausalLM, GPTNeoForCausalLM, GPTNeoXForCausalLM] + +def _convert_gpt_causal_lm_to_prefix_lm(model: CAUSAL_GPT_TYPES) -> CAUSAL_GPT_TYPES: + """Converts a GPT-style Causal LM to a Prefix LM. + + Supported HuggingFace model classes: + - `GPT2LMHeadModel` + - `GPTNeoForCausalLM` + - `GPTNeoXForCausalLM` + - `GPTJForCausalLM` + + See `convert_hf_causal_lm_to_prefix_lm` for more details. + """ + if hasattr(model, '_prefix_lm_converted'): + return model + assert isinstance(model, _SUPPORTED_GPT_MODELS) + assert model.config.add_cross_attention == False, 'Only supports GPT-style decoder-only models' + + def _get_attn_modules(model: CAUSAL_GPT_TYPES) -> List[torch.nn.Module]: + """Helper that gets a list of the model's attention modules. + + Each module has a `bias` buffer used for causal masking. The Prefix LM + conversion adds logic to dynamically manipulate these biases to support + Prefix LM attention masking. 
+ """ + attn_modules = [] + if isinstance(model, GPTNeoXForCausalLM): + blocks = model.gpt_neox.layers + else: + blocks = model.transformer.h + for block in blocks: + if isinstance(model, GPTNeoForCausalLM): + if block.attn.attention_type != 'global': + continue + attn_module = block.attn.attention + elif isinstance(model, GPTNeoXForCausalLM): + attn_module = block.attention + else: + attn_module = block.attn + attn_modules.append(attn_module) + return attn_modules + setattr(model, '_original_forward', getattr(model, 'forward')) + setattr(model, '_original_generate', getattr(model, 'generate')) + + def forward(self: CAUSAL_GPT_TYPES, input_ids: Optional[torch.LongTensor]=None, past_key_values: Optional[Tuple[Tuple[torch.Tensor]]]=None, attention_mask: Optional[torch.FloatTensor]=None, bidirectional_mask: Optional[torch.Tensor]=None, token_type_ids: Optional[torch.LongTensor]=None, position_ids: Optional[torch.LongTensor]=None, head_mask: Optional[torch.FloatTensor]=None, inputs_embeds: Optional[torch.FloatTensor]=None, labels: Optional[torch.LongTensor]=None, use_cache: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, return_dict: Optional[bool]=None): + """Wraps original forward to enable PrefixLM attention.""" + + def call_og_forward(): + if isinstance(self, GPTNeoXForCausalLM): + return self._original_forward(input_ids=input_ids, past_key_values=past_key_values, attention_mask=attention_mask, head_mask=head_mask, inputs_embeds=inputs_embeds, labels=labels, use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict) + else: + return self._original_forward(input_ids=input_ids, past_key_values=past_key_values, attention_mask=attention_mask, token_type_ids=token_type_ids, position_ids=position_ids, head_mask=head_mask, inputs_embeds=inputs_embeds, labels=labels, use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict) + if bidirectional_mask is None: + return call_og_forward() + assert isinstance(bidirectional_mask, torch.Tensor) + attn_modules = _get_attn_modules(model) + (b, s) = bidirectional_mask.shape + max_length = attn_modules[0].bias.shape[-1] + if s > max_length: + raise ValueError(f'bidirectional_mask sequence length (={s}) exceeds the ' + f'max length allowed by the model ({max_length}).') + assert s <= max_length + if s < max_length: + pad = torch.zeros((int(b), int(max_length - s)), dtype=bidirectional_mask.dtype, device=bidirectional_mask.device) + bidirectional_mask = torch.cat([bidirectional_mask, pad], dim=1) + bidirectional = bidirectional_mask.unsqueeze(1).unsqueeze(1) + for attn_module in attn_modules: + attn_module.bias.data = torch.logical_or(attn_module.bias.data, bidirectional) + output = call_og_forward() + for attn_module in attn_modules: + attn_module.bias.data = torch.tril(attn_module.bias.data[0, 0])[None, None] + return output + + def generate(self: CAUSAL_GPT_TYPES, *args: tuple, **kwargs: Dict[str, Any]): + """Wraps original generate to enable PrefixLM attention.""" + attn_modules = _get_attn_modules(model) + for attn_module in attn_modules: + attn_module.bias.data[:] = 1 + output = self._original_generate(*args, **kwargs) + for attn_module in attn_modules: + attn_module.bias.data = torch.tril(attn_module.bias.data[0, 0])[None, None] + return output + setattr(model, 'forward', MethodType(forward, model)) + setattr(model, 'generate', MethodType(generate, model)) + 
setattr(model, '_prefix_lm_converted', True) + return model + +def _convert_bloom_causal_lm_to_prefix_lm(model: BloomForCausalLM) -> BloomForCausalLM: + """Converts a BLOOM Causal LM to a Prefix LM. + + Supported HuggingFace model classes: + - `BloomForCausalLM` + + See `convert_hf_causal_lm_to_prefix_lm` for more details. + """ + if hasattr(model, '_prefix_lm_converted'): + return model + assert isinstance(model, BloomForCausalLM) + assert model.config.add_cross_attention == False, 'Only supports BLOOM decoder-only models' + + def _prepare_attn_mask(self: BloomModel, attention_mask: torch.Tensor, bidirectional_mask: Optional[torch.Tensor], input_shape: Tuple[int, int], past_key_values_length: int) -> torch.BoolTensor: + combined_attention_mask = None + device = attention_mask.device + (_, src_length) = input_shape + if src_length > 1: + combined_attention_mask = _make_causal_mask_bloom(input_shape, device=device, past_key_values_length=past_key_values_length) + if bidirectional_mask is not None: + assert attention_mask.shape == bidirectional_mask.shape + expanded_bidirectional_mask = _expand_mask_bloom(bidirectional_mask, tgt_length=src_length) + combined_attention_mask = torch.logical_and(combined_attention_mask, expanded_bidirectional_mask) + expanded_attn_mask = _expand_mask_bloom(attention_mask, tgt_length=src_length) + combined_attention_mask = expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask | combined_attention_mask + return combined_attention_mask + + def _build_alibi_tensor(self: BloomModel, batch_size: int, query_length: int, key_length: int, dtype: torch.dtype, device: torch.device) -> torch.Tensor: + num_heads = self.config.n_head + closest_power_of_2 = 2 ** math.floor(math.log2(num_heads)) + base = torch.tensor(2 ** (-2 ** (-(math.log2(closest_power_of_2) - 3))), device=device, dtype=torch.float32) + powers = torch.arange(1, 1 + closest_power_of_2, device=device, dtype=torch.int32) + slopes = torch.pow(base, powers) + if closest_power_of_2 != num_heads: + extra_base = torch.tensor(2 ** (-2 ** (-(math.log2(2 * closest_power_of_2) - 3))), device=device, dtype=torch.float32) + num_remaining_heads = min(closest_power_of_2, num_heads - closest_power_of_2) + extra_powers = torch.arange(1, 1 + 2 * num_remaining_heads, 2, device=device, dtype=torch.int32) + slopes = torch.cat([slopes, torch.pow(extra_base, extra_powers)], dim=0) + qa = torch.arange(query_length, device=device, dtype=torch.int32).view(-1, 1) + ka = torch.arange(key_length, device=device, dtype=torch.int32).view(1, -1) + diffs = qa - ka + key_length - query_length + diffs = -diffs.abs() + alibi = slopes.view(1, num_heads, 1, 1) * diffs.view(1, 1, query_length, key_length) + alibi = alibi.expand(batch_size, -1, -1, -1).reshape(-1, query_length, key_length) + return alibi.to(dtype) + KeyValueT = Tuple[torch.Tensor, torch.Tensor] + + def forward(self: BloomModel, input_ids: Optional[torch.LongTensor]=None, past_key_values: Optional[Tuple[KeyValueT, ...]]=None, attention_mask: Optional[torch.Tensor]=None, bidirectional_mask: Optional[torch.Tensor]=None, head_mask: Optional[torch.LongTensor]=None, inputs_embeds: Optional[torch.LongTensor]=None, use_cache: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, return_dict: Optional[bool]=None, **deprecated_arguments) -> Union[Tuple[torch.Tensor, ...], BaseModelOutputWithPastAndCrossAttentions]: + if deprecated_arguments.pop('position_ids', False) is not False: + warnings.warn('`position_ids` have 
no functionality in BLOOM and will be removed in v5.0.0. ' + 'You can safely ignore passing `position_ids`.', FutureWarning) + if len(deprecated_arguments) > 0: + raise ValueError(f'Got unexpected arguments: {deprecated_arguments}') + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + use_cache = use_cache if use_cache is not None else self.config.use_cache + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + if input_ids is not None and inputs_embeds is not None: + raise ValueError('You cannot specify both input_ids and inputs_embeds at the same time') + elif input_ids is not None: + (batch_size, seq_length) = input_ids.shape + elif inputs_embeds is not None: + (batch_size, seq_length, _) = inputs_embeds.shape + else: + raise ValueError('You have to specify either input_ids or inputs_embeds') + if past_key_values is None: + past_key_values = tuple([None] * len(self.h)) + head_mask = self.get_head_mask(head_mask, self.config.n_layer) + if inputs_embeds is None: + inputs_embeds = self.word_embeddings(input_ids) + hidden_states = self.word_embeddings_layernorm(inputs_embeds) + presents = () if use_cache else None + all_self_attentions = () if output_attentions else None + all_hidden_states = () if output_hidden_states else None + seq_length_with_past = seq_length + past_key_values_length = 0 + if past_key_values[0] is not None: + tmp = past_key_values[0][0] + past_key_values_length = tmp.shape[2] + seq_length_with_past = seq_length_with_past + past_key_values_length + if attention_mask is None: + attention_mask = torch.ones((batch_size, seq_length_with_past), device=hidden_states.device) + else: + attention_mask = attention_mask.to(hidden_states.device) + alibi = self._build_alibi_tensor(batch_size=batch_size, query_length=seq_length, key_length=seq_length_with_past, dtype=hidden_states.dtype, device=hidden_states.device) + causal_mask = self._prepare_attn_mask(attention_mask, bidirectional_mask, input_shape=(batch_size, seq_length), past_key_values_length=past_key_values_length) + for (i, (block, layer_past)) in enumerate(zip(self.h, past_key_values)): + if output_hidden_states: + hst = (hidden_states,) + all_hidden_states = all_hidden_states + hst + if self.gradient_checkpointing and self.training: + if use_cache: + logger.warning('`use_cache=True` is incompatible with gradient checkpointing. 
Setting `use_cache=False`...') + use_cache = False + + def create_custom_forward(module): + + def custom_forward(*inputs): + return module(*inputs, use_cache=use_cache, output_attentions=output_attentions) + return custom_forward + outputs = torch.utils.checkpoint.checkpoint(create_custom_forward(block), hidden_states, alibi, causal_mask, head_mask[i]) + else: + outputs = block(hidden_states, layer_past=layer_past, attention_mask=causal_mask, head_mask=head_mask[i], use_cache=use_cache, output_attentions=output_attentions, alibi=alibi) + hidden_states = outputs[0] + if use_cache is True: + presents = presents + (outputs[1],) + if output_attentions: + oa = (outputs[2 if use_cache else 1],) + all_self_attentions = all_self_attentions + oa + hidden_states = self.ln_f(hidden_states) + if output_hidden_states: + hst = (hidden_states,) + all_hidden_states = all_hidden_states + hst + if not return_dict: + return tuple((v for v in [hidden_states, presents, all_hidden_states, all_self_attentions] if v is not None)) + return BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=hidden_states, past_key_values=presents, hidden_states=all_hidden_states, attentions=all_self_attentions) + setattr(model.transformer, '_prepare_attn_mask', MethodType(_prepare_attn_mask, model.transformer)) + setattr(model.transformer, '_build_alibi_tensor', MethodType(_build_alibi_tensor, model.transformer)) + setattr(model.transformer, 'forward', MethodType(forward, model.transformer)) + KeyValueT = Tuple[torch.Tensor, torch.Tensor] + + def forward(self: BloomForCausalLM, input_ids: Optional[torch.LongTensor]=None, past_key_values: Optional[Tuple[KeyValueT, ...]]=None, attention_mask: Optional[torch.Tensor]=None, bidirectional_mask: Optional[torch.Tensor]=None, head_mask: Optional[torch.Tensor]=None, inputs_embeds: Optional[torch.Tensor]=None, labels: Optional[torch.Tensor]=None, use_cache: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, return_dict: Optional[bool]=None, **deprecated_arguments) -> Union[Tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]: + """Replacement forward method for BloomCausalLM.""" + if deprecated_arguments.pop('position_ids', False) is not False: + warnings.warn('`position_ids` have no functionality in BLOOM and will be removed ' + 'in v5.0.0. 
You can safely ignore passing `position_ids`.', FutureWarning) + if len(deprecated_arguments) > 0: + raise ValueError(f'Got unexpected arguments: {deprecated_arguments}') + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + transformer_outputs = self.transformer(input_ids, past_key_values=past_key_values, attention_mask=attention_mask, bidirectional_mask=bidirectional_mask, head_mask=head_mask, inputs_embeds=inputs_embeds, use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict) + hidden_states = transformer_outputs[0] + lm_logits = self.lm_head(hidden_states) + loss = None + if labels is not None: + shift_logits = lm_logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + (batch_size, seq_length, vocab_size) = shift_logits.shape + loss_fct = CrossEntropyLoss() + loss = loss_fct(shift_logits.view(batch_size * seq_length, vocab_size), shift_labels.view(batch_size * seq_length)) + if not return_dict: + output = (lm_logits,) + transformer_outputs[1:] + return (loss,) + output if loss is not None else output + return CausalLMOutputWithCrossAttentions(loss=loss, logits=lm_logits, past_key_values=transformer_outputs.past_key_values, hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions) + + def prepare_inputs_for_generation(self: BloomForCausalLM, input_ids: torch.LongTensor, past: Optional[torch.Tensor]=None, attention_mask: Optional[torch.Tensor]=None, **kwargs) -> dict: + if past: + input_ids = input_ids[:, -1].unsqueeze(-1) + bidirectional_mask = None + if past[0][0].shape[0] == input_ids.shape[0]: + past = self._convert_to_bloom_cache(past) + else: + bidirectional_mask = torch.ones_like(input_ids) + return {'input_ids': input_ids, 'past_key_values': past, 'use_cache': True, 'attention_mask': attention_mask, 'bidirectional_mask': bidirectional_mask} + setattr(model, 'forward', MethodType(forward, model)) + setattr(model, 'prepare_inputs_for_generation', MethodType(prepare_inputs_for_generation, model)) + setattr(model, '_prefix_lm_converted', True) + return model + +def _convert_opt_causal_lm_to_prefix_lm(model: OPTForCausalLM) -> OPTForCausalLM: + """Converts an OPT Causal LM to a Prefix LM. + + Supported HuggingFace model classes: + - `OPTForCausalLM` + + See `convert_hf_causal_lm_to_prefix_lm` for more details. 
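+    Unlike the GPT and BLOOM conversions above, this one stashes `bidirectional_mask`
+    (or the sentinel string 'g' during `generate`) on `model.model.decoder` and reads it
+    back when the decoder attention mask is built.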
+ """ + if hasattr(model, '_prefix_lm_converted'): + return model + assert isinstance(model, OPTForCausalLM) + assert model.config.add_cross_attention == False, 'Only supports OPT decoder-only models' + setattr(model, '_original_forward', getattr(model, 'forward')) + setattr(model, '_original_generate', getattr(model, 'generate')) + model.model.decoder.bidirectional_mask = None + + def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length): + combined_attention_mask = None + if input_shape[-1] > 1: + if self.bidirectional_mask == 'g': + (bsz, src_length) = input_shape + combined_attention_mask = torch.zeros((bsz, 1, src_length, src_length + past_key_values_length), dtype=inputs_embeds.dtype, device=inputs_embeds.device) + else: + combined_attention_mask = _make_causal_mask_opt(input_shape, inputs_embeds.dtype, past_key_values_length=past_key_values_length).to(inputs_embeds.device) + if self.bidirectional_mask is not None: + assert attention_mask.shape == self.bidirectional_mask.shape + expanded_bidirectional_mask = _expand_mask_opt(self.bidirectional_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(inputs_embeds.device) + combined_attention_mask = torch.maximum(expanded_bidirectional_mask, combined_attention_mask) + if attention_mask is not None: + expanded_attn_mask = _expand_mask_opt(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(inputs_embeds.device) + combined_attention_mask = expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask + return combined_attention_mask + setattr(model.model.decoder, '_prepare_decoder_attention_mask', MethodType(_prepare_decoder_attention_mask, model.model.decoder)) + + def forward(self: OPTForCausalLM, input_ids: Optional[torch.LongTensor]=None, attention_mask: Optional[torch.Tensor]=None, bidirectional_mask: Optional[torch.ByteTensor]=None, head_mask: Optional[torch.Tensor]=None, past_key_values: Optional[List[torch.FloatTensor]]=None, inputs_embeds: Optional[torch.FloatTensor]=None, labels: Optional[torch.LongTensor]=None, use_cache: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, return_dict: Optional[bool]=None): + + def call_og_forward(): + return self._original_forward(input_ids=input_ids, attention_mask=attention_mask, head_mask=head_mask, past_key_values=past_key_values, inputs_embeds=inputs_embeds, labels=labels, use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict) + if bidirectional_mask is None: + return call_og_forward() + self.model.decoder.bidirectional_mask = bidirectional_mask + try: + outputs = call_og_forward() + except: + self.model.decoder.bidirectional_mask = None + raise + self.model.decoder.bidirectional_mask = None + return outputs + + def generate(self: OPTForCausalLM, *args: tuple, **kwargs: Dict[str, Any]): + """Wraps original generate to enable PrefixLM-style attention.""" + self.model.decoder.bidirectional_mask = 'g' + try: + output = self._original_generate(*args, **kwargs) + except: + self.model.decoder.bidirectional_mask = None + raise + self.model.decoder.bidirectional_mask = None + return output + setattr(model, 'forward', MethodType(forward, model)) + setattr(model, 'generate', MethodType(generate, model)) + setattr(model, '_prefix_lm_converted', True) + return model +_SUPPORTED_HF_MODELS = _SUPPORTED_GPT_MODELS + (BloomForCausalLM, OPTForCausalLM) +CAUSAL_LM_TYPES = 
Union[GPT2LMHeadModel, GPTJForCausalLM, GPTNeoForCausalLM, GPTNeoXForCausalLM, BloomForCausalLM, OPTForCausalLM] + +def convert_hf_causal_lm_to_prefix_lm(model: CAUSAL_LM_TYPES) -> CAUSAL_LM_TYPES: + """Converts a HuggingFace Causal LM to a Prefix LM. + + Supported HuggingFace model classes: + - `GPT2LMHeadModel` + - `GPTNeoForCausalLM` + - `GPTNeoXForCausalLM` + - `GPTJForCausalLM` + - `BloomForCausalLM` + - `OPTForCausalLM` + + Conversion to a Prefix LM is done by modifying the `forward` method, and possibly also the + `generate` method and/or select underlying methods depending on the model class. + + These changes preserve the model API, but add a new input to `forward`: "bidirectional_mask". + + Notes on training: + To actually train the converted model as a Prefix LM, training batches will need to indicate + the prefix/target structure by including `bidirectional_mask` as part of the batch inputs. + + **This is not a standard input and requires custom layers either within or after your dataloader.** + + In addition to adding `bidirectional_mask` to the batch, this custom code should modify `labels` + such that `batch['labels'][batch['bidirectional_mask'] == 1] == -100`. + That is, the prefix portion of the sequence should not generate any loss. Loss should only be + generated by the target portion of the sequence. + + Notes on `GPTNeoForCausalLM`: + To simplify the implementation, "global" and "local" attention layers are handled differently. + For "global" layers, we handle conversion as described above. For "local" layers, which use a + causal attention mask within a restricted local window, we do not alter the masking. + + Notes on `forward` method conversion: + After conversion, the `forward` method will handle a new input, `bidirectional_mask`, + which should be a [batch_size, seq_length] byte tensor, where 1 indicates token positions + belonging to the prefix (prefix tokens can attend to one another bidirectionally), and + 0 indicates token positions belonging to the target. + + The new `forward` method will incorporate `bidirectional_mask` (if supplied) into the existing + causal mask, call the original `forward` method, and (if the causal mask is a buffer) reset + the causal masks before returning the result. + + Notes on `generate` method conversion: + After conversion, the `generate` method will have the same signature but will internally + convert all causal masks to be purely bidirectional, call the original `generate` method, and + (where appropriate) reset the causal masks before returning the result. + + This works thanks to the logic of the HuggingFace `generate` API, which first encodes the token + "prompt" passed to `generate` (which is treated as the prefix) and then sequentially generates + each new token. Encodings are cached as generation happens, so all prefix tokens can attend to one + another (as expected in a Prefix LM) and generated tokens can only attend to prefix tokens and + previously-generated tokens (also as expected in a Prefix LM). + + To preserve the API, the original methods are renamed to `_original_forward` and + `_original_generate`, and replaced with new `forward` and `generate` methods that wrap + them, respectively. Although implementation details vary by model class. 
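+
+    A minimal sketch of the batch preparation described under "Notes on training" above
+    (`prefix_len` is a stand-in for whatever prefix/target split the dataloader produces):
+
+    ```python
+    prefix_len = 8  # illustrative prefix length, not a real config value
+    batch['bidirectional_mask'] = torch.zeros_like(batch['attention_mask'])
+    batch['bidirectional_mask'][:, :prefix_len] = 1
+    batch['labels'] = batch['input_ids'].clone()
+    # Prefix positions must not contribute to the loss.
+    batch['labels'][batch['bidirectional_mask'] == 1] = -100
+    ```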
+ """ + if isinstance(model, _SUPPORTED_GPT_MODELS): + return _convert_gpt_causal_lm_to_prefix_lm(model) + elif isinstance(model, BloomForCausalLM): + return _convert_bloom_causal_lm_to_prefix_lm(model) + elif isinstance(model, OPTForCausalLM): + return _convert_opt_causal_lm_to_prefix_lm(model) + else: + raise TypeError(f'Cannot convert model to Prefix LM. ' + f'Model does not belong to set of supported HF models:' + f'\n{_SUPPORTED_HF_MODELS}') + +def add_bidirectional_mask_if_missing(batch: Dict[str, Any]): + """Attempts to add bidirectional_mask to batch if missing. + + Raises: + KeyError if bidirectional_mask is missing and can't be inferred + """ + if 'bidirectional_mask' not in batch: + if batch.get('mode', None) == 'icl_task': + batch['bidirectional_mask'] = batch['attention_mask'].clone() + for (i, continuation_indices) in enumerate(batch['continuation_indices']): + batch['bidirectional_mask'][i, continuation_indices] = 0 + elif 'labels' in batch and 'attention_mask' in batch: + batch['bidirectional_mask'] = torch.logical_and(torch.eq(batch['attention_mask'], 1), torch.eq(batch['labels'], -100)).type_as(batch['attention_mask']) + else: + raise KeyError('No bidirectional_mask in batch and not sure how to construct one.') \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/meta_init_context.py b/dam/model/language_model/mpt_ignored/meta_init_context.py new file mode 100644 index 0000000000000000000000000000000000000000..6cba6fff0fe21fe222c7ab38eae44a9784c0be9c --- /dev/null +++ b/dam/model/language_model/mpt_ignored/meta_init_context.py @@ -0,0 +1,94 @@ +from contextlib import contextmanager +import torch +import torch.nn as nn + +@contextmanager +def init_empty_weights(include_buffers: bool=False): + """Meta initialization context manager. + + A context manager under which models are initialized with all parameters + on the meta device, therefore creating an empty model. Useful when just + initializing the model would blow the available RAM. + + Args: + include_buffers (`bool`, *optional*, defaults to `False`): Whether or + not to also put all buffers on the meta device while initializing. + + Example: + ```python + import torch.nn as nn + + # Initialize a model with 100 billions parameters in no time and without using any RAM. + with init_empty_weights(): + tst = nn.Sequential(*[nn.Linear(10000, 10000) for _ in range(1000)]) + ``` + + + + Any model created under this context manager has no weights. As such you can't do something like + `model.to(some_device)` with it. To load weights inside your empty model, see [`load_checkpoint_and_dispatch`]. + + + """ + with init_on_device(torch.device('meta'), include_buffers=include_buffers) as f: + yield f + +@contextmanager +def init_on_device(device: torch.device, include_buffers: bool=False): + """Device initialization context manager. + + A context manager under which models are initialized with all parameters + on the specified device. + + Args: + device (`torch.device`): Device to initialize all parameters on. + include_buffers (`bool`, *optional*, defaults to `False`): Whether or + not to also put all buffers on the meta device while initializing. 
+ + Example: + ```python + import torch.nn as nn + + with init_on_device(device=torch.device("cuda")): + tst = nn.Linear(100, 100) # on `cuda` device + ``` + """ + old_register_parameter = nn.Module.register_parameter + if include_buffers: + old_register_buffer = nn.Module.register_buffer + + def register_empty_parameter(module, name, param): + old_register_parameter(module, name, param) + if param is not None: + param_cls = type(module._parameters[name]) + kwargs = module._parameters[name].__dict__ + module._parameters[name] = param_cls(module._parameters[name].to(device), **kwargs) + + def register_empty_buffer(module, name, buffer): + old_register_buffer(module, name, buffer) + if buffer is not None: + module._buffers[name] = module._buffers[name].to(device) + if include_buffers: + tensor_constructors_to_patch = {torch_function_name: getattr(torch, torch_function_name) for torch_function_name in ['empty', 'zeros', 'ones', 'full']} + else: + tensor_constructors_to_patch = {} + + def patch_tensor_constructor(fn): + + def wrapper(*args, **kwargs): + kwargs['device'] = device + return fn(*args, **kwargs) + return wrapper + try: + nn.Module.register_parameter = register_empty_parameter + if include_buffers: + nn.Module.register_buffer = register_empty_buffer + for torch_function_name in tensor_constructors_to_patch.keys(): + setattr(torch, torch_function_name, patch_tensor_constructor(getattr(torch, torch_function_name))) + yield + finally: + nn.Module.register_parameter = old_register_parameter + if include_buffers: + nn.Module.register_buffer = old_register_buffer + for (torch_function_name, old_torch_function) in tensor_constructors_to_patch.items(): + setattr(torch, torch_function_name, old_torch_function) \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/modeling_mpt.py b/dam/model/language_model/mpt_ignored/modeling_mpt.py new file mode 100644 index 0000000000000000000000000000000000000000..13313441b13fc7a66cb65fd21b482a5de982e2c8 --- /dev/null +++ b/dam/model/language_model/mpt_ignored/modeling_mpt.py @@ -0,0 +1,331 @@ +"""A simple, flexible implementation of a GPT model. 
+ +Inspired by https://github.com/karpathy/minGPT/blob/master/mingpt/model.py +""" +import math +import warnings +from typing import List, Optional, Tuple, Union +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerFast +from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast +from .attention import attn_bias_shape, build_attn_bias +from .blocks import MPTBlock +from .custom_embedding import SharedEmbedding +from .norm import NORM_CLASS_REGISTRY +from .configuration_mpt import MPTConfig +from .adapt_tokenizer import AutoTokenizerForMOD, adapt_tokenizer_for_denoising +from .hf_prefixlm_converter import add_bidirectional_mask_if_missing, convert_hf_causal_lm_to_prefix_lm +from .meta_init_context import init_empty_weights +from .param_init_fns import MODEL_INIT_REGISTRY, generic_param_init_fn_ +try: + from .flash_attn_triton import flash_attn_func +except: + pass +Tokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast] + +class MPTPreTrainedModel(PreTrainedModel): + config_class = MPTConfig + base_model_prefix = 'model' + _no_split_modules = ['MPTBlock'] + +class MPTModel(MPTPreTrainedModel): + + def __init__(self, config: MPTConfig): + config._validate_config() + super().__init__(config) + self.attn_impl = config.attn_config['attn_impl'] + self.prefix_lm = config.attn_config['prefix_lm'] + self.attn_uses_sequence_id = config.attn_config['attn_uses_sequence_id'] + self.alibi = config.attn_config['alibi'] + self.alibi_bias_max = config.attn_config['alibi_bias_max'] + if config.init_device == 'mixed': + if dist.get_local_rank() == 0: + config.init_device = 'cpu' + else: + config.init_device = 'meta' + if config.norm_type.lower() not in NORM_CLASS_REGISTRY.keys(): + norm_options = ' | '.join(NORM_CLASS_REGISTRY.keys()) + raise NotImplementedError(f'Requested norm type ({config.norm_type}) is not implemented within this repo (Options: {norm_options}).') + norm_class = NORM_CLASS_REGISTRY[config.norm_type.lower()] + self.embedding_fraction = config.embedding_fraction + self.wte = SharedEmbedding(config.vocab_size, config.d_model, device=config.init_device) + if not self.alibi: + self.wpe = torch.nn.Embedding(config.max_seq_len, config.d_model, device=config.init_device) + self.emb_drop = nn.Dropout(config.emb_pdrop) + self.blocks = nn.ModuleList([MPTBlock(device=config.init_device, **config.to_dict()) for _ in range(config.n_layers)]) + self.norm_f = norm_class(config.d_model, device=config.init_device) + if config.init_device != 'meta': + print(f'You are using config.init_device={config.init_device!r}, but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.') + self.apply(self.param_init_fn) + self.is_causal = not self.prefix_lm + self._attn_bias_initialized = False + self.attn_bias = None + self.attn_bias_shape = attn_bias_shape(self.attn_impl, config.n_heads, config.max_seq_len, self.alibi, prefix_lm=self.prefix_lm, causal=self.is_causal, use_sequence_id=self.attn_uses_sequence_id) + if config.no_bias: + for module in self.modules(): + if hasattr(module, 'bias') and isinstance(module.bias, nn.Parameter): + if config.verbose: + warnings.warn(f'Removing bias ({module.bias}) from {module}.') + module.register_parameter('bias', None) + if config.verbose and config.verbose > 2: + print(self) + if 'verbose' not in self.config.init_config: + self.config.init_config['verbose'] = self.config.verbose + if 
self.config.init_config['verbose'] > 1: + init_fn_name = self.config.init_config['name'] + warnings.warn(f'Using {init_fn_name} initialization.') + self.gradient_checkpointing = False + + def get_input_embeddings(self): + return self.wte + + def set_input_embeddings(self, value): + self.wte = value + + @torch.no_grad() + def _attn_bias(self, device, dtype, attention_mask: Optional[torch.ByteTensor]=None, prefix_mask: Optional[torch.ByteTensor]=None, sequence_id: Optional[torch.LongTensor]=None): + if not self._attn_bias_initialized: + if self.attn_bias_shape: + self.attn_bias = torch.zeros(self.attn_bias_shape, device=device, dtype=dtype) + self.attn_bias = build_attn_bias(self.attn_impl, self.attn_bias, self.config.n_heads, self.config.max_seq_len, causal=self.is_causal, alibi=self.alibi, alibi_bias_max=self.alibi_bias_max) + self._attn_bias_initialized = True + if self.attn_impl == 'flash': + return (self.attn_bias, attention_mask) + if self.attn_bias is not None: + self.attn_bias = self.attn_bias.to(dtype=dtype, device=device) + attn_bias = self.attn_bias + if self.prefix_lm: + assert isinstance(attn_bias, torch.Tensor) + assert isinstance(prefix_mask, torch.Tensor) + attn_bias = self._apply_prefix_mask(attn_bias, prefix_mask) + if self.attn_uses_sequence_id and sequence_id is not None: + assert isinstance(attn_bias, torch.Tensor) + attn_bias = self._apply_sequence_id(attn_bias, sequence_id) + if attention_mask is not None: + s_k = attention_mask.shape[-1] + if attn_bias is None: + attn_bias = torch.zeros((1, 1, 1, s_k), device=device, dtype=dtype) + else: + _s_k = max(0, attn_bias.size(-1) - s_k) + attn_bias = attn_bias[:, :, :, _s_k:] + if prefix_mask is not None and attention_mask.shape != prefix_mask.shape: + raise ValueError(f'attention_mask shape={attention_mask.shape} ' + f'and prefix_mask shape={prefix_mask.shape} are not equal.') + min_val = torch.finfo(attn_bias.dtype).min + attn_bias = attn_bias.masked_fill(~attention_mask.view(-1, 1, 1, s_k), min_val) + return (attn_bias, None) + + def _apply_prefix_mask(self, attn_bias: torch.Tensor, prefix_mask: torch.Tensor): + (s_k, s_q) = attn_bias.shape[-2:] + if s_k != self.config.max_seq_len or s_q != self.config.max_seq_len: + raise ValueError('attn_bias does not match the expected shape. 
' + f'The last two dimensions should both be {self.config.max_length} ' + f'but are {s_k} and {s_q}.') + seq_len = prefix_mask.shape[-1] + if seq_len > self.config.max_seq_len: + raise ValueError(f'prefix_mask sequence length cannot exceed max_seq_len={self.config.max_seq_len}') + attn_bias = attn_bias[..., :seq_len, :seq_len] + causal = torch.tril(torch.ones((seq_len, seq_len), dtype=torch.bool, device=prefix_mask.device)).view(1, 1, seq_len, seq_len) + prefix = prefix_mask.view(-1, 1, 1, seq_len) + cannot_attend = ~torch.logical_or(causal, prefix.bool()) + min_val = torch.finfo(attn_bias.dtype).min + attn_bias = attn_bias.masked_fill(cannot_attend, min_val) + return attn_bias + + def _apply_sequence_id(self, attn_bias: torch.Tensor, sequence_id: torch.LongTensor): + seq_len = sequence_id.shape[-1] + if seq_len > self.config.max_seq_len: + raise ValueError(f'sequence_id sequence length cannot exceed max_seq_len={self.config.max_seq_len}') + attn_bias = attn_bias[..., :seq_len, :seq_len] + cannot_attend = torch.logical_not(torch.eq(sequence_id.view(-1, seq_len, 1), sequence_id.view(-1, 1, seq_len))).unsqueeze(1) + min_val = torch.finfo(attn_bias.dtype).min + attn_bias = attn_bias.masked_fill(cannot_attend, min_val) + return attn_bias + + def forward(self, input_ids: torch.LongTensor, past_key_values: Optional[List[Tuple[torch.FloatTensor]]]=None, attention_mask: Optional[torch.ByteTensor]=None, prefix_mask: Optional[torch.ByteTensor]=None, sequence_id: Optional[torch.LongTensor]=None, return_dict: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, use_cache: Optional[bool]=None, inputs_embeds: Optional[torch.Tensor]=None): + return_dict = return_dict if return_dict is not None else self.config.return_dict + use_cache = use_cache if use_cache is not None else self.config.use_cache + if attention_mask is not None: + attention_mask = attention_mask.bool() + if prefix_mask is not None: + prefix_mask = prefix_mask.bool() + if not return_dict: + raise NotImplementedError('return_dict False is not implemented yet for MPT') + if output_attentions: + if self.attn_impl != 'torch': + raise NotImplementedError('output_attentions is not implemented for MPT when using attn_impl `flash` or `triton`.') + if attention_mask is not None and attention_mask[:, 0].sum() != attention_mask.shape[0] and self.training: + raise NotImplementedError('MPT does not support training with left padding.') + if self.prefix_lm and prefix_mask is None: + raise ValueError('prefix_mask is a required argument when MPT is configured with prefix_lm=True.') + if self.training: + if self.attn_uses_sequence_id and sequence_id is None: + raise ValueError('sequence_id is a required argument when MPT is configured with attn_uses_sequence_id=True ' + 'and the model is in train mode.') + elif self.attn_uses_sequence_id is False and sequence_id is not None: + warnings.warn('MPT received non-None input for `sequence_id` but is configured with attn_uses_sequence_id=False. ' + 'This input will be ignored. If you want the model to use `sequence_id`, set attn_uses_sequence_id to True.') + if input_ids is not None: + S = input_ids.size(1) + assert S <= self.config.max_seq_len, f'Cannot forward input with seq_len={S}, this model only supports seq_len<={self.config.max_seq_len}' + tok_emb = self.wte(input_ids) + else: + assert inputs_embeds is not None + assert self.alibi, 'inputs_embeds is not implemented for MPT unless for alibi.' 
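+            # inputs_embeds bypasses the wte lookup; with alibi there are no learned position
+            # embeddings to add, so the provided embeddings are used directly as x below.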
+ S = inputs_embeds.size(1) + tok_emb = inputs_embeds + if self.alibi: + x = tok_emb + else: + past_position = 0 + if past_key_values is not None: + if len(past_key_values) != self.config.n_layers: + raise ValueError(f'past_key_values must provide a past_key_value for each attention ' + f'layer in the network (len(past_key_values)={len(past_key_values)!r}; self.config.n_layers={self.config.n_layers!r}).') + past_position = past_key_values[0][0].size(1) + if self.attn_impl == 'torch': + past_position = past_key_values[0][0].size(3) + if S + past_position > self.config.max_seq_len: + raise ValueError(f'Cannot forward input with past sequence length {past_position} and current sequence length {S + 1}, this model only supports total sequence length <= {self.config.max_seq_len}.') + pos = torch.arange(past_position, S + past_position, dtype=torch.long, device=input_ids.device).unsqueeze(0) + if attention_mask is not None: + pos = torch.clamp(pos - torch.cumsum((~attention_mask).to(torch.int32), dim=1)[:, past_position:], min=0) + pos_emb = self.wpe(pos) + x = tok_emb + pos_emb + if self.embedding_fraction == 1: + x = self.emb_drop(x) + else: + x_shrunk = x * self.embedding_fraction + x.detach() * (1 - self.embedding_fraction) + assert isinstance(self.emb_drop, nn.Module) + x = self.emb_drop(x_shrunk) + (attn_bias, attention_mask) = self._attn_bias(device=x.device, dtype=torch.float32, attention_mask=attention_mask, prefix_mask=prefix_mask, sequence_id=sequence_id) + if use_cache and past_key_values is None: + past_key_values = [() for _ in range(self.config.n_layers)] + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + for (b_idx, block) in enumerate(self.blocks): + if output_hidden_states: + assert all_hidden_states is not None + all_hidden_states = all_hidden_states + (x,) + past_key_value = past_key_values[b_idx] if past_key_values is not None else None + if self.gradient_checkpointing and self.training: + (x, attn_weights, past_key_value) = torch.utils.checkpoint.checkpoint(block, x, past_key_value, attn_bias, attention_mask, self.is_causal) + else: + (x, attn_weights, past_key_value) = block(x, past_key_value=past_key_value, attn_bias=attn_bias, attention_mask=attention_mask, is_causal=self.is_causal) + if past_key_values is not None: + past_key_values[b_idx] = past_key_value + if output_attentions: + assert all_self_attns is not None + all_self_attns = all_self_attns + (attn_weights,) + x = self.norm_f(x) + if output_hidden_states: + assert all_hidden_states is not None + all_hidden_states = all_hidden_states + (x,) + return BaseModelOutputWithPast(last_hidden_state=x, past_key_values=past_key_values, hidden_states=all_hidden_states, attentions=all_self_attns) + + def param_init_fn(self, module): + init_fn_name = self.config.init_config['name'] + MODEL_INIT_REGISTRY[init_fn_name](module=module, n_layers=self.config.n_layers, d_model=self.config.d_model, **self.config.init_config) + + def fsdp_wrap_fn(self, module): + return isinstance(module, MPTBlock) + + def activation_checkpointing_fn(self, module): + return isinstance(module, MPTBlock) + +class MPTForCausalLM(MPTPreTrainedModel): + + def __init__(self, config: MPTConfig): + super().__init__(config) + if not config.tie_word_embeddings: + raise ValueError('MPTForCausalLM only supports tied word embeddings') + print(f'Instantiating an MPTForCausalLM model from {__file__}') + self.transformer = MPTModel(config) + for child in self.transformer.children(): + if isinstance(child, 
torch.nn.ModuleList): + continue + if isinstance(child, torch.nn.Module): + child._fsdp_wrap = True + self.logit_scale = None + if config.logit_scale is not None: + logit_scale = config.logit_scale + if isinstance(logit_scale, str): + if logit_scale == 'inv_sqrt_d_model': + logit_scale = 1 / math.sqrt(config.d_model) + else: + raise ValueError(f"logit_scale={logit_scale!r} is not recognized as an option; use numeric value or 'inv_sqrt_d_model'.") + self.logit_scale = logit_scale + + def get_input_embeddings(self): + return self.transformer.wte + + def set_input_embeddings(self, value): + self.transformer.wte = value + + def get_output_embeddings(self): + return self.transformer.wte + + def set_output_embeddings(self, new_embeddings): + self.transformer.wte = new_embeddings + + def set_decoder(self, decoder): + self.transformer = decoder + + def get_decoder(self): + return self.transformer + + def forward(self, input_ids: torch.LongTensor, past_key_values: Optional[List[Tuple[torch.FloatTensor]]]=None, attention_mask: Optional[torch.ByteTensor]=None, prefix_mask: Optional[torch.ByteTensor]=None, sequence_id: Optional[torch.LongTensor]=None, labels: Optional[torch.LongTensor]=None, return_dict: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, use_cache: Optional[bool]=None, inputs_embeds: Optional[torch.FloatTensor]=None): + return_dict = return_dict if return_dict is not None else self.config.return_dict + use_cache = use_cache if use_cache is not None else self.config.use_cache + if inputs_embeds is not None: + raise NotImplementedError('inputs_embeds has to be None (for hf/peft support).') + outputs = self.transformer(input_ids=input_ids, past_key_values=past_key_values, attention_mask=attention_mask, prefix_mask=prefix_mask, sequence_id=sequence_id, return_dict=return_dict, output_attentions=output_attentions, output_hidden_states=output_hidden_states, use_cache=use_cache) + logits = self.transformer.wte(outputs.last_hidden_state.to(self.transformer.wte.weight.device), True) + if self.logit_scale is not None: + if self.logit_scale == 0: + warnings.warn(f'Multiplying logits by self.logit_scale={self.logit_scale!r}. 
This will produce uniform (uninformative) outputs.') + logits *= self.logit_scale + loss = None + if labels is not None: + labels = torch.roll(labels, shifts=-1) + labels[:, -1] = -100 + loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.to(logits.device).view(-1)) + return CausalLMOutputWithPast(loss=loss, logits=logits, past_key_values=outputs.past_key_values, hidden_states=outputs.hidden_states, attentions=outputs.attentions) + + def param_init_fn(self, module): + init_fn_name = self.config.init_config['name'] + MODEL_INIT_REGISTRY[init_fn_name](module=module, n_layers=self.config.n_layers, d_model=self.config.d_model, **self.config.init_config) + + def fsdp_wrap_fn(self, module): + return isinstance(module, MPTBlock) + + def activation_checkpointing_fn(self, module): + return isinstance(module, MPTBlock) + + def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs): + if inputs_embeds is not None: + raise NotImplementedError('inputs_embeds is not implemented for MPT yet') + attention_mask = kwargs['attention_mask'].bool() + if attention_mask[:, -1].sum() != attention_mask.shape[0]: + raise NotImplementedError('MPT does not support generation with right padding.') + if self.transformer.attn_uses_sequence_id and self.training: + sequence_id = torch.zeros_like(input_ids[:1]) + else: + sequence_id = None + if past_key_values is not None: + input_ids = input_ids[:, -1].unsqueeze(-1) + if self.transformer.prefix_lm: + prefix_mask = torch.ones_like(attention_mask) + if kwargs.get('use_cache') == False: + raise NotImplementedError('MPT with prefix_lm=True does not support use_cache=False.') + else: + prefix_mask = None + return {'input_ids': input_ids, 'attention_mask': attention_mask, 'prefix_mask': prefix_mask, 'sequence_id': sequence_id, 'past_key_values': past_key_values, 'use_cache': kwargs.get('use_cache', True)} + + @staticmethod + def _reorder_cache(past_key_values, beam_idx): + """Used by HuggingFace generate when using beam search with kv-caching. + + See https://github.com/huggingface/transformers/blob/3ec7a47664ebe40c40f4b722f6bb1cd30c3821ec/src/transformers/models/gpt2/modeling_gpt2.py#L1122-L1133 + for an example in transformers. 
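+ Each layer's cached (key, value) tensors are re-indexed along the batch dimension with index_select(0, beam_idx).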
+ """ + reordered_past = [] + for layer_past in past_key_values: + reordered_past += [tuple((past_state.index_select(0, beam_idx) for past_state in layer_past))] + return reordered_past \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/norm.py b/dam/model/language_model/mpt_ignored/norm.py new file mode 100644 index 0000000000000000000000000000000000000000..067b6140fae546e5cb49cb2b1e4e6af660ced60d --- /dev/null +++ b/dam/model/language_model/mpt_ignored/norm.py @@ -0,0 +1,56 @@ +import torch + +def _cast_if_autocast_enabled(tensor): + if torch.is_autocast_enabled(): + if tensor.device.type == 'cuda': + dtype = torch.get_autocast_gpu_dtype() + elif tensor.device.type == 'cpu': + dtype = torch.get_autocast_cpu_dtype() + else: + raise NotImplementedError() + return tensor.to(dtype=dtype) + return tensor + +class LPLayerNorm(torch.nn.LayerNorm): + + def __init__(self, normalized_shape, eps=1e-05, elementwise_affine=True, device=None, dtype=None): + super().__init__(normalized_shape=normalized_shape, eps=eps, elementwise_affine=elementwise_affine, device=device, dtype=dtype) + + def forward(self, x): + module_device = x.device + downcast_x = _cast_if_autocast_enabled(x) + downcast_weight = _cast_if_autocast_enabled(self.weight) if self.weight is not None else self.weight + downcast_bias = _cast_if_autocast_enabled(self.bias) if self.bias is not None else self.bias + with torch.autocast(enabled=False, device_type=module_device.type): + return torch.nn.functional.layer_norm(downcast_x, self.normalized_shape, downcast_weight, downcast_bias, self.eps) + +def rms_norm(x, weight=None, eps=1e-05): + output = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps) + if weight is not None: + return output * weight + return output + +class RMSNorm(torch.nn.Module): + + def __init__(self, normalized_shape, eps=1e-05, weight=True, dtype=None, device=None): + super().__init__() + self.eps = eps + if weight: + self.weight = torch.nn.Parameter(torch.ones(normalized_shape, dtype=dtype, device=device)) + else: + self.register_parameter('weight', None) + + def forward(self, x): + return rms_norm(x.float(), self.weight, self.eps).to(dtype=x.dtype) + +class LPRMSNorm(RMSNorm): + + def __init__(self, normalized_shape, eps=1e-05, weight=True, dtype=None, device=None): + super().__init__(normalized_shape=normalized_shape, eps=eps, weight=weight, dtype=dtype, device=device) + + def forward(self, x): + downcast_x = _cast_if_autocast_enabled(x) + downcast_weight = _cast_if_autocast_enabled(self.weight) if self.weight is not None else self.weight + with torch.autocast(enabled=False, device_type=x.device.type): + return rms_norm(downcast_x, downcast_weight, self.eps).to(dtype=x.dtype) +NORM_CLASS_REGISTRY = {'layernorm': torch.nn.LayerNorm, 'low_precision_layernorm': LPLayerNorm, 'rmsnorm': RMSNorm, 'low_precision_rmsnorm': LPRMSNorm} \ No newline at end of file diff --git a/dam/model/language_model/mpt_ignored/param_init_fns.py b/dam/model/language_model/mpt_ignored/param_init_fns.py new file mode 100644 index 0000000000000000000000000000000000000000..418b83ca2363288046f4b48b1d706c5607341fb5 --- /dev/null +++ b/dam/model/language_model/mpt_ignored/param_init_fns.py @@ -0,0 +1,181 @@ +import math +import warnings +from collections.abc import Sequence +from functools import partial +from typing import Optional, Tuple, Union +import torch +from torch import nn +from .norm import NORM_CLASS_REGISTRY + +def torch_default_param_init_fn_(module: nn.Module, verbose: int=0, **kwargs): + del kwargs + if 
verbose > 1: + warnings.warn(f"Initializing network using module's reset_parameters attribute") + if hasattr(module, 'reset_parameters'): + module.reset_parameters() + +def fused_init_helper_(module: nn.Module, init_fn_): + _fused = getattr(module, '_fused', None) + if _fused is None: + raise RuntimeError(f'Internal logic error') + (dim, splits) = _fused + splits = (0, *splits, module.weight.size(dim)) + for (s, e) in zip(splits[:-1], splits[1:]): + slice_indices = [slice(None)] * module.weight.ndim + slice_indices[dim] = slice(s, e) + init_fn_(module.weight[slice_indices]) + +def generic_param_init_fn_(module: nn.Module, init_fn_, n_layers: int, d_model: Optional[int]=None, init_div_is_residual: Union[int, float, str, bool]=True, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, verbose: int=0, **kwargs): + del kwargs + if verbose > 1: + warnings.warn(f'If model has bias parameters they are initialized to 0.') + init_div_is_residual = init_div_is_residual + if init_div_is_residual is False: + div_is_residual = 1.0 + elif init_div_is_residual is True: + div_is_residual = math.sqrt(2 * n_layers) + elif isinstance(init_div_is_residual, float) or isinstance(init_div_is_residual, int): + div_is_residual = init_div_is_residual + elif isinstance(init_div_is_residual, str) and init_div_is_residual.isnumeric(): + div_is_residual = float(init_div_is_residual) + else: + div_is_residual = 1.0 + raise ValueError(f'Expected init_div_is_residual to be boolean or numeric, got {init_div_is_residual}') + if init_div_is_residual is not False: + if verbose > 1: + warnings.warn(f'Initializing _is_residual layers then dividing them by {div_is_residual:.3f}. ' + f'Set `init_div_is_residual: false` in init config to disable this.') + if isinstance(module, nn.Linear): + if hasattr(module, '_fused'): + fused_init_helper_(module, init_fn_) + else: + init_fn_(module.weight) + if module.bias is not None: + torch.nn.init.zeros_(module.bias) + if init_div_is_residual is not False and getattr(module, '_is_residual', False): + with torch.no_grad(): + module.weight.div_(div_is_residual) + elif isinstance(module, nn.Embedding): + if emb_init_std is not None: + std = emb_init_std + if std == 0: + warnings.warn(f'Embedding layer initialized to 0.') + emb_init_fn_ = partial(torch.nn.init.normal_, mean=0.0, std=std) + if verbose > 1: + warnings.warn(f'Embedding layer initialized using normal distribution with mean=0 and std={std!r}.') + elif emb_init_uniform_lim is not None: + lim = emb_init_uniform_lim + if isinstance(lim, Sequence): + if len(lim) > 2: + raise ValueError(f'Uniform init requires a min and a max limit. User input: {lim}.') + if lim[0] == lim[1]: + warnings.warn(f'Embedding layer initialized to {lim[0]}.') + else: + if lim == 0: + warnings.warn(f'Embedding layer initialized to 0.') + lim = [-lim, lim] + (a, b) = lim + emb_init_fn_ = partial(torch.nn.init.uniform_, a=a, b=b) + if verbose > 1: + warnings.warn(f'Embedding layer initialized using uniform distribution in range {lim}.') + else: + emb_init_fn_ = init_fn_ + emb_init_fn_(module.weight) + elif isinstance(module, tuple(set(NORM_CLASS_REGISTRY.values()))): + if verbose > 1: + warnings.warn(f'Norm weights are set to 1. 
If norm layer has a bias it is initialized to 0.') + if hasattr(module, 'weight') and module.weight is not None: + torch.nn.init.ones_(module.weight) + if hasattr(module, 'bias') and module.bias is not None: + torch.nn.init.zeros_(module.bias) + elif isinstance(module, nn.MultiheadAttention): + if module._qkv_same_embed_dim: + assert module.in_proj_weight is not None + assert module.q_proj_weight is None and module.k_proj_weight is None and (module.v_proj_weight is None) + assert d_model is not None + _d = d_model + splits = (0, _d, 2 * _d, 3 * _d) + for (s, e) in zip(splits[:-1], splits[1:]): + init_fn_(module.in_proj_weight[s:e]) + else: + assert module.q_proj_weight is not None and module.k_proj_weight is not None and (module.v_proj_weight is not None) + assert module.in_proj_weight is None + init_fn_(module.q_proj_weight) + init_fn_(module.k_proj_weight) + init_fn_(module.v_proj_weight) + if module.in_proj_bias is not None: + torch.nn.init.zeros_(module.in_proj_bias) + if module.bias_k is not None: + torch.nn.init.zeros_(module.bias_k) + if module.bias_v is not None: + torch.nn.init.zeros_(module.bias_v) + init_fn_(module.out_proj.weight) + if init_div_is_residual is not False and getattr(module.out_proj, '_is_residual', False): + with torch.no_grad(): + module.out_proj.weight.div_(div_is_residual) + if module.out_proj.bias is not None: + torch.nn.init.zeros_(module.out_proj.bias) + else: + for _ in module.parameters(recurse=False): + raise NotImplementedError(f'{module.__class__.__name__} parameters are not initialized by param_init_fn.') + +def _normal_init_(std, mean=0.0): + return partial(torch.nn.init.normal_, mean=mean, std=std) + +def _normal_param_init_fn_(module: nn.Module, std: float, n_layers: int, d_model: Optional[int]=None, init_div_is_residual: Union[int, float, str, bool]=True, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, verbose: int=0, **kwargs): + del kwargs + init_fn_ = _normal_init_(std=std) + if verbose > 1: + warnings.warn(f'Using torch.nn.init.normal_ init fn mean=0.0, std={std}') + generic_param_init_fn_(module=module, init_fn_=init_fn_, d_model=d_model, n_layers=n_layers, init_div_is_residual=init_div_is_residual, emb_init_std=emb_init_std, emb_init_uniform_lim=emb_init_uniform_lim, verbose=verbose) + +def baseline_param_init_fn_(module: nn.Module, init_std: float, n_layers: int, d_model: Optional[int]=None, init_div_is_residual: Union[int, float, str, bool]=True, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, verbose: int=0, **kwargs): + del kwargs + if init_std is None: + raise ValueError("You must set model.init_config['init_std'] to a float value to use the default initialization scheme.") + _normal_param_init_fn_(module=module, std=init_std, d_model=d_model, n_layers=n_layers, init_div_is_residual=init_div_is_residual, emb_init_std=emb_init_std, emb_init_uniform_lim=emb_init_uniform_lim, verbose=verbose) + +def small_param_init_fn_(module: nn.Module, n_layers: int, d_model: int, init_div_is_residual: Union[int, float, str, bool]=True, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, verbose: int=0, **kwargs): + del kwargs + std = math.sqrt(2 / (5 * d_model)) + _normal_param_init_fn_(module=module, std=std, d_model=d_model, n_layers=n_layers, init_div_is_residual=init_div_is_residual, emb_init_std=emb_init_std, emb_init_uniform_lim=emb_init_uniform_lim, verbose=verbose) + +def 
neox_param_init_fn_(module: nn.Module, n_layers: int, d_model: int, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, verbose: int=0, **kwargs): + """From section 2.3.1 of GPT-NeoX-20B: + + An Open-Source AutoregressiveLanguage Model — Black et. al. (2022) + see https://github.com/EleutherAI/gpt-neox/blob/9610391ab319403cef079b438edd016a2443af54/megatron/model/init_functions.py#L151 + and https://github.com/EleutherAI/gpt-neox/blob/main/megatron/model/transformer.py + """ + del kwargs + residual_div = n_layers / math.sqrt(10) + if verbose > 1: + warnings.warn(f'setting init_div_is_residual to {residual_div}') + small_param_init_fn_(module=module, d_model=d_model, n_layers=n_layers, init_div_is_residual=residual_div, emb_init_std=emb_init_std, emb_init_uniform_lim=emb_init_uniform_lim, verbose=verbose) + +def kaiming_uniform_param_init_fn_(module: nn.Module, n_layers: int, d_model: Optional[int]=None, init_div_is_residual: Union[int, float, str, bool]=True, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, init_gain: float=0, fan_mode: str='fan_in', init_nonlinearity: str='leaky_relu', verbose: int=0, **kwargs): + del kwargs + if verbose > 1: + warnings.warn(f'Using nn.init.kaiming_uniform_ init fn with parameters: ' + f'a={init_gain}, mode={fan_mode}, nonlinearity={init_nonlinearity}') + kaiming_uniform_ = partial(nn.init.kaiming_uniform_, a=init_gain, mode=fan_mode, nonlinearity=init_nonlinearity) + generic_param_init_fn_(module=module, init_fn_=kaiming_uniform_, d_model=d_model, n_layers=n_layers, init_div_is_residual=init_div_is_residual, emb_init_std=emb_init_std, emb_init_uniform_lim=emb_init_uniform_lim, verbose=verbose) + +def kaiming_normal_param_init_fn_(module: nn.Module, n_layers: int, d_model: Optional[int]=None, init_div_is_residual: Union[int, float, str, bool]=True, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, init_gain: float=0, fan_mode: str='fan_in', init_nonlinearity: str='leaky_relu', verbose: int=0, **kwargs): + del kwargs + if verbose > 1: + warnings.warn(f'Using nn.init.kaiming_normal_ init fn with parameters: ' + f'a={init_gain}, mode={fan_mode}, nonlinearity={init_nonlinearity}') + kaiming_normal_ = partial(torch.nn.init.kaiming_normal_, a=init_gain, mode=fan_mode, nonlinearity=init_nonlinearity) + generic_param_init_fn_(module=module, init_fn_=kaiming_normal_, d_model=d_model, n_layers=n_layers, init_div_is_residual=init_div_is_residual, emb_init_std=emb_init_std, emb_init_uniform_lim=emb_init_uniform_lim, verbose=verbose) + +def xavier_uniform_param_init_fn_(module: nn.Module, n_layers: int, d_model: Optional[int]=None, init_div_is_residual: Union[int, float, str, bool]=True, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, init_gain: float=0, verbose: int=0, **kwargs): + del kwargs + xavier_uniform_ = partial(torch.nn.init.xavier_uniform_, gain=init_gain) + if verbose > 1: + warnings.warn(f'Using torch.nn.init.xavier_uniform_ init fn with parameters: ' + f'gain={init_gain}') + generic_param_init_fn_(module=module, init_fn_=xavier_uniform_, d_model=d_model, n_layers=n_layers, init_div_is_residual=init_div_is_residual, emb_init_std=emb_init_std, emb_init_uniform_lim=emb_init_uniform_lim, verbose=verbose) + +def xavier_normal_param_init_fn_(module: nn.Module, n_layers: int, d_model: Optional[int]=None, init_div_is_residual: 
Union[int, float, str, bool]=True, emb_init_std: Optional[float]=None, emb_init_uniform_lim: Optional[Union[Tuple[float, float], float]]=None, init_gain: float=0, verbose: int=0, **kwargs): + xavier_normal_ = partial(torch.nn.init.xavier_normal_, gain=init_gain) + if verbose > 1: + warnings.warn(f'Using torch.nn.init.xavier_normal_ init fn with parameters: ' + f'gain={init_gain}') + generic_param_init_fn_(module=module, init_fn_=xavier_normal_, d_model=d_model, n_layers=n_layers, init_div_is_residual=init_div_is_residual, emb_init_std=emb_init_std, emb_init_uniform_lim=emb_init_uniform_lim, verbose=verbose) +MODEL_INIT_REGISTRY = {'default_': torch_default_param_init_fn_, 'baseline_': baseline_param_init_fn_, 'kaiming_uniform_': kaiming_uniform_param_init_fn_, 'kaiming_normal_': kaiming_normal_param_init_fn_, 'neox_init_': neox_param_init_fn_, 'small_init_': small_param_init_fn_, 'xavier_uniform_': xavier_uniform_param_init_fn_, 'xavier_normal_': xavier_normal_param_init_fn_} \ No newline at end of file diff --git a/dam/model/llava_arch.py b/dam/model/llava_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..82d119b58a293b7c29a54df2f03bcb1de35d60c9 --- /dev/null +++ b/dam/model/llava_arch.py @@ -0,0 +1,676 @@ +# Copyright 2023 Haotian Liu +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os, sys, os.path as osp +import warnings +from abc import ABC, abstractmethod + +import torch, logging + +from transformers import ( + AutoTokenizer, + AutoModel, + AutoModelForCausalLM, + AutoConfig, + BitsAndBytesConfig, + PretrainedConfig, + PreTrainedModel, +) + +from .constants import ( + DEFAULT_IM_END_TOKEN, + DEFAULT_IM_START_TOKEN, + DEFAULT_IMAGE_PATCH_TOKEN, + IGNORE_INDEX, + IMAGE_TOKEN_INDEX, + MASK_TOKEN_INDEX, +) + +from collections import OrderedDict +from .utils import get_model_config +from .language_model.builder import build_llm_and_tokenizer +from .multimodal_encoder.builder import build_vision_tower, build_context_provider +from .multimodal_projector.builder import build_mm_projector +from .configuration_llava import LlavaConfig + +from transformers.modeling_utils import ContextManagers, no_init_weights + +## TODO decide whether should we use metaclass +class LlavaMetaModel(ABC): + def init_vlm(self, config: PreTrainedModel = None, *args, **kwargs): + # TODO(ligeng): figure out how from_config and from_pretrained works in HF implementation. + if hasattr(self, "llm") or hasattr(self, "vision_tower") or hasattr(self, "mm_projector"): + # already initialized, skipped + return + + model_dtype = getattr(config, "model_dtype", "torch.float16") + if not hasattr(config, "model_dtype"): + warnings.warn("model_dtype not found in config, defaulting to torch.float16.") + config.model_dtype = model_dtype + + # print("init_vlm(): config", config); input("DEBUG init_vlm") + cfgs = get_model_config(config) + # Only the first three are required. Others are optional. 
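+ # The five entries are the sub-configs for the LLM/tokenizer, vision tower, multimodal projector, mask encoder, and context provider; the latter two may be None.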
+ llm_cfg, vision_tower_cfg, mm_projector_cfg, mask_encoder_cfg, context_provider_cfg = cfgs + if llm_cfg is None or vision_tower_cfg is None or mm_projector_cfg is None: + raise ValueError("`llm_cfg` `mm_projector_cfg` `vision_tower_cfg` not found in the config.") + # print("init_vlm():", cfgs); input("DEBUG init_vlm") + # print(llm_cfg, vision_tower_cfg, mm_projector_cfg); input("DEBUG init_vlm") + self.llm, self.tokenizer = build_llm_and_tokenizer(llm_cfg, config, *args, **kwargs) + self.vision_tower = build_vision_tower(vision_tower_cfg, config) + self.mm_projector = build_mm_projector(mm_projector_cfg, config) + self.context_provider = build_context_provider(context_provider_cfg, config) if context_provider_cfg is not None else None + + self.post_config() + self.is_loaded = True + + assert ( + self.llm is not None or self.vision_tower is not None or self.mm_projector is not None + ), "At least one of the components must be instantiated." + + @classmethod + def load_from_config(cls, model_path_or_config, *args, **kwargs): + pass + + ## FIXME we will use this function to load model in the future + @classmethod + def load_pretrained(cls, model_path_or_config, *args, **kwargs): + kwargs.pop("config", None) + + if isinstance(model_path_or_config, str): + config = AutoConfig.from_pretrained(model_path_or_config) + elif isinstance(model_path_or_config, LlavaConfig): + config = model_path_or_config + else: + raise NotImplementedError(f"wrong type, {type(model_path_or_config)} \ + {isinstance(model_path_or_config, LlavaConfig)}") + + model_dtype = getattr(config, "model_dtype", "torch.float16") + if not hasattr(config, "model_dtype"): + warnings.warn("model_dtype not found in config, defaulting to torch.float16.") + config.model_dtype = model_dtype + + cfgs = get_model_config(config) + # Only the first three are required. Others are optional. + llm_cfg, vision_tower_cfg, mm_projector_cfg, mask_encoder_cfg, context_provider_cfg = cfgs + if llm_cfg is None or vision_tower_cfg is None or mm_projector_cfg is None: + raise ValueError("`llm_cfg` `mm_projector_cfg` `vision_tower_cfg` not found in the config.") + + # print(llm_cfg, vision_tower_cfg, mm_projector_cfg); input("DEBUG load_pretrained") + with ContextManagers([no_init_weights(_enable=True),]): + vlm = cls(config, *args, **kwargs) + # print(llm_cfg, vision_tower_cfg, mm_projector_cfg); input("DEBUG load_pretrained finish") + + if hasattr(vlm, "llm") or hasattr(vlm, "vision_tower") or hasattr(vlm, "mm_projector"): + if vlm.is_loaded: + return vlm + + vlm.llm, vlm.tokenizer = build_llm_and_tokenizer(llm_cfg, config, *args, **kwargs) + vlm.vision_tower = build_vision_tower(vision_tower_cfg, config) + vlm.mm_projector = build_mm_projector(mm_projector_cfg, config) + if mask_encoder_cfg is not None: + raise NotImplementedError("Mask encoder is not supported.") + vlm.context_provider = build_context_provider(context_provider_cfg, config) if context_provider_cfg is not None else None + + self.post_config() + self.is_loaded = True + + # FIXME(ligeng, yunhao): llm should never be none here. + assert ( + vlm.llm is not None or vlm.vision_tower is not None or vlm.mm_projector is not None + ), "At least one of the components must be instantiated." 
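+ # NOTE: the `self.post_config()` and `self.is_loaded = True` calls above sit inside a classmethod where `self` is undefined; they presumably should act on `vlm` (e.g. `vlm.post_config()`).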
+ return vlm + + ## FIXME we will use this function to save the model in the future + def save_pretrained(self, output_dir, state_dict=None): + if state_dict is None: + # other wise fetch from deepspeed + # state_dict = accelerator.get_state_dict(is_deepspeed_enabled) + state_dict = self.state_dict() + + if getattr(self, "tokenizer", None): + self.tokenizer.save_pretrained(osp.join(output_dir, "llm")) + + if self.get_llm(): + print(f"saving llm to {osp.join(output_dir, 'llm')}") + self.llm.config._name_or_path = osp.join(output_dir, "llm") + llm_state_dict = OrderedDict({k.split("llm.")[-1]: v for k, v in state_dict.items() if "llm" in k}) + self.llm.save_pretrained(os.path.join(output_dir, "llm"), state_dict=llm_state_dict) + self.config.llm_cfg = self.llm.config + + if self.get_vision_tower() and "radio" not in self.get_vision_tower().__class__.__name__.lower(): + print(f"saving vision_tower to {osp.join(output_dir, 'vision_tower')}") + self.vision_tower.config._name_or_path = osp.join(output_dir, "vision_tower") + vision_tower_state_dict = OrderedDict( + {k.split("vision_tower.vision_tower.")[-1]: v for k, v in state_dict.items() if "vision_tower" in k} + ) + self.vision_tower.vision_tower.save_pretrained( + os.path.join(output_dir, "vision_tower"), + state_dict=vision_tower_state_dict, + ) + self.vision_tower.image_processor.save_pretrained(os.path.join(output_dir, "vision_tower")) + self.config.vision_tower_cfg = self.vision_tower.config + if hasattr(self.config.vision_tower_cfg, 'auto_map'): + delattr(self.config.vision_tower_cfg, 'auto_map') + + if self.get_mm_projector(): + print(f"saving mm_projector to {osp.join(output_dir, 'mm_projector')}") + self.mm_projector.config._name_or_path = osp.join(output_dir, "mm_projector") + mm_projector_state_dict = OrderedDict( + {k.split("mm_projector.")[-1]: v for k, v in state_dict.items() if "mm_projector" in k} + ) + self.mm_projector.save_pretrained( + os.path.join(output_dir, "mm_projector"), + state_dict=mm_projector_state_dict, + ) + self.config.mm_projector_cfg = self.mm_projector.config + + if self.get_context_provider(): + print(f"saving context_provider to {osp.join(output_dir, 'context_provider')}") + self.context_provider.config._name_or_path = osp.join(output_dir, "context_provider") + context_provider_state_dict = OrderedDict( + {k.split("context_provider.")[-1]: v for k, v in state_dict.items() if "context_provider" in k} + ) + self.context_provider.save_pretrained( + os.path.join(output_dir, "context_provider"), + state_dict=context_provider_state_dict, + ) + self.config.context_provider_cfg = self.context_provider.config + + ## update and save top-level config + self.config._name_or_path = output_dir + self.config.architectures = [self.__class__.__name__] + self.config.save_pretrained(output_dir) + + + def get_llm(self): + llm = getattr(self, "llm", None) + if type(llm) is list: + llm = llm[0] + return llm + + def get_lm_head(self): + lm_head = getattr(self.get_llm(), "lm_head", None) + return lm_head + + def get_vision_tower(self): + vision_tower = getattr(self, "vision_tower", None) + if type(vision_tower) is list: + vision_tower = vision_tower[0] + return vision_tower + + def get_mm_projector(self): + mm_projector = getattr(self, "mm_projector", None) + if type(mm_projector) is list: + mm_projector = mm_projector[0] + return mm_projector + + def get_context_provider(self): + context_provider = getattr(self, "context_provider", None) + return context_provider + + def post_config(self): + self.training = 
self.get_llm().training
+ ## configuration
+ if getattr(self.config, "llm_cfg", None) is None:
+ self.config.llm_cfg = self.llm.config
+ if getattr(self.config, "vision_tower_cfg", None) is None:
+ self.config.vision_tower_cfg = self.vision_tower.config
+ if getattr(self.config, "mm_projector_cfg", None) is None:
+ self.config.mm_projector_cfg = self.mm_projector.config
+ if getattr(self.config, "context_provider_cfg", None) is None and self.context_provider is not None:
+ self.config.context_provider_cfg = self.context_provider.config
+
+ def freezed_module_patch(self):
+ '''
+ Huggingface will call model.train() at each training_step. To ensure the expected behaviors for modules like dropout, batchnorm, etc., we need to call model.eval() on the frozen modules.
+ '''
+ if self.training:
+ if self.get_llm() and not getattr(self.config, "tune_language_model", False):
+ logging.warning("Caution: your LLM is currently in training mode to ensure accurate gradient computation. Be careful with modules like BatchNorm and Dropout.")
+ if self.get_vision_tower() and not getattr(self.config, "tune_vision_tower", False):
+ self.get_vision_tower().eval()
+ if self.get_mm_projector() and not getattr(self.config, "tune_mm_projector", False):
+ self.get_mm_projector().eval()
+ if self.get_context_provider() and not getattr(self.config, "tune_context_provider", False):
+ self.get_context_provider().eval()
+
+ def encode_images(self, images):
+ image_features = self.get_vision_tower()(images)
+ image_features = self.get_mm_projector()(image_features)
+ return image_features
+
+ def encode_images_with_context(self, images):
+ context_provider = self.get_context_provider()
+ # A sample is a cimage (image with context) if its two 4-channel halves differ; if they match exactly, it is a plain image.
+ cimage_mask = torch.any((images[:, :4, ...] != images[:, 4:, ...]).flatten(start_dim=1), dim=1)
+
+ if context_provider.treat_image_as_cimage:
+ # If the context provider treats the image as cimage, then all images are cimage.
+ cimage_mask[:] = True
+
+ if context_provider.context_image_as_queries:
+ # Swap the crop image and full image since the model uses the full image as queries by default
+ images = torch.cat((images[:, 4:, ...], images[:, :4, ...]), dim=1)
+
+ # Process the first 4 channels for all images: for an image it's the image itself, for a cimage it's the full image
+ vision_tower = self.get_vision_tower()
+ # Encode context images (full images)
+ image_features = vision_tower(images[:, :4, ...]).to(self.device)
+ # Each cimage has 8 channels (full and crop concatenated)
+ cimage_concatenated = images[cimage_mask]
+ cimage_full_features = image_features[cimage_mask]
+ if context_provider.context_provider_type == "cross_attn_end_to_all":
+ cimage_features = self.context_provider(
+ cimage_full_features=cimage_full_features,
+ cimage_concatenated=cimage_concatenated,
+ vision_tower=vision_tower
+ ).to(self.device)
+ elif context_provider.context_provider_type == "concat":
+ # Full features of cimages are computed but not used.
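+ # The "concat" provider only consumes the 8-channel crop+full tensor, so cimage_full_features is not passed in the call below.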
+ cimage_features = self.context_provider( + cimage_concatenated=cimage_concatenated, + vision_tower=vision_tower + ).to(self.device) + else: + raise NotImplementedError(f"Context provider type {context_provider.context_provider_type} not implemented.") + # Put cimage_features into image_features + image_features[cimage_mask] = cimage_features + + # Project to the llm space + image_features = self.get_mm_projector()(image_features) + + return image_features + + ## @yunhao: is there a better way to handle function call and attributes for llm? + ## support beam search + def _temporary_reorder_cache(self, past_key_values, sorted_idx): + return self.get_llm()._temporary_reorder_cache(past_key_values, sorted_idx) + + def get_input_embeddings(self): + return self.get_llm().get_input_embeddings() + + def get_output_embeddings(self): + return self.get_llm().get_output_embeddings() + + def resize_token_embeddings(self, embed_size): + self.get_llm().resize_token_embeddings(embed_size) + + + +class LlavaMetaForCausalLM(ABC): + """This class is originally implemented by the LLaVA team and + modified by Haotian Tang and Jason Lu based on Ji Lin's implementation + to support multiple images and input packing.""" + + ## TODO move the forward function here if there is no need to override it + def prepare_inputs_labels_for_multimodal( + self, input_ids, position_ids, attention_mask, past_key_values, labels, images + ): + vision_tower = self.get_vision_tower() + if vision_tower is None or images is None or input_ids.shape[1] == 1: + if ( + past_key_values is not None + and vision_tower is not None + and images is not None + and input_ids.shape[1] == 1 + ): + target_shape = past_key_values[-1][-1].shape[-2] + 1 + attention_mask = torch.cat( + ( + attention_mask, + torch.ones( + ( + attention_mask.shape[0], + target_shape - attention_mask.shape[1], + ), + dtype=attention_mask.dtype, + device=attention_mask.device, + ), + ), + dim=1, + ) + position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1 + return ( + input_ids, + position_ids, + attention_mask, + past_key_values, + None, + labels, + ) + # handle different image dtypes for packing + if type(images) is list: + images = torch.cat(images, dim=0) + elif images.ndim == 5: # batch_size x seq_len x image_channels + images = images.flatten(0, 1) + if getattr(self, "context_provider", None): + image_features = self.encode_images_with_context(images) + else: + # Since we slice it with index below, turning it into a list splits things by the first index which does not result in data copy or degrade performance. + # Example dimension: [1, 196, 2560] + assert images.shape[1] <= 4, f"images have more than 4 channels, but context provider is not included" + image_features = self.encode_images(images).to(self.device) + # Note (kentang-mit@): image start / end is not implemented here to support pretraining. + if getattr(self.config, "turn_mm_projector", False) and getattr(self.config, "mm_use_im_start_end", False): + raise NotImplementedError + + # Let's just add dummy tensors if they do not exist, + # it is a headache to deal with None all the time. + # But it is not ideal, and if you have a better idea, + # please open an issue / submit a PR, thanks. 
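+ # Keep the original (possibly None) labels / position_ids / attention_mask so they can be restored to None at the end, then fill in defaults: an all-ones attention mask, sequential position ids, and labels full of IGNORE_INDEX.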
+ _labels = labels
+ _position_ids = position_ids
+ _attention_mask = attention_mask
+ if attention_mask is None:
+ attention_mask = torch.ones_like(input_ids, dtype=torch.bool)
+ else:
+ attention_mask = attention_mask.bool()
+ if position_ids is None:
+ position_ids = torch.arange(0, input_ids.shape[1], dtype=torch.long, device=input_ids.device)
+ if labels is None:
+ labels = torch.full_like(input_ids, IGNORE_INDEX)
+
+ # remove the padding using attention_mask
+ input_ids_copy = input_ids.clone()
+ # kentang-mit@: Otherwise tokenizer out of bounds. Embeddings of image tokens will not be used.
+ input_ids_copy[input_ids_copy == IMAGE_TOKEN_INDEX] = 0
+ input_embeds = self.llm.model.embed_tokens(input_ids_copy)
+
+ input_ids = [
+ cur_input_ids[cur_attention_mask] for cur_input_ids, cur_attention_mask in zip(input_ids, attention_mask)
+ ]
+ input_embeds_1 = [
+ cur_input_embeds[cur_attention_mask]
+ for cur_input_embeds, cur_attention_mask in zip(input_embeds, attention_mask)
+ ]
+ labels = [cur_labels[cur_attention_mask] for cur_labels, cur_attention_mask in zip(labels, attention_mask)]
+
+ new_input_embeds = []
+ new_labels = []
+ cur_image_idx = 0
+
+ # print("BEFORE BATCH LOOP:", len(input_ids), input_ids[0].shape, input_ids[0].device, [(x == IMAGE_TOKEN_INDEX).sum() for x in input_ids])
+
+ # kentang-mit@: If some part of the model is executed in the loop, the loop length needs to be a constant.
+ for batch_idx, cur_input_ids in enumerate(input_ids):
+ cur_input_ids = input_ids[batch_idx]
+ num_images = (cur_input_ids == IMAGE_TOKEN_INDEX).sum()
+ if num_images == 0:
+ cur_image_features = image_features[0]
+ # cur_input_embeds_1 = self.get_llm().embed_tokens(cur_input_ids)
+ cur_input_embeds_1 = input_embeds_1[batch_idx]
+ cur_input_embeds = torch.cat([cur_input_embeds_1, cur_image_features[0:0]], dim=0)
+ new_input_embeds.append(cur_input_embeds)
+ new_labels.append(labels[batch_idx])
+ # kentang-mit@: we do not have a placeholder image for text-only data now.
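+ # Concatenating the empty slice cur_image_features[0:0] keeps text-only samples on the same code path without consuming an image feature.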
+ # cur_image_idx += 1 + continue + + cur_input_embeds = input_embeds_1[batch_idx] + image_token_indices = ( + [-1] + torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0].tolist() + [cur_input_ids.shape[0]] + ) + cur_input_ids_noim = [] + cur_labels = labels[batch_idx] + cur_labels_noim = [] + cur_input_embeds_no_im = [] + for i in range(len(image_token_indices) - 1): + cur_input_ids_noim.append(cur_input_ids[image_token_indices[i] + 1 : image_token_indices[i + 1]]) + cur_labels_noim.append(cur_labels[image_token_indices[i] + 1 : image_token_indices[i + 1]]) + cur_input_embeds_no_im.append(cur_input_embeds[image_token_indices[i] + 1 : image_token_indices[i + 1]]) + split_sizes = [x.shape[0] for x in cur_labels_noim] + # cur_input_embeds = self.get_llm().embed_tokens(torch.cat(cur_input_ids_noim)) + # cur_input_embeds_no_im = torch.split(cur_input_embeds, split_sizes, dim=0) + cur_new_input_embeds = [] + cur_new_labels = [] + for i in range(num_images + 1): + cur_new_input_embeds.append(cur_input_embeds_no_im[i]) + cur_new_labels.append(cur_labels_noim[i]) + if i < num_images: + cur_image_features = image_features[cur_image_idx] + cur_image_idx += 1 + cur_new_input_embeds.append(cur_image_features) + cur_new_labels.append( + torch.full( + (cur_image_features.shape[0],), + IGNORE_INDEX, + device=cur_labels.device, + dtype=cur_labels.dtype, + ) + ) + + cur_new_input_embeds = torch.cat(cur_new_input_embeds) + cur_new_labels = torch.cat(cur_new_labels) + + new_input_embeds.append(cur_new_input_embeds) + new_labels.append(cur_new_labels) + + # Truncate sequences to max length as image embeddings can make the sequence longer + tokenizer_model_max_length = getattr(self.llm.config, "tokenizer_model_max_length", None) + if tokenizer_model_max_length is not None: + if any(len(x) > tokenizer_model_max_length for x in new_input_embeds): + warnings.warn("Inputs truncated!") + new_input_embeds = [x[:tokenizer_model_max_length] for x in new_input_embeds] + new_labels = [x[:tokenizer_model_max_length] for x in new_labels] + # Combine them + max_len = max(x.shape[0] for x in new_input_embeds) + batch_size = len(new_input_embeds) + + new_input_embeds_padded = [] + new_labels_padded = torch.full( + (batch_size, max_len), + IGNORE_INDEX, + dtype=new_labels[0].dtype, + device=new_labels[0].device, + ) + attention_mask = torch.zeros( + (batch_size, max_len), + dtype=attention_mask.dtype, + device=attention_mask.device, + ) + position_ids = torch.zeros((batch_size, max_len), dtype=position_ids.dtype, device=position_ids.device) + + for i, (cur_new_embed, cur_new_labels) in enumerate(zip(new_input_embeds, new_labels)): + cur_len = cur_new_embed.shape[0] + if getattr(self.llm.config, "tokenizer_padding_side", "right") == "left": + new_input_embeds_padded.append( + torch.cat( + ( + torch.zeros( + (max_len - cur_len, cur_new_embed.shape[1]), + dtype=cur_new_embed.dtype, + device=cur_new_embed.device, + ), + cur_new_embed, + ), + dim=0, + ) + ) + if cur_len > 0: + new_labels_padded[i, -cur_len:] = cur_new_labels + attention_mask[i, -cur_len:] = True + position_ids[i, -cur_len:] = torch.arange( + 0, cur_len, dtype=position_ids.dtype, device=position_ids.device + ) + else: + new_input_embeds_padded.append( + torch.cat( + ( + cur_new_embed, + torch.zeros( + (max_len - cur_len, cur_new_embed.shape[1]), + dtype=cur_new_embed.dtype, + device=cur_new_embed.device, + ), + ), + dim=0, + ) + ) + if cur_len > 0: + new_labels_padded[i, :cur_len] = cur_new_labels + attention_mask[i, :cur_len] = True + position_ids[i, :cur_len] = 
torch.arange( + 0, cur_len, dtype=position_ids.dtype, device=position_ids.device + ) + + new_input_embeds = torch.stack(new_input_embeds_padded, dim=0) + + if _labels is None: + new_labels = None + else: + new_labels = new_labels_padded + + if _attention_mask is None: + attention_mask = None + else: + attention_mask = attention_mask.to(dtype=_attention_mask.dtype) + + if _position_ids is None: + position_ids = None + + return ( + None, + position_ids, + attention_mask, + past_key_values, + new_input_embeds, + new_labels, + ) + + def repack_multimodal_data( + self, + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels, + ): + # kentang-mit@: reorder and repack (reduce computation overhead) + # requires transformers replacement. + new_inputs_embeds = [] + new_position_ids = [] + new_labels = [] + seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32) + sorted_seqlens_in_batch, sorted_idx = torch.sort(seqlens_in_batch, descending=True) + # print(sorted_seqlens_in_batch) + max_seqlen = inputs_embeds.shape[1] + + cur_inputs_embeds = [] + cur_position_ids = [] + cur_labels = [] + cur_batch_len = 0 + # print(sorted_seqlens_in_batch.device, len(sorted_seqlens_in_batch), max_seqlen) + for i in range(len(sorted_seqlens_in_batch)): + cur_seqlen = sorted_seqlens_in_batch[i].item() + if cur_seqlen + cur_batch_len <= max_seqlen: + cur_batch_len += cur_seqlen + # each item: num_tokens x num_channels + # remove padding on-the-fly + cur_inputs_embeds.append(inputs_embeds[sorted_idx[i]][attention_mask[sorted_idx[i]]]) + # each item: num_tokens + cur_position_ids.append( + torch.arange( + cur_inputs_embeds[-1].shape[0], + device=cur_inputs_embeds[-1].device, + ) + ) + # each item: num_tokens + # remove padding on-the-fly + cur_labels.append(labels[sorted_idx[i]][attention_mask[sorted_idx[i]]]) + else: + new_inputs_embeds.append(torch.cat(cur_inputs_embeds, 0)) + new_position_ids.append(torch.cat(cur_position_ids, 0)) + new_labels.append(torch.cat(cur_labels, 0)) + # The current batch is too long. We will start a new batch. 
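+ # Greedy packing: sequences are visited in descending length and concatenated until the next one would exceed max_seqlen, at which point the packed row is flushed and a new one is started.
+ # Hypothetical example: with max_seqlen=8 and sorted lengths [5, 4, 3, 2], the packed rows hold lengths [5], [4, 3] and [2].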
+ cur_batch_len = cur_seqlen + cur_inputs_embeds = [inputs_embeds[sorted_idx[i]][attention_mask[sorted_idx[i]]]] + cur_position_ids = [ + torch.arange( + cur_inputs_embeds[-1].shape[0], + device=cur_inputs_embeds[-1].device, + ) + ] + cur_labels = [labels[sorted_idx[i]][attention_mask[sorted_idx[i]]]] + + if len(cur_inputs_embeds): + new_inputs_embeds.append(torch.cat(cur_inputs_embeds, 0)) + new_position_ids.append(torch.cat(cur_position_ids, 0)) + new_labels.append(torch.cat(cur_labels, 0)) + + # print(new_position_ids[0].device, [x.shape for x in new_inputs_embeds], [x.shape for x in new_labels], [x.shape for x in new_position_ids]) + # assert 0 + new_inputs_embeds = torch.nn.utils.rnn.pad_sequence( + new_inputs_embeds, batch_first=True, padding_value=self.llm.pad_token_id + ) + + new_position_ids = torch.nn.utils.rnn.pad_sequence(new_position_ids, batch_first=True, padding_value=-1) + + new_labels = torch.nn.utils.rnn.pad_sequence(new_labels, batch_first=True, padding_value=IGNORE_INDEX) + ## yunhao: it's currently a workaround to avoid errors for seq_len < 100 + new_attention_mask = new_position_ids.ne(-1) + # sanity check + assert new_attention_mask.sum() == attention_mask.sum() + # print(new_inputs_embeds.shape, (new_attention_mask.sum(1))) + # print(sorted_seqlens_in_batch.device, sorted_seqlens_in_batch, new_attention_mask.sum(1)) + + # return None, position_ids, attention_mask, past_key_values, new_input_embeds, new_labels + return ( + None, + new_position_ids, + new_attention_mask, + past_key_values, + new_inputs_embeds, + new_labels, + sorted_seqlens_in_batch, + ) + + def initialize_vision_tokenizer(self, model_args, tokenizer): + if model_args.mm_use_im_patch_token: + tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True) + self.resize_token_embeddings(len(tokenizer)) + + if model_args.mm_use_im_start_end: + num_new_tokens = tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True) + self.resize_token_embeddings(len(tokenizer)) + + if num_new_tokens > 0: + input_embeddings = self.get_input_embeddings().weight.data + output_embeddings = self.get_output_embeddings().weight.data + + input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True) + output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True) + + input_embeddings[-num_new_tokens:] = input_embeddings_avg + output_embeddings[-num_new_tokens:] = output_embeddings_avg + ## TODO yunhao: handle cases for + if model_args.pretrain_mm_mlp_adapter: + mm_projector_weights = torch.load(model_args.pretrain_mm_mlp_adapter, map_location="cpu") + embed_tokens_weight = mm_projector_weights["model.embed_tokens.weight"] + assert num_new_tokens == 2 + if input_embeddings.shape == embed_tokens_weight.shape: + input_embeddings[-num_new_tokens:] = embed_tokens_weight[-num_new_tokens:] + elif embed_tokens_weight.shape[0] == num_new_tokens: + input_embeddings[-num_new_tokens:] = embed_tokens_weight + else: + raise ValueError( + f"Unexpected embed_tokens_weight shape. Pretrained: {embed_tokens_weight.shape}. Current: {input_embeddings.shape}. Numer of new tokens: {num_new_tokens}." 
+ ) + elif model_args.mm_use_im_patch_token: + if model_args.mm_projector: + for p in self.get_input_embeddings().parameters(): + p.requires_grad = False + for p in self.get_output_embeddings().parameters(): + p.requires_grad = False diff --git a/dam/model/mm_utils.py b/dam/model/mm_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a90643e2c5eac574e09962f391e93f5b7544b86e --- /dev/null +++ b/dam/model/mm_utils.py @@ -0,0 +1,312 @@ +# Copyright 2024 NVIDIA CORPORATION & AFFILIATES +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +from PIL import Image +from io import BytesIO +import base64 +import numpy as np +import os + +import torch +from transformers import StoppingCriteria +from .constants import IMAGE_TOKEN_INDEX + +import tempfile +from io import BytesIO + + +def get_frame_from_vcap(vidcap, num_frames=10, fps=None, frame_count=None): + import cv2 + + if fps == None or frame_count == None: + # if one of fps or frame_count is None, still recompute + fps = vidcap.get(cv2.CAP_PROP_FPS) + frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)) + if fps == 0 or frame_count == 0: + print("Video file not found. return empty images.") + return [ + Image.new("RGB", (720, 720)), + ] * num_frames + + duration = frame_count / fps + frame_interval = frame_count // num_frames + if frame_interval == 0 and frame_count <= 1: + print("frame_interval is equal to 0. return empty image.") + return [ + Image.new("RGB", (720, 720)), + ] * num_frames + # print("duration:", duration, "frames:", frame_count, "intervals:", frame_interval) + + images = [] + count = 0 + success = True + frame_indices = np.linspace(0, frame_count - 2, num_frames, dtype=int) + + while success: + # print("frame_count:", frame_count, "count:", count, "num_frames:", num_frames, "frame_interval:", frame_interval) + if frame_count >= num_frames: + success, frame = vidcap.read() + if count in frame_indices: + img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + im_pil = Image.fromarray(img) + images.append(im_pil) + if len(images) >= num_frames: + return images + count += 1 + else: + # Left padding frames if the video is not long enough + success, frame = vidcap.read() + if success: + img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + im_pil = Image.fromarray(img) + images.append(im_pil) + count += 1 + elif count >= 1: + width, height = images[-1].size + images = [Image.new("RGB", (width, height))] * \ + (num_frames - len(images)) + images + print("padding frames:", (num_frames - len(images))) + return images + else: + break + raise ValueError( + "Did not find enough frames in the video. return empty image.") + + +def opencv_extract_frames(vpath_or_bytesio, frames=6, fps=None, frame_count=None): + """ + Extract frames from a video using OpenCV. + + Args: + vpath_or_bytesio (str or BytesIO): Path to the video file or BytesIO object containing the video. + frames (int): Number of frames to extract from the video. + + Returns: + list: List of PIL Images extracted from the video. 
+ + Raises: + NotImplementedError: If the type of `vpath_or_bytesio` is not supported. + """ + import cv2 + + if isinstance(vpath_or_bytesio, str): + vidcap = cv2.VideoCapture(vpath_or_bytesio) + return get_frame_from_vcap(vidcap, frames, fps=fps, frame_count=frame_count) + elif isinstance(vpath_or_bytesio, (BytesIO,)): + # assuming mp4 + with tempfile.NamedTemporaryFile(delete=True, suffix=".mp4") as temp_video: + temp_video.write(vpath_or_bytesio.read()) + temp_video_name = temp_video.name + vidcap = cv2.VideoCapture(temp_video_name) + return get_frame_from_vcap(vidcap, frames, fps=fps, frame_count=frame_count) + else: + raise NotImplementedError(type(vpath_or_bytesio)) + + +def load_image_from_base64(image): + return Image.open(BytesIO(base64.b64decode(image))) + + +def expand2square(pil_img, background_color): + """ + Expand the given PIL image to a square shape by adding padding. + + Parameters: + - pil_img: The PIL image to be expanded. + - background_color: The color of the padding to be added. + + Returns: + - The expanded PIL image. + + If the image is already square, it is returned as is. + If the image is wider than it is tall, padding is added to the top and bottom. + If the image is taller than it is wide, padding is added to the left and right. + """ + width, height = pil_img.size + if pil_img.mode == 'L': + background_color = background_color[0] + if width == height: + return pil_img + elif width > height: + result = Image.new(pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new(pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + + +def process_image(image_file, data_args, image_folder, pil_preprocess_fn=None): + processor = data_args.image_processor + if isinstance(image_file, str): + if image_folder is not None: + image = Image.open(os.path.join( + image_folder, image_file)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + else: + # image is stored in bytearray + image = image_file.convert("RGB") + + info = None + + if pil_preprocess_fn is not None: + image = pil_preprocess_fn(image) + if isinstance(image, tuple): + image, info = image + + if data_args.image_aspect_ratio == "resize": + if hasattr(data_args.image_processor, "crop_size"): + # CLIP vision tower + crop_size = data_args.image_processor.crop_size + else: + # SIGLIP vision tower + assert hasattr(data_args.image_processor, "size") + crop_size = data_args.image_processor.size + image = image.resize((crop_size["height"], crop_size["width"])) + if data_args.image_aspect_ratio == "pad": + + def expand2square(pil_img, background_color): + width, height = pil_img.size + if width == height: + return pil_img + elif width > height: + result = Image.new( + pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new( + pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + + image = expand2square(image, tuple(int(x * 255) + for x in processor.image_mean)) + image = processor.preprocess(image, return_tensors="pt")[ + "pixel_values"][0] + else: + # Using default behavior of the vision encoder + # For CLIP, default is central crop + # For Radio, default is central crop + # For Siglip, default is resize + # For InternVIT, default is resize + image = processor.preprocess(image, return_tensors="pt")[ + 
"pixel_values"][0] + if info is not None: + return image, info + return image + + +def process_images(images, image_processor, model_cfg): + + model_cfg.image_processor = image_processor + new_images = [process_image(image, model_cfg, None) for image in images] + + if all(x.shape == new_images[0].shape for x in new_images): + new_images = torch.stack(new_images, dim=0) + return new_images + + +# Note that newer VILA codebase adds an lstrip option that defaults to False, and the functionality is the same by default +def tokenizer_image_token( + prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors=None +): + prompt_chunks = [ + tokenizer(chunk).input_ids for chunk in prompt.split("")] + + def insert_separator(X, sep): + return [ele for sublist in zip(X, [sep] * len(X)) for ele in sublist][:-1] + + input_ids = [] + offset = 0 + if ( + len(prompt_chunks) > 0 + and len(prompt_chunks[0]) > 0 + and prompt_chunks[0][0] == tokenizer.bos_token_id + ): + offset = 1 + input_ids.append(prompt_chunks[0][0]) + + for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)): + input_ids.extend(x[offset:]) + + if return_tensors is not None: + if return_tensors == "pt": + return torch.tensor(input_ids, dtype=torch.long) + raise ValueError(f"Unsupported tensor type: {return_tensors}") + return input_ids + + +def is_gemma_tokenizer(tokenizer): + return "gemma" in tokenizer.__class__.__name__.lower() + + +def get_model_name_from_path(model_path): + model_path = model_path.strip("/") + model_paths = model_path.split("/") + if model_paths[-1].startswith("checkpoint-"): + return model_paths[-2] + "_" + model_paths[-1] + else: + return model_paths[-1] + + +class KeywordsStoppingCriteria(StoppingCriteria): + def __init__(self, keywords, tokenizer, input_ids): + self.keywords = keywords + self.keyword_ids = [] + self.max_keyword_len = 0 + for keyword in keywords: + cur_keyword_ids = tokenizer(keyword).input_ids + if ( + len(cur_keyword_ids) > 1 + and cur_keyword_ids[0] == tokenizer.bos_token_id + ): + cur_keyword_ids = cur_keyword_ids[1:] + if len(cur_keyword_ids) > self.max_keyword_len: + self.max_keyword_len = len(cur_keyword_ids) + self.keyword_ids.append(torch.tensor(cur_keyword_ids)) + self.tokenizer = tokenizer + self.start_len = input_ids.shape[1] + + def call_for_batch( + self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs + ) -> bool: + offset = min(output_ids.shape[1] - + self.start_len, self.max_keyword_len) + self.keyword_ids = [ + keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids + ] + for keyword_id in self.keyword_ids: + if (output_ids[0, -keyword_id.shape[0]:] == keyword_id).all(): + return True + outputs = self.tokenizer.batch_decode( + output_ids[:, -offset:], skip_special_tokens=True + )[0] + for keyword in self.keywords: + if keyword in outputs: + return True + return False + + def __call__( + self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs + ) -> bool: + outputs = [] + for i in range(output_ids.shape[0]): + outputs.append(self.call_for_batch( + output_ids[i].unsqueeze(0), scores)) + return all(outputs) diff --git a/dam/model/model_utils.py b/dam/model/model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..417e45d65531caf3c8811be19ecd4eeeab373ffb --- /dev/null +++ b/dam/model/model_utils.py @@ -0,0 +1,268 @@ +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch +import torch.nn as nn +import os +import warnings +from typing import Optional, Union, List, 
Tuple +from transformers import ( + AutoTokenizer, + AutoModel, + AutoModelForCausalLM, + AutoConfig, + BitsAndBytesConfig, + PretrainedConfig, + PreTrainedModel, + LlamaConfig, + LlamaModel, +) +from transformers.modeling_outputs import CausalLMOutputWithPast +from transformers import PretrainedConfig + +from .llava_arch import LlavaMetaModel, LlavaMetaForCausalLM +from .language_model.llava_llama import LlavaLlamaConfig +# TODO: we may move LlavaConfig to configuration_llava.py +# from model.configuration_llava import LlavaConfig + +class LlavaLlamaModel(LlavaMetaModel, LlavaMetaForCausalLM, PreTrainedModel): + config_class = LlavaLlamaConfig + main_input_name = "input_embeds" + supports_gradient_checkpointing = True + + def __init__(self, config: LlavaLlamaConfig = None, *args, **kwargs) -> None: + super().__init__(config) + self.init_vlm(config=config, *args, **kwargs) + + @classmethod + def from_pretrained( + cls, + pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], + *model_args, + config: Optional[Union[PretrainedConfig, str, os.PathLike]] = None, + cache_dir: Optional[Union[str, os.PathLike]] = None, + ignore_mismatched_sizes: bool = False, + force_download: bool = False, + local_files_only: bool = False, + token: Optional[Union[str, bool]] = None, + revision: str = "main", + use_safetensors: bool = None, + **kwargs, + ): + if hasattr(cls, "load_pretrained"): + return cls.load_pretrained(pretrained_model_name_or_path, + *model_args, config=config, cache_dir=cache_dir, ignore_mismatched_sizes=ignore_mismatched_sizes, force_download=force_download, local_files_only=local_files_only, token=token, + revision=revision, use_safetensors=use_safetensors, **kwargs + ) + return super(LlavaLlamaModel).from_pretrained(pretrained_model_name_or_path, + *model_args, config=config, cache_dir=cache_dir, ignore_mismatched_sizes=ignore_mismatched_sizes, force_download=force_download, local_files_only=local_files_only, token=token, + revision=revision, use_safetensors=use_safetensors, **kwargs) + + def forward( + self, + input_ids: torch.LongTensor = None, + images: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, CausalLMOutputWithPast]: + self.freezed_module_patch() + if inputs_embeds is None: + ( + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels, + ) = self.prepare_inputs_labels_for_multimodal( + input_ids, position_ids, attention_mask, past_key_values, labels, images + ) + # Note (kentang-mit@): we have a unit test for this function. 
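+ # In training, the padded batch is repacked into fewer, denser rows (see repack_multimodal_data); at inference the tensors pass through unchanged and per-sample lengths come from the attention mask.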
+ if self.training: + ( + _, + new_position_ids, + new_attention_mask, + _, + new_inputs_embeds, + new_labels, + sorted_seqlens_in_batch, + ) = self.repack_multimodal_data( + input_ids, + position_ids, + attention_mask, + past_key_values, + inputs_embeds, + labels, + ) + new_input_ids = None + past_key_values = None + else: + new_attention_mask = attention_mask + new_position_ids = position_ids + new_inputs_embeds = inputs_embeds + new_labels = labels + sorted_seqlens_in_batch = attention_mask.sum(-1).int() + new_input_ids = input_ids + + outputs = self.llm.forward( + input_ids=new_input_ids, + attention_mask=new_attention_mask, + position_ids=new_position_ids, + past_key_values=past_key_values, + inputs_embeds=new_inputs_embeds, + labels=new_labels, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + seqlens_in_batch=sorted_seqlens_in_batch, + ) + return outputs + + @torch.no_grad() + def generate( + self, + input_ids: Optional[torch.FloatTensor] = None, + images: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.LongTensor] = None, + **generation_kwargs, + ): + if images is not None: + ( + _, + _, + attention_mask, + _, + inputs_embeds, + _, + ) = self.prepare_inputs_labels_for_multimodal( + input_ids, None, attention_mask, None, None, images + ) + else: + inputs_embeds = self.get_input_embeddings()(input_ids) + inputs_embeds = inputs_embeds.to(self.dtype) + + outputs = self.llm.generate( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + **generation_kwargs + ) + return outputs + + +def disable_torch_init(): + """ + Disable the redundant torch default initialization to accelerate model creation. + """ + import torch + setattr(torch.nn.Linear, "reset_parameters", lambda self: None) + setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None) + + +def load_pretrained_model( + model_path, + model_name, + model_base=None, + load_8bit=False, + load_4bit=False, + device_map="auto", + device="cuda", + **kwargs, +): + kwargs = {"device_map": device_map, **kwargs} + + if device != "cuda": + kwargs["device_map"] = {"": device} + + if load_8bit: + kwargs["load_in_8bit"] = True + elif load_4bit: + kwargs["load_in_4bit"] = True + kwargs["quantization_config"] = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + ) + else: + kwargs["torch_dtype"] = torch.float16 + + config = AutoConfig.from_pretrained(model_path) + config.resume_path = model_path + prepare_config_for_eval(config, kwargs) + + model = LlavaLlamaModel( + config=config, + low_cpu_mem_usage=True, + **kwargs + ) + tokenizer = model.tokenizer + + model.eval() + + # mm_use_im_start_end = getattr( + # model.config, "mm_use_im_start_end", False) + # mm_use_im_patch_token = getattr( + # model.config, "mm_use_im_patch_token", True) + # if mm_use_im_patch_token: + # tokenizer.add_tokens( + # [DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True) + # if mm_use_im_start_end: + # tokenizer.add_tokens( + # [DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True + # ) + + model.resize_token_embeddings(len(tokenizer)) + vision_tower = model.get_vision_tower() + vision_tower.to(device=device, dtype=torch.float16) + mm_projector = model.get_mm_projector() + mm_projector.to(device=device, dtype=torch.float16) + context_provider = model.get_context_provider() + if context_provider is not None: + context_provider.to(device=device, 
dtype=torch.float16) + image_processor = vision_tower.image_processor + + if hasattr(model.llm.config, "max_sequence_length"): + context_len = model.config.max_sequence_length + else: + context_len = 2048 + + return tokenizer, model, image_processor, context_len + + +def parse_model_name_or_path(config: PretrainedConfig, model_name="llm", suffix="_cfg"): + target_model = f"{model_name}{suffix}" + target_cfg = getattr(config, target_model, None) + + if isinstance(target_cfg, str): + return target_cfg + elif isinstance(target_cfg, dict): + return target_cfg["architectures"][0] + else: + raise ValueError(f"Invalid {target_model} configuration!") + + +def prepare_config_for_eval(config: PretrainedConfig, kwargs: dict): + try: + # compatible with deprecated config convention + if getattr(config, "vision_tower_cfg", None) is None: + config.vision_tower_cfg = config.mm_vision_tower + except AttributeError: + raise ValueError( + f"Invalid configuration! Cannot find vision_tower in config:\n{config}") + + config.model_dtype = kwargs.pop("torch_dtype").__str__() + # siglip does not support device_map = "auto" + vision_tower_name = parse_model_name_or_path(config, "vision_tower") + if "siglip" in vision_tower_name.lower(): + kwargs["device_map"] = "cuda" + + +AutoConfig.register("llava_llama", LlavaLlamaConfig) +AutoModel.register(LlavaLlamaConfig, LlavaLlamaModel) diff --git a/dam/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc b/dam/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b1234055c951fa4bb1ab827642aa7c6129e5c8f Binary files /dev/null and b/dam/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc differ diff --git a/dam/model/multimodal_encoder/__pycache__/context_provider.cpython-310.pyc b/dam/model/multimodal_encoder/__pycache__/context_provider.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0abb00ba806d791e716ab72ef178f695cc57dbf7 Binary files /dev/null and b/dam/model/multimodal_encoder/__pycache__/context_provider.cpython-310.pyc differ diff --git a/dam/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc b/dam/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38fa1857813913950716b4f013b47f9c03e54168 Binary files /dev/null and b/dam/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc differ diff --git a/dam/model/multimodal_encoder/__pycache__/vision_encoder.cpython-310.pyc b/dam/model/multimodal_encoder/__pycache__/vision_encoder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13ffbda202b1a01a26023b917736b1a21e8029eb Binary files /dev/null and b/dam/model/multimodal_encoder/__pycache__/vision_encoder.cpython-310.pyc differ diff --git a/dam/model/multimodal_encoder/builder.py b/dam/model/multimodal_encoder/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..7357effbca4ca98b317aa0de9b8a4b36791983f4 --- /dev/null +++ b/dam/model/multimodal_encoder/builder.py @@ -0,0 +1,54 @@ +# This file is modified from https://github.com/haotian-liu/LLaVA/ +import torch +import os +from transformers import AutoConfig, PretrainedConfig, PreTrainedModel +from .siglip_encoder import SiglipVisionTower +from .context_provider import ContextProvider, ContextProviderConfig + +def build_vision_tower( + model_name_or_path: str, config: PretrainedConfig +) -> PreTrainedModel: + ## skip vision 
tower instantiation + if model_name_or_path is None: + return None + + vision_tower_arch = None + if config.resume_path and "radio" not in model_name_or_path: + assert os.path.exists( + model_name_or_path + ), f"Resume vision tower path {model_name_or_path} does not exist!" + vision_tower_cfg = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) + vision_tower_arch = vision_tower_cfg.architectures[0].lower() + vision_tower_name = ( + vision_tower_arch if vision_tower_arch is not None else model_name_or_path + ) + + if "siglip" in vision_tower_name: + vision_tower = SiglipVisionTower(model_name_or_path, config) + else: + raise ValueError(f"Unknown vision tower: {model_name_or_path}") + + config.mm_hidden_size = vision_tower.config.hidden_size + return vision_tower + +def build_context_provider( + model_type_or_path: str, config: PretrainedConfig +) -> PreTrainedModel: + if model_type_or_path is None: + return None + + ## load from pretrained model + if config.resume_path: + assert os.path.exists( + model_type_or_path + ), f"Resume context provider path {model_type_or_path} does not exist!" + return ContextProvider.from_pretrained( + model_type_or_path, config, torch_dtype=eval(config.model_dtype) + ) + ## build from scratch + else: + mm_projector_cfg = ContextProviderConfig(model_type_or_path) + mm_projector = ContextProvider(mm_projector_cfg, config).to( + eval(config.model_dtype) + ) + return mm_projector diff --git a/dam/model/multimodal_encoder/clip_encoder_ignored.py b/dam/model/multimodal_encoder/clip_encoder_ignored.py new file mode 100644 index 0000000000000000000000000000000000000000..286d656dfb1b3c9722026338e708e31a5129aaf2 --- /dev/null +++ b/dam/model/multimodal_encoder/clip_encoder_ignored.py @@ -0,0 +1,19 @@ +# This file is modified from https://github.com/haotian-liu/LLaVA/ +import torch + +from llava.model.multimodal_encoder.vision_encoder import VisionTower +from transformers import ( + PretrainedConfig, + CLIPVisionModel, + CLIPImageProcessor, +) + + +class CLIPVisionTower(VisionTower): + def __init__(self, model_name_or_path: str, config: PretrainedConfig): + super().__init__(model_name_or_path, config) + self.image_processor = CLIPImageProcessor.from_pretrained(model_name_or_path) + self.vision_tower = CLIPVisionModel.from_pretrained( + model_name_or_path, torch_dtype=eval(config.model_dtype) + ) + self.is_loaded = True diff --git a/dam/model/multimodal_encoder/context_provider.py b/dam/model/multimodal_encoder/context_provider.py new file mode 100644 index 0000000000000000000000000000000000000000..744510d5866bb0e3e4f95a90a08f7c33926723ad --- /dev/null +++ b/dam/model/multimodal_encoder/context_provider.py @@ -0,0 +1,369 @@ +# Copyright 2024 NVIDIA CORPORATION & AFFILIATES +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
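A note on the `eval(config.model_dtype)` calls in `build_context_provider` (and in the CLIP encoder below): `prepare_config_for_eval` stores the dtype as the string form of a `torch.dtype` (e.g. `"torch.float16"`), and `eval` turns that string back into the dtype object. The following sketch shows an explicit lookup that performs the same round-trip without `eval`, under the assumption that only the standard float dtypes occur.

```python
import torch

# Assumed set of dtype strings produced by str(torch.dtype); extend as needed.
_DTYPE_MAP = {
    "torch.float16": torch.float16,
    "torch.bfloat16": torch.bfloat16,
    "torch.float32": torch.float32,
}

def parse_model_dtype(dtype_str: str) -> torch.dtype:
    """Resolve a dtype string such as 'torch.float16' without eval()."""
    try:
        return _DTYPE_MAP[dtype_str]
    except KeyError:
        raise ValueError(f"Unsupported model_dtype string: {dtype_str}")

print(parse_model_dtype(str(torch.float16)))  # torch.float16
```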
+# +# SPDX-License-Identifier: Apache-2.0 + +import torch.nn as nn +import re +import torch +import torch.nn.functional as F +# import deepspeed +from transformers import AutoConfig, AutoModel, PretrainedConfig, PreTrainedModel +# from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled + +def is_deepspeed_zero3_enabled(*args, **kwargs): + return False + +class ContextProviderConfig(PretrainedConfig): + model_type = "context_provider" + + def __init__( + self, + context_provider_type: str=None, + hidden_size=768, + intermediate_size=3072, + num_hidden_layers=12, + num_attention_heads=12, + num_channels=3, + num_mask_channels=0, + image_size=224, + patch_size=16, + hidden_act="gelu_pytorch_tanh", + layer_norm_eps=1e-6, + attention_dropout=0.0, + zero_init_output=True, + residual_dropout=0.0, + context_image_as_queries=False, + context_provider_layer_indices=None, + masked_cross_attn=False, + crop_position_single_embedding=False, + trainable_crop_position_embedding=True, + crop_embedding_mode="add", + treat_image_as_cimage=False, + **kwargs, + ): + super().__init__(**kwargs) + + self.context_provider_type = context_provider_type + + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_attention_heads = num_attention_heads + self.num_channels = num_channels + self.num_mask_channels = num_mask_channels + self.patch_size = patch_size + self.image_size = image_size + self.attention_dropout = attention_dropout + self.layer_norm_eps = layer_norm_eps + self.hidden_act = hidden_act + + self.zero_init_output = zero_init_output + self.residual_dropout = residual_dropout + self.context_image_as_queries = context_image_as_queries + + # cross_attn_end_to_all + # the `num_hidden_layers` should be the same as the one in the vision tower + self.num_hidden_layers = num_hidden_layers + self.context_provider_layer_indices = context_provider_layer_indices + + self.masked_cross_attn = masked_cross_attn + # If enabled, crop_position_embedding (delta to full pos) will be updated during training. + self.trainable_crop_position_embedding = trainable_crop_position_embedding + # If enabled, crop_position_embedding (delta to full pos) will be a single embedding for all positions. + self.crop_position_single_embedding = crop_position_single_embedding + # add: delta. replace: do not add the original positional embedding + self.crop_embedding_mode = crop_embedding_mode + + # If True, the input image will be treated as a cimage (with mask as full 1s) + self.treat_image_as_cimage = treat_image_as_cimage + + +# Context Provider +from transformers.activations import ACT2FN +from typing import Any, Optional, Tuple, Union + +class ContextProviderCrossAttention(nn.Module): + """Multi-headed cross-attention from 'Attention Is All You Need' paper""" + + # Copied from transformers.models.clip.modeling_clip.CLIPAttention.__init__ + def __init__(self, config): + super().__init__() + self.config = config + self.embed_dim = config.hidden_size + self.num_heads = config.num_attention_heads + self.head_dim = self.embed_dim // self.num_heads + if self.head_dim * self.num_heads != self.embed_dim: + raise ValueError( + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {self.num_heads})." 
+ ) + self.scale = self.head_dim**-0.5 + self.dropout = config.attention_dropout + + self.k_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.v_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.q_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.out_proj = nn.Linear(self.embed_dim, self.embed_dim) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel""" + + batch_size, q_len, _ = hidden_states.size() + batch_size, kv_len, _ = encoder_hidden_states.size() + + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(encoder_hidden_states) + value_states = self.v_proj(encoder_hidden_states) + + query_states = query_states.view(batch_size, q_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(batch_size, kv_len, self.num_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(batch_size, kv_len, self.num_heads, self.head_dim).transpose(1, 2) + + k_v_seq_len = key_states.shape[-2] + attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) * self.scale + + if attn_weights.size() != (batch_size, self.num_heads, q_len, k_v_seq_len): + raise ValueError( + f"Attention weights should be of size {(batch_size, self.num_heads, q_len, k_v_seq_len)}, but is" + f" {attn_weights.size()}" + ) + + if attention_mask is not None: + if attention_mask.size() != (batch_size, 1, q_len, k_v_seq_len): + raise ValueError( + f"Attention mask should be of size {(batch_size, 1, q_len, k_v_seq_len)}, but is {attention_mask.size()}" + ) + attn_weights = attn_weights + attention_mask + + # Visualizations (-inf are shown as white) + # import matplotlib.pyplot as plt + # plt.imshow(attention_mask[0, 0, 0].view(27, 27).detach().cpu().numpy()) + # plt.title("Attention mask") + # plt.colorbar() + # plt.show() + + # upcast attention to fp32 + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + + # Visualizations: show the attention weights of the first head, with the first query + # import matplotlib.pyplot as plt + # plt.imshow(attn_weights[0, 0, 0].view(27, 27).detach().cpu().numpy()) + # plt.title("Attention weights") + # plt.colorbar() + # plt.show() + + attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) + attn_output = torch.matmul(attn_weights, value_states) + + if attn_output.size() != (batch_size, self.num_heads, q_len, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(batch_size, self.num_heads, q_len, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(batch_size, q_len, self.embed_dim) + + attn_output = self.out_proj(attn_output) + + return attn_output, attn_weights + +class ContextProviderMLP(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.activation_fn = ACT2FN[config.hidden_act] + self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) + self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.fc1(hidden_states) + hidden_states = self.activation_fn(hidden_states) + hidden_states = self.fc2(hidden_states) + 
return hidden_states + + +def get_token_mask_bias(mask, patch_size): + # Note: mask should be (0, 1) + with torch.no_grad(): + # Add a channel dimension and perform conv + # mask_tokens_after_conv: (B, 1, H, W), example dimension: [1, 1, 27, 27] + mask_tokens_after_conv = F.conv2d( + input=mask[:, None], + weight=torch.ones( + (1, 1, patch_size, patch_size), + device=mask.device, dtype=mask.dtype + ), + bias=None, + stride=(patch_size, patch_size), + padding="valid" + ) + + token_mask_bias = torch.zeros_like(mask_tokens_after_conv) + token_mask_bias.masked_fill_(mask_tokens_after_conv < 1e-5, float("-inf")) + token_mask_bias = token_mask_bias.flatten(1) + + # Flattened dimension: (1, 729) + return token_mask_bias + +def attn_mask_from_cimage_concatenated(cimage_concatenated, patch_size): + # Use the mask from input image (4th channel) + mask_normalized = cimage_concatenated[:, 3] + mask_unnormalized = (mask_normalized + 1) / 2 + # (1, 729) + token_mask_bias = get_token_mask_bias(mask_unnormalized, patch_size=patch_size) + + # attn_mask: (B, 1, Q, KV) + # print("Token positions:", token_mask.nonzero()) + + # Obtain token mask in the bias format: in mask 0, out of mask -inf + q_kv = token_mask_bias.shape[-1] + attn_mask_bias = token_mask_bias[:, None, None, :].repeat(1, 1, q_kv, 1) + + # Visualizations + # print(f"token_mask_bias shape: {token_mask_bias.shape}, attn_mask_bias shape: {attn_mask_bias.shape}") + # import matplotlib.pyplot as plt + # plt.imshow(attn_mask_bias[0, 0, 0].view(27, 27).detach().cpu().numpy()) + # plt.title("Attention mask (outside)") + # plt.show() + + return attn_mask_bias + +# From SiglipEncoderLayer. We would like to modify this to cross-attention. +class CrossAttnEncoderLayer(nn.Module): + def __init__(self, config: ContextProviderConfig): + super().__init__() + self.embed_dim = config.hidden_size + self.cross_attn = ContextProviderCrossAttention(config) + self.residual_dropout = nn.Dropout(config.residual_dropout) + self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + self.mlp = ContextProviderMLP(config) + self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + + if config.zero_init_output: + # TODO: alternatively, we could parameterize with an MLP + # These factors are initialized with 0 (so only residual passes through) + if config.context_provider_type != "cross_attn_at_the_end": + self.register_parameter("attn_factor", nn.Parameter(torch.zeros((1,)))) + self.register_parameter("mlp_factor", nn.Parameter(torch.zeros((1,)))) + else: + # Use scalar tensor for compatibility + self.register_parameter("attn_factor", nn.Parameter(torch.zeros((1,)).view(()))) + self.register_parameter("mlp_factor", nn.Parameter(torch.zeros((1,)).view(()))) + else: + self.attn_factor = 1. + self.mlp_factor = 1. + + # Ignore copy + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.FloatTensor]: + """ + Args: + hidden_states (`torch.FloatTensor`): + Input to the layer of shape `(batch, seq_len, embed_dim)`. + attention_mask (`torch.FloatTensor`): + Attention mask of shape `(batch, 1, q_len, k_v_seq_len)` where padding elements are indicated by very large negative values. + output_attentions (`bool`, *optional*, defaults to `False`): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. 
+ """ + residual = hidden_states + + hidden_states = self.layer_norm1(hidden_states) + hidden_states, attn_weights = self.cross_attn( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + output_attentions=output_attentions, + ) + # Dropping the residual: let the model leverage more on the context + hidden_states = self.residual_dropout(residual) + self.attn_factor * hidden_states + + residual = hidden_states + hidden_states = self.layer_norm2(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + self.mlp_factor * hidden_states + + outputs = (hidden_states,) + + if output_attentions: + outputs += (attn_weights,) + + return outputs + +class CrossAttnContextProviderEndToAll(nn.Module): + def __init__(self, config: ContextProviderConfig): + super().__init__() + self.layers = nn.ModuleList([ + CrossAttnEncoderLayer(config) for i in enumerate(range(config.num_hidden_layers)) if config.context_provider_layer_indices is None or i in config.context_provider_layer_indices + ]) + self.patch_size = config.patch_size + self.masked_cross_attn = config.masked_cross_attn + + def forward(self, context_image_features, cimage_concatenated, vision_tower): + # Use the mask from input image (4th channel) + if self.masked_cross_attn: + attn_mask = attn_mask_from_cimage_concatenated(cimage_concatenated, patch_size=self.patch_size) + else: + attn_mask = None + + detail_raw_image = cimage_concatenated[:, 4:, ...] + # NOTE: when using context image as queries, the context image was swapped with the detail image before passing into the context provider + outputs = vision_tower(detail_raw_image, context_provider_layers=self.layers, contexts=context_image_features, cross_attention_mask=attn_mask) + + return outputs + +class ContextProvider(PreTrainedModel): + config_class = ContextProviderConfig + + def __init__( + self, context_provider_cfg: ContextProviderConfig, config: PretrainedConfig + ): + super().__init__(context_provider_cfg) + + self.context_image_as_queries = context_provider_cfg.context_image_as_queries + self.context_provider_type = context_provider_type = context_provider_cfg.context_provider_type + + self.treat_image_as_cimage = context_provider_cfg.treat_image_as_cimage + + if self.context_image_as_queries: + assert not context_provider_cfg.masked_cross_attn, "Masked cross-attention not implemented when using context image as queries." + assert "concat" not in context_provider_type, "Concat not implemented when using context image as queries." 
+ + if context_provider_type == "cross_attn_end_to_all": + # Information flow: end of context features -> all detail features + self.context_provider_module = CrossAttnContextProviderEndToAll(context_provider_cfg) + else: + raise ValueError(f"Unknown context provider type: {context_provider_type}") + + def forward(self, cimage_full_features=None, cimage_crop_features=None, cimage_concatenated=None, vision_tower=None): + if self.context_provider_type == "cross_attn_end_to_all": + assert cimage_full_features.shape[0] == cimage_concatenated.shape[0], f"shape mismatches: {cimage_full_features.shape[0]} != {cimage_concatenated.shape[0]}" + return self.context_provider_module(context_image_features=cimage_full_features, cimage_concatenated=cimage_concatenated, vision_tower=vision_tower) + else: + raise ValueError(f"Unknown context provider type: {context_provider_type}") + +AutoConfig.register("context_provider", ContextProviderConfig) +AutoModel.register(ContextProviderConfig, ContextProvider) diff --git a/dam/model/multimodal_encoder/image_processor.py b/dam/model/multimodal_encoder/image_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..5fa62259129f48e2e1e1fa466ef9a0acddc84982 --- /dev/null +++ b/dam/model/multimodal_encoder/image_processor.py @@ -0,0 +1,546 @@ +# coding=utf-8 +# Copyright 2023 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Image processor class for RADIO.""" +import math +from copy import deepcopy +from itertools import product +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy as np + + +import PIL +from PIL.Image import Image + +from transformers.image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from transformers.image_transforms import convert_to_rgb, pad, resize, to_channel_dimension_format +from transformers.image_utils import ( + IMAGENET_DEFAULT_MEAN, + IMAGENET_DEFAULT_STD, + ChannelDimension, + ImageInput, + PILImageResampling, + get_image_size, + infer_channel_dimension_format, + is_scaled_image, + make_list_of_images, + to_numpy_array, + valid_images, +) +from transformers.utils import ( + TensorType, + is_tf_available, + is_torch_available, + is_torchvision_available, + logging, + requires_backends, +) + + +if is_torch_available(): + import torch + import torch.nn.functional as F + +if is_torchvision_available(): + from torchvision.ops.boxes import batched_nms + +if is_tf_available(): + import tensorflow as tf + from tensorflow.experimental import numpy as tnp + + from ...tf_utils import flatten, shape_list + +logger = logging.get_logger(__name__) + + +def rank_print(s): + rank = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0 + print(f"[Rank {rank}] {s}") + +class ImageProcessor(BaseImageProcessor): + r""" + Constructs an image processor. + + Args: + do_resize (`bool`, *optional*, defaults to `True`): + Whether to resize the image's (height, width) dimensions to the specified `size`. 
Can be overridden by the + `do_resize` parameter in the `preprocess` method. + size (`dict`, *optional*, defaults to `{"longest_edge": 1024}`): + Size of the output image after resizing. If "longest_edge" is specified, resizes the longest edge of the image to match + `size["longest_edge"]` while maintaining the aspect ratio. If "width" and "height" are specified, resizes the image + to that size, possibly changing the aspect ratio. Can be overridden by the `size` parameter in the + `preprocess` method. + resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`): + Resampling filter to use if resizing the image. Can be overridden by the `resample` parameter in the + `preprocess` method. + do_rescale (`bool`, *optional*, defaults to `True`): + Wwhether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the + `do_rescale` parameter in the `preprocess` method. + rescale_factor (`int` or `float`, *optional*, defaults to `1/255`): + Scale factor to use if rescaling the image. Only has an effect if `do_rescale` is set to `True`. Can be + overridden by the `rescale_factor` parameter in the `preprocess` method. + do_normalize (`bool`, *optional*, defaults to `True`): + Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess` + method. Can be overridden by the `do_normalize` parameter in the `preprocess` method. + image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_DEFAULT_MEAN`): + Mean to use if normalizing the image. This is a float or list of floats the length of the number of + channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can be + overridden by the `image_mean` parameter in the `preprocess` method. + image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_DEFAULT_STD`): + Standard deviation to use if normalizing the image. This is a float or list of floats the length of the + number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method. + Can be overridden by the `image_std` parameter in the `preprocess` method. + do_pad (`bool`, *optional*, defaults to `True`): + Whether to pad the image to the specified `pad_size`. Can be overridden by the `do_pad` parameter in the + `preprocess` method. + pad_size (`dict`, *optional*, defaults to `{"height": 1024, "width": 1024}`): + Size of the output image after padding. Can be overridden by the `pad_size` parameter in the `preprocess` + method. + pad_value (`float` or `Iterable[float]`, *optional*, defaults to `0.`): + Value of padded pixels. + pad_multiple (`int`, *optional*, defaults to `None`): + Pad to a multiple of specified number. + do_convert_rgb (`bool`, *optional*, defaults to `True`): + Whether to convert the image to RGB. 
+ """ + + model_input_names = ["pixel_values"] + + def __init__( + self, + do_resize: bool = True, + size: Dict[str, int] = None, + resample: PILImageResampling = PILImageResampling.BILINEAR, + do_rescale: bool = True, + rescale_factor: Union[int, float] = 1 / 255, + do_normalize: bool = True, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_pad: bool = True, + pad_size: int = None, + pad_multiple: int = None, + pad_value: Optional[Union[float, List[float]]] = 0., + do_convert_rgb: bool = True, + **kwargs, + ) -> None: + super().__init__(**kwargs) + x = 0 + size = size if size is not None else {"longest_edge": 1024} + size = get_size_dict(max_size=size, default_to_square=False) if not isinstance(size, dict) else size + + if pad_size is not None and pad_multiple is not None: + raise ValueError("pad_size and pad_multiple should not be set at the same time.") + + pad_size = pad_size if pad_size is not None else {"height": 1024, "width": 1024} if pad_multiple is not None else None + if do_pad: + pad_size = get_size_dict(pad_size, default_to_square=True) + + self.do_resize = do_resize + self.size = size + self.resample = resample + self.do_rescale = do_rescale + self.rescale_factor = rescale_factor + self.do_normalize = do_normalize + self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN + self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD + self.do_pad = do_pad + self.pad_multiple = pad_multiple + self.pad_size = pad_size + self.pad_value = tuple(pad_value) if isinstance(pad_value, list) else pad_value + self.do_convert_rgb = do_convert_rgb + self._valid_processor_keys = [ + "images", + "segmentation_maps", + "do_resize", + "size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_pad", + "pad_size", + "do_convert_rgb", + "return_tensors", + "data_format", + "input_data_format", + ] + + def pad_image( + self, + image: np.ndarray, + pad_size: Dict[str, int], + data_format: Optional[Union[str, ChannelDimension]] = None, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + **kwargs, + ) -> np.ndarray: + """ + Pad an image to `(pad_size["height"], pad_size["width"])` to the right and bottom. + + Args: + image (`np.ndarray`): + Image to pad. + pad_size (`Dict[str, int]`): + Size of the output image after padding. + data_format (`str` or `ChannelDimension`, *optional*): + The data format of the image. Can be either "channels_first" or "channels_last". If `None`, the + `data_format` of the `image` will be used. + input_data_format (`str` or `ChannelDimension`, *optional*): + The channel dimension format of the input image. If not provided, it will be inferred. + """ + output_height, output_width = pad_size["height"], pad_size["width"] + input_height, input_width = get_image_size(image, channel_dim=input_data_format) + + pad_width = output_width - input_width + pad_height = output_height - input_height + + padded_image = pad( + image, + ((0, pad_height), (0, pad_width)), + data_format=data_format, + input_data_format=input_data_format, + constant_values=self.pad_value, + **kwargs, + ) + return padded_image + + def _get_preprocess_shape(self, old_shape: Tuple[int, int], longest_edge: int): + """ + Compute the output size given input size and target long side length. 
+ """ + oldh, oldw = old_shape + scale = longest_edge * 1.0 / max(oldh, oldw) + newh, neww = oldh * scale, oldw * scale + newh = int(newh + 0.5) + neww = int(neww + 0.5) + return (newh, neww) + + def resize( + self, + image: np.ndarray, + size: Dict[str, int], + resample: PILImageResampling = PILImageResampling.BICUBIC, + data_format: Optional[Union[str, ChannelDimension]] = None, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + **kwargs, + ) -> np.ndarray: + """ + Resize an image to `(size["height"], size["width"])`. + + Args: + image (`np.ndarray`): + Image to resize. + size (`Dict[str, int]`): + Dictionary in the format `{"longest_edge": int}` or `{"width": int, "height": int}` specifying the size + of the output image. If "longest_edge" is specified, resizes the longest edge of the image to match + `size["longest_edge"]` while maintaining the aspect ratio. If "width" and "height" are specified, resizes the image + to that size, possibly changing the aspect ratio. + resample: + `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`. + data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the output image. If unset, the channel dimension format of the input + image is used. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + input_data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the input image. If unset, the channel dimension format is inferred + from the input image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + + Returns: + `np.ndarray`: The resized image. + """ + size = get_size_dict(size) + if "longest_edge" not in size: + if "width" not in size or "height" not in size: + raise ValueError(f"The `size` dictionary must contain the key `longest_edge`, or `width` and `height`. 
Got {size.keys()}") + input_size = get_image_size(image, channel_dim=input_data_format) + if "longest_edge" in size: + output_height, output_width = self._get_preprocess_shape(input_size, size["longest_edge"]) + else: + output_height, output_width = size["height"], size["width"] + return resize( + image, + size=(output_height, output_width), + resample=resample, + data_format=data_format, + input_data_format=input_data_format, + **kwargs, + ) + + def _preprocess( + self, + image: ImageInput, + do_resize: bool, + do_rescale: bool, + do_normalize: bool, + size: Optional[Dict[str, int]] = None, + resample: PILImageResampling = None, + rescale_factor: Optional[float] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_pad: Optional[bool] = None, + pad_size: Optional[Dict[str, int]] = None, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + ): + if do_resize: + image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format) + reshaped_input_size = get_image_size(image, channel_dim=input_data_format) + + if do_rescale: + image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format) + + if do_normalize: + image = self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format) + + if do_pad: + if self.pad_multiple: + h, w = get_image_size(image, channel_dim=input_data_format) + pad_size = { + "height": math.ceil(h / self.pad_multiple) * self.pad_multiple, + "width": math.ceil(w / self.pad_multiple) * self.pad_multiple, + } + + image = self.pad_image(image=image, pad_size=pad_size, input_data_format=input_data_format) + + return image, reshaped_input_size + + def _preprocess_image( + self, + image: ImageInput, + do_resize: Optional[bool] = None, + size: Dict[str, int] = None, + resample: PILImageResampling = None, + do_rescale: bool = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_pad: Optional[bool] = None, + pad_size: Optional[Dict[str, int]] = None, + do_convert_rgb: Optional[bool] = None, + data_format: Optional[Union[str, ChannelDimension]] = None, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + ) -> Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]]: + #image = to_numpy_array(image) + +# import time +# if int(time.time()*1000) % 10 == 0: +# # create an PIL image of size 1x1 +# image = PIL.Image.new('RGB', (1, 1)) + + if isinstance(image, Image): + # PIL always uses Channels Last. + input_data_format = ChannelDimension.LAST + + # PIL RGBA images are converted to RGB + #mode_before = image.mode + if do_convert_rgb: + image = convert_to_rgb(image) + + # All transformations expect numpy arrays. 
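The geometry above comes from two pieces: `_get_preprocess_shape` rescales so the longest edge matches `size["longest_edge"]`, and when `pad_multiple` is set the padded size is rounded up to the next multiple instead of a fixed `pad_size`. A quick standalone rerun of that arithmetic follows; the 1024 longest edge and multiple of 14 are example values only.

```python
import math

def preprocess_shape(old_shape, longest_edge):
    """Same arithmetic as ImageProcessor._get_preprocess_shape."""
    oldh, oldw = old_shape
    scale = longest_edge * 1.0 / max(oldh, oldw)
    return int(oldh * scale + 0.5), int(oldw * scale + 0.5)

def pad_to_multiple(h, w, multiple):
    """Pad target used when pad_multiple is set instead of a fixed pad_size."""
    return math.ceil(h / multiple) * multiple, math.ceil(w / multiple) * multiple

# A 480x640 image with longest_edge=1024 is scaled to 768x1024,
# then padded up to the next multiple of 14: 770x1036.
h, w = preprocess_shape((480, 640), 1024)
print((h, w), pad_to_multiple(h, w, 14))  # (768, 1024) (770, 1036)
```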
+ image_ = image + image = to_numpy_array(image) + +# if isinstance(image_, np.ndarray): +# rank_print(f"preprocess image type={type(image_)} shape={image_.shape} array shape={image.shape}") +# elif isinstance(image_, Image): +# rank_print(f"preprocessimage type={type(image_)} size={image_.size} mode={image_.mode} array shape={image.shape}") +# else: +# rank_print(f"preprocess unknown image type={type(image_)} array shape={image.shape}") + + if len(image.shape) == 2: + h, w = image.shape + ret = np.empty((h, w, 3), dtype=np.uint8) + ret[:, :, 0] = image + ret[:, :, 1] = image + ret[:, :, 2] = image + image = ret + rank_print(f"preprocess new image shape={image.shape}") + elif len(image.shape) == 3 and image.shape[-1] == 1: + ret = np.empty((h, w, 3), dtype=np.uint8) + ret[:, :, 0] = image[:, :, 0] + ret[:, :, 1] = image[:, :, 0] + ret[:, :, 2] = image[:, :, 0] + image = ret + rank_print(f"preprocess new image shape={image.shape}") + + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + + if input_data_format is None: + input_data_format = infer_channel_dimension_format(image) + + original_size = get_image_size(image, channel_dim=input_data_format) + + image, reshaped_input_size = self._preprocess( + image=image, + do_resize=do_resize, + size=size, + resample=resample, + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + pad_size=pad_size, + input_data_format=input_data_format, + ) + + if data_format is not None: + image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) + +# rank_print(f"preprocess original_size={original_size} reshaped_input_size={reshaped_input_size} image shape={image.shape} type={type(image)}") + + # if image is a single channel convert to rgb + if do_convert_rgb and image.shape[0] == 1: + c, h, w = image.shape + ret = np.empty((3, h, w), dtype=np.uint8) + ret[0, :, :] = image[0, :, :] + ret[1, :, :] = image[0, :, :] + ret[2, :, :] = image[0, :, :] + image = ret + rank_print(f"preprocess final: {image.shape}") + + return image, original_size, reshaped_input_size + + def preprocess( + self, + images: ImageInput, + do_resize: Optional[bool] = None, + size: Optional[Dict[str, int]] = None, + resample: Optional["PILImageResampling"] = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[Union[int, float]] = None, + do_normalize: Optional[bool] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_pad: Optional[bool] = None, + pad_size: Optional[Dict[str, int]] = None, + do_convert_rgb: Optional[bool] = None, + return_tensors: Optional[Union[str, TensorType]] = None, + data_format: ChannelDimension = ChannelDimension.FIRST, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + **kwargs, + ): + """ + Preprocess an image or batch of images. + + Args: + images (`ImageInput`): + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. + do_resize (`bool`, *optional*, defaults to `self.do_resize`): + Whether to resize the image. 
+ size (`Dict[str, int]`, *optional*, defaults to `self.size`): + Controls the size of the image after `resize`. The longest edge of the image is resized to + `size["longest_edge"]` whilst preserving the aspect ratio. + resample (`PILImageResampling`, *optional*, defaults to `self.resample`): + `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`. + do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): + Whether to rescale the image pixel values by rescaling factor. + rescale_factor (`int` or `float`, *optional*, defaults to `self.rescale_factor`): + Rescale factor to apply to the image pixel values. + do_normalize (`bool`, *optional*, defaults to `self.do_normalize`): + Whether to normalize the image. + image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`): + Image mean to normalize the image by if `do_normalize` is set to `True`. + image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): + Image standard deviation to normalize the image by if `do_normalize` is set to `True`. + do_pad (`bool`, *optional*, defaults to `self.do_pad`): + Whether to pad the image. + pad_size (`Dict[str, int]`, *optional*, defaults to `self.pad_size`): + Controls the size of the padding applied to the image. The image is padded to `pad_size["height"]` and + `pad_size["width"]` if `do_pad` is set to `True`. + do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): + Whether to convert the image to RGB. + return_tensors (`str` or `TensorType`, *optional*): + The type of tensors to return. Can be one of: + - Unset: Return a list of `np.ndarray`. + - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`. + - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`. + - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`. + - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`. + data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`): + The channel dimension format for the output image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - Unset: Use the channel dimension format of the input image. + input_data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the input image. If unset, the channel dimension format is inferred + from the input image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. 
+ """ + do_resize = do_resize if do_resize is not None else self.do_resize + size = size if size is not None else self.size + size = get_size_dict(max_size=size, default_to_square=False) if not isinstance(size, dict) else size + resample = resample if resample is not None else self.resample + do_rescale = do_rescale if do_rescale is not None else self.do_rescale + rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor + do_normalize = do_normalize if do_normalize is not None else self.do_normalize + image_mean = image_mean if image_mean is not None else self.image_mean + image_std = image_std if image_std is not None else self.image_std + do_pad = do_pad if do_pad is not None else self.do_pad + pad_size = pad_size if pad_size is not None else self.pad_size + if do_pad: + pad_size = get_size_dict(pad_size, default_to_square=True) + do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb + + images = make_list_of_images(images) + + if not valid_images(images): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " + "torch.Tensor, tf.Tensor or jax.ndarray." + ) + + images, original_sizes, reshaped_input_sizes = zip( + *( + self._preprocess_image( + image=img, + do_resize=do_resize, + size=size, + resample=resample, + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + pad_size=pad_size, + do_convert_rgb=do_convert_rgb, + data_format=data_format, + input_data_format=input_data_format, + ) + for img in images + ) + ) + + data = { + "pixel_values": images, + "original_sizes": original_sizes, + "reshaped_input_sizes": reshaped_input_sizes, + } + + return BatchFeature(data=data, tensor_type=return_tensors) diff --git a/dam/model/multimodal_encoder/intern_ignored/configuration_intern_vit.py b/dam/model/multimodal_encoder/intern_ignored/configuration_intern_vit.py new file mode 100644 index 0000000000000000000000000000000000000000..95d154c4dd9b25af2fe833c3474874083a3291ca --- /dev/null +++ b/dam/model/multimodal_encoder/intern_ignored/configuration_intern_vit.py @@ -0,0 +1,119 @@ +# -------------------------------------------------------- +# InternVL +# Copyright (c) 2023 OpenGVLab +# Licensed under The MIT License [see LICENSE for details] +# -------------------------------------------------------- +import os +from typing import Union + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + + +class InternVisionConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`InternVisionModel`]. It is used to + instantiate a vision encoder according to the specified arguments, defining the model architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + Args: + num_channels (`int`, *optional*, defaults to 3): + Number of color channels in the input images (e.g., 3 for RGB). + patch_size (`int`, *optional*, defaults to 14): + The size (resolution) of each patch. + image_size (`int`, *optional*, defaults to 224): + The size (resolution) of each image. + qkv_bias (`bool`, *optional*, defaults to `False`): + Whether to add a bias to the queries and values in the self-attention layers. 
+ hidden_size (`int`, *optional*, defaults to 3200): + Dimensionality of the encoder layers and the pooler layer. + num_attention_heads (`int`, *optional*, defaults to 25): + Number of attention heads for each attention layer in the Transformer encoder. + intermediate_size (`int`, *optional*, defaults to 12800): + Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + qk_normalization (`bool`, *optional*, defaults to `True`): + Whether to normalize the queries and keys in the self-attention layers. + num_hidden_layers (`int`, *optional*, defaults to 48): + Number of hidden layers in the Transformer encoder. + use_flash_attn (`bool`, *optional*, defaults to `True`): + Whether to use flash attention mechanism. + hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"selu"` and `"gelu_new"` ``"gelu"` are supported. + layer_norm_eps (`float`, *optional*, defaults to 1e-6): + The epsilon used by the layer normalization layers. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. + drop_path_rate (`float`, *optional*, defaults to 0.0): + Dropout rate for stochastic depth. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + initializer_factor (`float`, *optional*, defaults to 0.1): + A factor for layer scale. + """ + + model_type = 'intern_vit_6b' + + def __init__( + self, + num_channels=3, + patch_size=14, + image_size=224, + qkv_bias=False, + hidden_size=3200, + num_attention_heads=25, + intermediate_size=12800, + qk_normalization=True, + num_hidden_layers=48, + use_flash_attn=True, + hidden_act='gelu', + layer_norm_eps=1e-6, + dropout=0.0, + drop_path_rate=0.0, + attention_dropout=0.0, + initializer_range=0.02, + initializer_factor=0.1, + **kwargs, + ): + super().__init__(**kwargs) + + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.dropout = dropout + self.drop_path_rate = drop_path_rate + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.num_channels = num_channels + self.patch_size = patch_size + self.image_size = image_size + self.initializer_range = initializer_range + self.initializer_factor = initializer_factor + self.attention_dropout = attention_dropout + self.layer_norm_eps = layer_norm_eps + self.hidden_act = hidden_act + self.qkv_bias = qkv_bias + self.qk_normalization = qk_normalization + self.use_flash_attn = use_flash_attn + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> 'PretrainedConfig': + config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs) + + if 'vision_config' in config_dict: + config_dict = config_dict['vision_config'] + + if 'model_type' in config_dict and hasattr(cls, 'model_type') and config_dict['model_type'] != cls.model_type: + logger.warning( + f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " + f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.' 
+ ) + + return cls.from_dict(config_dict, **kwargs) + \ No newline at end of file diff --git a/dam/model/multimodal_encoder/intern_ignored/flash_attention.py b/dam/model/multimodal_encoder/intern_ignored/flash_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..60c9155fb07b51571868e570641f44462590ab21 --- /dev/null +++ b/dam/model/multimodal_encoder/intern_ignored/flash_attention.py @@ -0,0 +1,76 @@ +# https://github.com/Dao-AILab/flash-attention/blob/v0.2.8/flash_attn/flash_attention.py +import torch +import torch.nn as nn +from einops import rearrange + +try: # v1 + from flash_attn.flash_attn_interface import \ + flash_attn_unpadded_qkvpacked_func +except: # v2 + from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func + +from flash_attn.bert_padding import pad_input, unpad_input + + +class FlashAttention(nn.Module): + """Implement the scaled dot product attention with softmax. + Arguments + --------- + softmax_scale: The temperature to use for the softmax attention. + (default: 1/sqrt(d_keys) where d_keys is computed at + runtime) + attention_dropout: The dropout rate to apply to the attention + (default: 0.0) + """ + + def __init__(self, softmax_scale=None, attention_dropout=0.0, device=None, dtype=None): + super().__init__() + self.softmax_scale = softmax_scale + self.dropout_p = attention_dropout + + def forward(self, qkv, key_padding_mask=None, causal=False, cu_seqlens=None, + max_s=None, need_weights=False): + """Implements the multihead softmax attention. + Arguments + --------- + qkv: The tensor containing the query, key, and value. (B, S, 3, H, D) if key_padding_mask is None + if unpadded: (nnz, 3, h, d) + key_padding_mask: a bool tensor of shape (B, S) + """ + assert not need_weights + assert qkv.dtype in [torch.float16, torch.bfloat16] + assert qkv.is_cuda + + if cu_seqlens is None: + batch_size = qkv.shape[0] + seqlen = qkv.shape[1] + if key_padding_mask is None: + qkv = rearrange(qkv, 'b s ... -> (b s) ...') + max_s = seqlen + cu_seqlens = torch.arange(0, (batch_size + 1) * seqlen, step=seqlen, dtype=torch.int32, + device=qkv.device) + output = flash_attn_unpadded_qkvpacked_func( + qkv, cu_seqlens, max_s, self.dropout_p if self.training else 0.0, + softmax_scale=self.softmax_scale, causal=causal + ) + output = rearrange(output, '(b s) ... 
-> b s ...', b=batch_size) + else: + nheads = qkv.shape[-2] + x = rearrange(qkv, 'b s three h d -> b s (three h d)') + x_unpad, indices, cu_seqlens, max_s = unpad_input(x, key_padding_mask) + x_unpad = rearrange(x_unpad, 'nnz (three h d) -> nnz three h d', three=3, h=nheads) + output_unpad = flash_attn_unpadded_qkvpacked_func( + x_unpad, cu_seqlens, max_s, self.dropout_p if self.training else 0.0, + softmax_scale=self.softmax_scale, causal=causal + ) + output = rearrange(pad_input(rearrange(output_unpad, 'nnz h d -> nnz (h d)'), + indices, batch_size, seqlen), + 'b s (h d) -> b s h d', h=nheads) + else: + assert max_s is not None + output = flash_attn_unpadded_qkvpacked_func( + qkv, cu_seqlens, max_s, self.dropout_p if self.training else 0.0, + softmax_scale=self.softmax_scale, causal=causal + ) + + return output, None \ No newline at end of file diff --git a/dam/model/multimodal_encoder/intern_ignored/modeling_intern_vit_ignored.py b/dam/model/multimodal_encoder/intern_ignored/modeling_intern_vit_ignored.py new file mode 100644 index 0000000000000000000000000000000000000000..0d77764d1d69286898ac0da53ccd294ce5a11632 --- /dev/null +++ b/dam/model/multimodal_encoder/intern_ignored/modeling_intern_vit_ignored.py @@ -0,0 +1,547 @@ +# -------------------------------------------------------- +# InternVL +# Copyright (c) 2023 OpenGVLab +# Licensed under The MIT License [see LICENSE for details] +# -------------------------------------------------------- +from typing import Optional, Tuple, Union + +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +from einops import rearrange +from torch import nn +from transformers.activations import ACT2FN +from transformers.modeling_outputs import (BaseModelOutput, + BaseModelOutputWithPooling) +from transformers.modeling_utils import PreTrainedModel +from transformers.utils import logging + +from llava.model.multimodal_encoder.intern.configuration_intern_vit import InternVisionConfig + +from .flash_attention import FlashAttention +has_flash_attn = True + + +logger = logging.get_logger(__name__) + + + + +""" DropBlock, DropPath + +PyTorch implementations of DropBlock and DropPath (Stochastic Depth) regularization layers. + +Papers: +DropBlock: A regularization method for convolutional networks (https://arxiv.org/abs/1810.12890) + +Deep Networks with Stochastic Depth (https://arxiv.org/abs/1603.09382) + +Code: +DropBlock impl inspired by two Tensorflow impl that I liked: + - https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py#L74 + - https://github.com/clovaai/assembled-cnn/blob/master/nets/blocks.py + +Hacked together by / Copyright 2020 Ross Wightman +""" +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def ndgrid(*tensors) -> Tuple[torch.Tensor, ...]: + """generate N-D grid in dimension order. + + The ndgrid function is like meshgrid except that the order of the first two input arguments are switched. + + That is, the statement + [X1,X2,X3] = ndgrid(x1,x2,x3) + + produces the same result as + + [X2,X1,X3] = meshgrid(x2,x1,x3) + + This naming is based on MATLAB, the purpose is to avoid confusion due to torch's change to make + torch.meshgrid behaviour move from matching ndgrid ('ij') indexing to numpy meshgrid defaults of ('xy'). 
+ + """ + try: + return torch.meshgrid(*tensors, indexing='ij') + except TypeError: + # old PyTorch < 1.10 will follow this path as it does not have indexing arg, + # the old behaviour of meshgrid was 'ij' + return torch.meshgrid(*tensors) + + +def drop_block_2d( + x, + drop_prob: float = 0.1, + block_size: int = 7, + gamma_scale: float = 1.0, + with_noise: bool = False, + inplace: bool = False, + batchwise: bool = False +): + """ DropBlock. See https://arxiv.org/pdf/1810.12890.pdf + + DropBlock with an experimental gaussian noise option. This layer has been tested on a few training + runs with success, but needs further validation and possibly optimization for lower runtime impact. + """ + B, C, H, W = x.shape + total_size = W * H + clipped_block_size = min(block_size, min(W, H)) + # seed_drop_rate, the gamma parameter + gamma = gamma_scale * drop_prob * total_size / clipped_block_size ** 2 / ( + (W - block_size + 1) * (H - block_size + 1)) + + # Forces the block to be inside the feature map. + w_i, h_i = ndgrid(torch.arange(W, device=x.device), torch.arange(H, device=x.device)) + valid_block = ((w_i >= clipped_block_size // 2) & (w_i < W - (clipped_block_size - 1) // 2)) & \ + ((h_i >= clipped_block_size // 2) & (h_i < H - (clipped_block_size - 1) // 2)) + valid_block = torch.reshape(valid_block, (1, 1, H, W)).to(dtype=x.dtype) + + if batchwise: + # one mask for whole batch, quite a bit faster + uniform_noise = torch.rand((1, C, H, W), dtype=x.dtype, device=x.device) + else: + uniform_noise = torch.rand_like(x) + block_mask = ((2 - gamma - valid_block + uniform_noise) >= 1).to(dtype=x.dtype) + block_mask = -F.max_pool2d( + -block_mask, + kernel_size=clipped_block_size, # block_size, + stride=1, + padding=clipped_block_size // 2) + + if with_noise: + normal_noise = torch.randn((1, C, H, W), dtype=x.dtype, device=x.device) if batchwise else torch.randn_like(x) + if inplace: + x.mul_(block_mask).add_(normal_noise * (1 - block_mask)) + else: + x = x * block_mask + normal_noise * (1 - block_mask) + else: + normalize_scale = (block_mask.numel() / block_mask.to(dtype=torch.float32).sum().add(1e-7)).to(x.dtype) + if inplace: + x.mul_(block_mask * normalize_scale) + else: + x = x * block_mask * normalize_scale + return x + + +def drop_block_fast_2d( + x: torch.Tensor, + drop_prob: float = 0.1, + block_size: int = 7, + gamma_scale: float = 1.0, + with_noise: bool = False, + inplace: bool = False, +): + """ DropBlock. See https://arxiv.org/pdf/1810.12890.pdf + + DropBlock with an experimental gaussian noise option. Simplied from above without concern for valid + block mask at edges. + """ + B, C, H, W = x.shape + total_size = W * H + clipped_block_size = min(block_size, min(W, H)) + gamma = gamma_scale * drop_prob * total_size / clipped_block_size ** 2 / ( + (W - block_size + 1) * (H - block_size + 1)) + + block_mask = torch.empty_like(x).bernoulli_(gamma) + block_mask = F.max_pool2d( + block_mask.to(x.dtype), kernel_size=clipped_block_size, stride=1, padding=clipped_block_size // 2) + + if with_noise: + normal_noise = torch.empty_like(x).normal_() + if inplace: + x.mul_(1. - block_mask).add_(normal_noise * block_mask) + else: + x = x * (1. - block_mask) + normal_noise * block_mask + else: + block_mask = 1 - block_mask + normalize_scale = (block_mask.numel() / block_mask.to(dtype=torch.float32).sum().add(1e-6)).to(dtype=x.dtype) + if inplace: + x.mul_(block_mask * normalize_scale) + else: + x = x * block_mask * normalize_scale + return x + + +class DropBlock2d(nn.Module): + """ DropBlock. 
See https://arxiv.org/pdf/1810.12890.pdf + """ + + def __init__( + self, + drop_prob: float = 0.1, + block_size: int = 7, + gamma_scale: float = 1.0, + with_noise: bool = False, + inplace: bool = False, + batchwise: bool = False, + fast: bool = True): + super(DropBlock2d, self).__init__() + self.drop_prob = drop_prob + self.gamma_scale = gamma_scale + self.block_size = block_size + self.with_noise = with_noise + self.inplace = inplace + self.batchwise = batchwise + self.fast = fast # FIXME finish comparisons of fast vs not + + def forward(self, x): + if not self.training or not self.drop_prob: + return x + if self.fast: + return drop_block_fast_2d( + x, self.drop_prob, self.block_size, self.gamma_scale, self.with_noise, self.inplace) + else: + return drop_block_2d( + x, self.drop_prob, self.block_size, self.gamma_scale, self.with_noise, self.inplace, self.batchwise) + + +def drop_path(x, drop_prob: float = 0., training: bool = False, scale_by_keep: bool = True): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + + This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for + changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use + 'survival rate' as the argument. + + """ + if drop_prob == 0. or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
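    A minimal usage sketch (illustrative): wrapping a residual branch as
    x = x + DropPath(drop_prob=0.1)(block(x)) randomly zeroes the entire branch for a given sample during
    training, rescaling the kept samples by 1 / (1 - drop_prob) when scale_by_keep=True; at inference time
    the branch always passes through unchanged.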
+ """ + def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) + + def extra_repr(self): + return f'drop_prob={round(self.drop_prob,3):0.3f}' + + + +class InternRMSNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-6): + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) + + +try: + from apex.normalization import FusedRMSNorm + + InternRMSNorm = FusedRMSNorm # noqa + + logger.info('Discovered apex.normalization.FusedRMSNorm - will use it instead of InternRMSNorm') +except ImportError: + # using the normal InternRMSNorm + pass +except Exception: + logger.warning('discovered apex but it failed to load, falling back to InternRMSNorm') + pass + + +class InternVisionEmbeddings(nn.Module): + def __init__(self, config: InternVisionConfig): + super().__init__() + self.config = config + self.embed_dim = config.hidden_size + self.image_size = config.image_size + self.patch_size = config.patch_size + + self.class_embedding = nn.Parameter( + torch.randn(1, 1, self.embed_dim), + ) + + self.patch_embedding = nn.Conv2d( + in_channels=3, out_channels=self.embed_dim, kernel_size=self.patch_size, stride=self.patch_size + ) + + self.num_patches = (self.image_size // self.patch_size) ** 2 + self.num_positions = self.num_patches + 1 + + self.position_embedding = nn.Parameter(torch.randn(1, self.num_positions, self.embed_dim)) + + def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: + batch_size = pixel_values.shape[0] + target_dtype = self.patch_embedding.weight.dtype + patch_embeds = self.patch_embedding(pixel_values) # shape = [*, width, grid, grid] + patch_embeds = patch_embeds.flatten(2).transpose(1, 2) + class_embeds = self.class_embedding.expand(batch_size, 1, -1).to(target_dtype) + embeddings = torch.cat([class_embeds, patch_embeds], dim=1) + embeddings = embeddings + self.position_embedding.to(target_dtype) + return embeddings + + +class InternAttention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper""" + + def __init__(self, config: InternVisionConfig): + super().__init__() + self.config = config + self.embed_dim = config.hidden_size + self.num_heads = config.num_attention_heads + self.use_flash_attn = config.use_flash_attn and has_flash_attn + if config.use_flash_attn and not has_flash_attn: + print('Warning: Flash Attention is not available, use_flash_attn is set to False.') + self.head_dim = self.embed_dim // self.num_heads + if self.head_dim * self.num_heads != self.embed_dim: + raise ValueError( + f'embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:' + f' {self.num_heads}).' 
+ ) + + self.scale = self.head_dim ** -0.5 + self.qkv = nn.Linear(self.embed_dim, 3 * self.embed_dim, bias=config.qkv_bias) + self.attn_drop = nn.Dropout(config.attention_dropout) + self.proj_drop = nn.Dropout(config.dropout) + + self.qk_normalization = config.qk_normalization + + if self.qk_normalization: + self.q_norm = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps) + self.k_norm = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps) + + if self.use_flash_attn: + self.inner_attn = FlashAttention(attention_dropout=config.attention_dropout) + self.proj = nn.Linear(self.embed_dim, self.embed_dim) + + def _naive_attn(self, x): + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple) + + if self.qk_normalization: + B_, H_, N_, D_ = q.shape + q = self.q_norm(q.transpose(1, 2).flatten(-2, -1)).view(B_, N_, H_, D_).transpose(1, 2) + k = self.k_norm(k.transpose(1, 2).flatten(-2, -1)).view(B_, N_, H_, D_).transpose(1, 2) + + attn = ((q * self.scale) @ k.transpose(-2, -1)) + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def _flash_attn(self, x, key_padding_mask=None, need_weights=False): + qkv = self.qkv(x) + qkv = rearrange(qkv, 'b s (three h d) -> b s three h d', three=3, h=self.num_heads) + + if self.qk_normalization: + q, k, v = qkv.unbind(2) + q = self.q_norm(q.flatten(-2, -1)).view(q.shape) + k = self.k_norm(k.flatten(-2, -1)).view(k.shape) + qkv = torch.stack([q, k, v], dim=2) + + context, _ = self.inner_attn( + qkv, key_padding_mask=key_padding_mask, need_weights=need_weights, causal=False + ) + outs = self.proj(rearrange(context, 'b s h d -> b s (h d)')) + outs = self.proj_drop(outs) + return outs + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + x = self._naive_attn(hidden_states) if not self.use_flash_attn else self._flash_attn(hidden_states) + return x + + +class InternMLP(nn.Module): + def __init__(self, config: InternVisionConfig): + super().__init__() + self.config = config + self.act = ACT2FN[config.hidden_act] + self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) + self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.fc1(hidden_states) + hidden_states = self.act(hidden_states) + hidden_states = self.fc2(hidden_states) + return hidden_states + + +class InternVisionEncoderLayer(nn.Module): + def __init__(self, config: InternVisionConfig, drop_path_rate: float): + super().__init__() + self.embed_dim = config.hidden_size + self.intermediate_size = config.intermediate_size + + self.attn = InternAttention(config) + self.mlp = InternMLP(config) + self.norm1 = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps) + self.norm2 = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps) + + self.ls1 = nn.Parameter(config.initializer_factor * torch.ones(self.embed_dim)) + self.ls2 = nn.Parameter(config.initializer_factor * torch.ones(self.embed_dim)) + self.drop_path1 = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity() + self.drop_path2 = DropPath(drop_path_rate) if drop_path_rate > 0. 
else nn.Identity() + + def forward( + self, + hidden_states: torch.Tensor, + ) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor], Optional[Tuple[torch.FloatTensor]]]: + """ + Args: + hidden_states (`Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]`): input to the layer of shape `(batch, seq_len, embed_dim)` + """ + hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states)) * self.ls1) + + hidden_states = hidden_states + self.drop_path2(self.mlp(self.norm2(hidden_states)) * self.ls2) + + return hidden_states + + +class InternVisionEncoder(nn.Module): + """ + Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a + [`InternEncoderLayer`]. + + Args: + config (`InternConfig`): + The corresponding vision configuration for the `InternEncoder`. + """ + + def __init__(self, config: InternVisionConfig): + super().__init__() + self.config = config + # stochastic depth decay rule + dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, config.num_hidden_layers)] + self.layers = nn.ModuleList([ + InternVisionEncoderLayer(config, dpr[idx]) for idx in range(config.num_hidden_layers)]) + self.gradient_checkpointing = True + + def forward( + self, + inputs_embeds, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutput]: + r""" + Args: + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Embedded representation of the inputs. Should be float, not int tokens. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors + for more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
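    For example (illustrative): encoder(inputs_embeds=x, output_hidden_states=True, return_dict=True)
    yields a BaseModelOutput whose hidden_states tuple has num_hidden_layers + 1 entries (the input
    embeddings followed by the output of each encoder layer).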
+ """ + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + encoder_states = () if output_hidden_states else None + hidden_states = inputs_embeds + + for idx, encoder_layer in enumerate(self.layers): + if output_hidden_states: + encoder_states = encoder_states + (hidden_states,) + if self.gradient_checkpointing and self.training: + layer_outputs = torch.utils.checkpoint.checkpoint( + encoder_layer, + hidden_states) + else: + layer_outputs = encoder_layer( + hidden_states, + ) + hidden_states = layer_outputs + + if output_hidden_states: + encoder_states = encoder_states + (hidden_states,) + + if not return_dict: + return tuple(v for v in [hidden_states, encoder_states] if v is not None) + return BaseModelOutput( + last_hidden_state=hidden_states, hidden_states=encoder_states + ) + + +class InternVisionModel(PreTrainedModel): + main_input_name = 'pixel_values' + config_class = InternVisionConfig + _no_split_modules = ['InternVisionEncoderLayer'] + + def __init__(self, config: InternVisionConfig): + super().__init__(config) + self.config = config + + self.embeddings = InternVisionEmbeddings(config) + self.encoder = InternVisionEncoder(config) + + def resize_pos_embeddings(self, old_size, new_size, patch_size): + pos_emb = self.embeddings.position_embedding + _, num_positions, embed_dim = pos_emb.shape + cls_emb = pos_emb[:, :1, :] + pos_emb = pos_emb[:, 1:, :].reshape(1, old_size // patch_size, old_size // patch_size, -1).permute(0, 3, 1, 2) + pos_emb = F.interpolate(pos_emb.float(), size=new_size // patch_size, mode='bicubic', align_corners=False) + pos_emb = pos_emb.to(cls_emb.dtype).reshape(1, embed_dim, -1).permute(0, 2, 1) + pos_emb = torch.cat([cls_emb, pos_emb], dim=1) + self.embeddings.position_embedding = nn.Parameter(pos_emb) + logger.info('Resized position embeddings from {} to {}'.format(old_size, new_size)) + + def get_input_embeddings(self): + return self.embeddings + + def forward( + self, + pixel_values: Optional[torch.FloatTensor] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + pixel_embeds: Optional[torch.FloatTensor] = None, + ) -> Union[Tuple, BaseModelOutputWithPooling]: + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if pixel_values is None and pixel_embeds is None: + raise ValueError('You have to specify pixel_values or pixel_embeds') + + if pixel_embeds is not None: + hidden_states = pixel_embeds + else: + if len(pixel_values.shape) == 4: + hidden_states = self.embeddings(pixel_values) + else: + raise ValueError(f'wrong pixel_values size: {pixel_values.shape}') + encoder_outputs = self.encoder( + inputs_embeds=hidden_states, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + last_hidden_state = encoder_outputs.last_hidden_state + pooled_output = last_hidden_state[:, 0, :] + + if not return_dict: + return (last_hidden_state, pooled_output) + encoder_outputs[1:] + + return BaseModelOutputWithPooling( + last_hidden_state=last_hidden_state, + pooler_output=pooled_output, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, + ) \ No newline at end of file diff --git a/dam/model/multimodal_encoder/siglip/__init__.py 
b/dam/model/multimodal_encoder/siglip/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..19dbec2172fd685b71cee47d6cc593412cea2419 --- /dev/null +++ b/dam/model/multimodal_encoder/siglip/__init__.py @@ -0,0 +1,94 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import TYPE_CHECKING + +from transformers.utils import ( + OptionalDependencyNotAvailable, + _LazyModule, + is_torch_available, + is_vision_available, +) + + +_import_structure = { + "configuration_siglip": [ + "SIGLIP_PRETRAINED_CONFIG_ARCHIVE_MAP", + "SiglipConfig", + "SiglipTextConfig", + "SiglipVisionConfig", + ], + "processing_siglip": ["SiglipProcessor"], + "tokenization_siglip": ["SiglipTokenizer"], +} + +try: + if not is_vision_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + pass +else: + _import_structure["image_processing_siglip"] = ["SiglipImageProcessor"] + +try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + pass +else: + _import_structure["modeling_siglip"] = [ + "SIGLIP_PRETRAINED_MODEL_ARCHIVE_LIST", + "SiglipModel", + "SiglipPreTrainedModel", + "SiglipTextModel", + "SiglipVisionModel", + ] + + +if TYPE_CHECKING: + from .configuration_siglip import ( + SIGLIP_PRETRAINED_CONFIG_ARCHIVE_MAP, + SiglipConfig, + SiglipTextConfig, + SiglipVisionConfig, + ) + from .processing_siglip import SiglipProcessor + from .tokenization_siglip import SiglipTokenizer + + try: + if not is_vision_available(): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + pass + else: + from .image_processing_siglip import SiglipImageProcessor + + try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + pass + else: + from .modeling_siglip import ( + SIGLIP_PRETRAINED_MODEL_ARCHIVE_LIST, + SiglipModel, + SiglipPreTrainedModel, + SiglipTextModel, + SiglipVisionModel, + ) + + +else: + import sys + + sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) diff --git a/dam/model/multimodal_encoder/siglip/__pycache__/__init__.cpython-310.pyc b/dam/model/multimodal_encoder/siglip/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04b95c24e8fac6857f1067aee008da2b04f70323 Binary files /dev/null and b/dam/model/multimodal_encoder/siglip/__pycache__/__init__.cpython-310.pyc differ diff --git a/dam/model/multimodal_encoder/siglip/__pycache__/configuration_siglip.cpython-310.pyc b/dam/model/multimodal_encoder/siglip/__pycache__/configuration_siglip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..483b76f373ee30bb65d2e441dc7b14747a955aaf Binary files /dev/null and b/dam/model/multimodal_encoder/siglip/__pycache__/configuration_siglip.cpython-310.pyc differ diff --git 
a/dam/model/multimodal_encoder/siglip/__pycache__/image_processing_siglip.cpython-310.pyc b/dam/model/multimodal_encoder/siglip/__pycache__/image_processing_siglip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74afb57d55a15394b8d542e0b77de2b2faa9025e Binary files /dev/null and b/dam/model/multimodal_encoder/siglip/__pycache__/image_processing_siglip.cpython-310.pyc differ diff --git a/dam/model/multimodal_encoder/siglip/__pycache__/modeling_siglip.cpython-310.pyc b/dam/model/multimodal_encoder/siglip/__pycache__/modeling_siglip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f9d812ec71ff053c932071ca83f1950ebf9f570 Binary files /dev/null and b/dam/model/multimodal_encoder/siglip/__pycache__/modeling_siglip.cpython-310.pyc differ diff --git a/dam/model/multimodal_encoder/siglip/configuration_siglip.py b/dam/model/multimodal_encoder/siglip/configuration_siglip.py new file mode 100644 index 0000000000000000000000000000000000000000..4341b47297ac1ffb4ba8743fbdf33a6b48677c98 --- /dev/null +++ b/dam/model/multimodal_encoder/siglip/configuration_siglip.py @@ -0,0 +1,302 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Siglip model configuration""" + +import os +from typing import Union + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + +SIGLIP_PRETRAINED_CONFIG_ARCHIVE_MAP = { + "google/siglip-base-patch16-224": "https://huggingface.co/google/siglip-base-patch16-224/resolve/main/config.json", +} + + +class SiglipTextConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`SiglipTextModel`]. It is used to instantiate a + Siglip text encoder according to the specified arguments, defining the model architecture. Instantiating a + configuration with the defaults will yield a similar configuration to that of the text encoder of the Siglip + [google/siglip-base-patch16-224](https://huggingface.co/google/siglip-base-patch16-224) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + Args: + vocab_size (`int`, *optional*, defaults to 32000): + Vocabulary size of the Siglip text model. Defines the number of different tokens that can be represented by + the `inputs_ids` passed when calling [`SiglipModel`]. + hidden_size (`int`, *optional*, defaults to 768): + Dimensionality of the encoder layers and the pooler layer. + intermediate_size (`int`, *optional*, defaults to 3072): + Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + num_hidden_layers (`int`, *optional*, defaults to 12): + Number of hidden layers in the Transformer encoder. 
+ num_attention_heads (`int`, *optional*, defaults to 12): + Number of attention heads for each attention layer in the Transformer encoder. + max_position_embeddings (`int`, *optional*, defaults to 64): + The maximum sequence length that this model might ever be used with. Typically set this to something large + just in case (e.g., 512 or 1024 or 2048). + hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"selu"` and `"gelu_new"` `"quick_gelu"` are supported. + layer_norm_eps (`float`, *optional*, defaults to 1e-06): + The epsilon used by the layer normalization layers. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + pad_token_id (`int`, *optional*, defaults to 1): + The id of the padding token in the vocabulary. + bos_token_id (`int`, *optional*, defaults to 49406): + The id of the beginning-of-sequence token in the vocabulary. + eos_token_id (`int`, *optional*, defaults to 49407): + The id of the end-of-sequence token in the vocabulary. + + Example: + + ```python + >>> from transformers import SiglipTextConfig, SiglipTextModel + + >>> # Initializing a SiglipTextConfig with google/siglip-base-patch16-224 style configuration + >>> configuration = SiglipTextConfig() + + >>> # Initializing a SiglipTextModel (with random weights) from the google/siglip-base-patch16-224 style configuration + >>> model = SiglipTextModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + + model_type = "siglip_text_model" + + def __init__( + self, + vocab_size=32000, + hidden_size=768, + intermediate_size=3072, + num_hidden_layers=12, + num_attention_heads=12, + max_position_embeddings=64, + hidden_act="gelu_pytorch_tanh", + layer_norm_eps=1e-6, + attention_dropout=0.0, + # This differs from `CLIPTokenizer`'s default and from openai/siglip + # See https://github.com/huggingface/transformers/pull/24773#issuecomment-1632287538 + pad_token_id=1, + bos_token_id=49406, + eos_token_id=49407, + **kwargs, + ): + super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs) + + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.max_position_embeddings = max_position_embeddings + self.layer_norm_eps = layer_norm_eps + self.hidden_act = hidden_act + self.attention_dropout = attention_dropout + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig": + # cls._set_token_in_kwargs(kwargs) + + config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs) + + # get the text config dict if we are loading from SiglipConfig + if config_dict.get("model_type") == "siglip": + config_dict = config_dict["text_config"] + + if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type: + logger.warning( + f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " + f"{cls.model_type}. This is not supported for all configurations of models and can yield errors." 
+ ) + + return cls.from_dict(config_dict, **kwargs) + + +class SiglipVisionConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`SiglipVisionModel`]. It is used to instantiate a + Siglip vision encoder according to the specified arguments, defining the model architecture. Instantiating a + configuration with the defaults will yield a similar configuration to that of the vision encoder of the Siglip + [google/siglip-base-patch16-224](https://huggingface.co/google/siglip-base-patch16-224) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + Args: + hidden_size (`int`, *optional*, defaults to 768): + Dimensionality of the encoder layers and the pooler layer. + intermediate_size (`int`, *optional*, defaults to 3072): + Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + num_hidden_layers (`int`, *optional*, defaults to 12): + Number of hidden layers in the Transformer encoder. + num_attention_heads (`int`, *optional*, defaults to 12): + Number of attention heads for each attention layer in the Transformer encoder. + num_channels (`int`, *optional*, defaults to 3): + Number of channels in the input images. + image_size (`int`, *optional*, defaults to 224): + The size (resolution) of each image. + patch_size (`int`, *optional*, defaults to 16): + The size (resolution) of each patch. + hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"selu"` and `"gelu_new"` ``"quick_gelu"` are supported. + layer_norm_eps (`float`, *optional*, defaults to 1e-06): + The epsilon used by the layer normalization layers. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. 
+ + Example: + + ```python + >>> from transformers import SiglipVisionConfig, SiglipVisionModel + + >>> # Initializing a SiglipVisionConfig with google/siglip-base-patch16-224 style configuration + >>> configuration = SiglipVisionConfig() + + >>> # Initializing a SiglipVisionModel (with random weights) from the google/siglip-base-patch16-224 style configuration + >>> model = SiglipVisionModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + + model_type = "siglip_vision_model" + + def __init__( + self, + hidden_size=768, + intermediate_size=3072, + num_hidden_layers=12, + num_attention_heads=12, + num_channels=3, + image_size=224, + patch_size=16, + hidden_act="gelu_pytorch_tanh", + layer_norm_eps=1e-6, + attention_dropout=0.0, + **kwargs, + ): + super().__init__(**kwargs) + + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.num_channels = num_channels + self.patch_size = patch_size + self.image_size = image_size + self.attention_dropout = attention_dropout + self.layer_norm_eps = layer_norm_eps + self.hidden_act = hidden_act + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig": + # cls._set_token_in_kwargs(kwargs) + + config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs) + + # get the vision config dict if we are loading from SiglipConfig + if config_dict.get("model_type") == "siglip": + config_dict = config_dict["vision_config"] + + if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type: + logger.warning( + f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " + f"{cls.model_type}. This is not supported for all configurations of models and can yield errors." + ) + + return cls.from_dict(config_dict, **kwargs) + + +class SiglipConfig(PretrainedConfig): + r""" + [`SiglipConfig`] is the configuration class to store the configuration of a [`SiglipModel`]. It is used to + instantiate a Siglip model according to the specified arguments, defining the text model and vision model configs. + Instantiating a configuration with the defaults will yield a similar configuration to that of the Siglip + [google/siglip-base-patch16-224](https://huggingface.co/google/siglip-base-patch16-224) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + Args: + text_config (`dict`, *optional*): + Dictionary of configuration options used to initialize [`SiglipTextConfig`]. + vision_config (`dict`, *optional*): + Dictionary of configuration options used to initialize [`SiglipVisionConfig`]. + kwargs (*optional*): + Dictionary of keyword arguments. 
+ + Example: + + ```python + >>> from transformers import SiglipConfig, SiglipModel + + >>> # Initializing a SiglipConfig with google/siglip-base-patch16-224 style configuration + >>> configuration = SiglipConfig() + + >>> # Initializing a SiglipModel (with random weights) from the google/siglip-base-patch16-224 style configuration + >>> model = SiglipModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + + >>> # We can also initialize a SiglipConfig from a SiglipTextConfig and a SiglipVisionConfig + >>> from transformers import SiglipTextConfig, SiglipVisionConfig + + >>> # Initializing a SiglipText and SiglipVision configuration + >>> config_text = SiglipTextConfig() + >>> config_vision = SiglipVisionConfig() + + >>> config = SiglipConfig.from_text_vision_configs(config_text, config_vision) + ```""" + + model_type = "siglip" + + def __init__(self, text_config=None, vision_config=None, **kwargs): + super().__init__(**kwargs) + + if text_config is None: + text_config = {} + logger.info("`text_config` is `None`. Initializing the `SiglipTextConfig` with default values.") + + if vision_config is None: + vision_config = {} + logger.info("`vision_config` is `None`. initializing the `SiglipVisionConfig` with default values.") + + self.text_config = SiglipTextConfig(**text_config) + self.vision_config = SiglipVisionConfig(**vision_config) + + self.initializer_factor = 1.0 + + @classmethod + def from_text_vision_configs(cls, text_config: SiglipTextConfig, vision_config: SiglipVisionConfig, **kwargs): + r""" + Instantiate a [`SiglipConfig`] (or a derived class) from siglip text model configuration and siglip vision + model configuration. + + Returns: + [`SiglipConfig`]: An instance of a configuration object + """ + + return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) diff --git a/dam/model/multimodal_encoder/siglip/convert_siglip_to_hf.py b/dam/model/multimodal_encoder/siglip/convert_siglip_to_hf.py new file mode 100644 index 0000000000000000000000000000000000000000..6adacef84f9e27fbd97940131a29aec622164542 --- /dev/null +++ b/dam/model/multimodal_encoder/siglip/convert_siglip_to_hf.py @@ -0,0 +1,413 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Convert SigLIP checkpoints from the original repository. 
+ +URL: https://github.com/google-research/big_vision/tree/main +""" + + +import argparse +import collections +from pathlib import Path + +import numpy as np +import requests +import torch +from huggingface_hub import hf_hub_download +from numpy import load +from PIL import Image + +from transformers import SiglipConfig, SiglipImageProcessor, SiglipModel, SiglipProcessor, SiglipTokenizer +from transformers.utils import logging + + +logging.set_verbosity_info() +logger = logging.get_logger(__name__) + + +model_name_to_checkpoint = { + # base checkpoints + "siglip-base-patch16-224": "/Users/nielsrogge/Documents/SigLIP/webli_en_b16_224_63724782.npz", + "siglip-base-patch16-256": "/Users/nielsrogge/Documents/SigLIP/webli_en_b16_256_60500360.npz", + "siglip-base-patch16-384": "/Users/nielsrogge/Documents/SigLIP/webli_en_b16_384_68578854.npz", + "siglip-base-patch16-512": "/Users/nielsrogge/Documents/SigLIP/webli_en_b16_512_68580893.npz", + # large checkpoints + "siglip-large-patch16-256": "/Users/nielsrogge/Documents/SigLIP/webli_en_l16_256_60552751.npz", + "siglip-large-patch16-384": "/Users/nielsrogge/Documents/SigLIP/webli_en_l16_384_63634585.npz", + # multilingual checkpoint + "siglip-base-patch16-256-i18n": "/Users/nielsrogge/Documents/SigLIP/webli_i18n_b16_256_66117334.npz", + # so400m checkpoints + "siglip-so400m-patch14-384": "/Users/nielsrogge/Documents/SigLIP/webli_en_so400m_384_58765454.npz", +} + +model_name_to_image_size = { + "siglip-base-patch16-224": 224, + "siglip-base-patch16-256": 256, + "siglip-base-patch16-384": 384, + "siglip-base-patch16-512": 512, + "siglip-large-patch16-256": 256, + "siglip-large-patch16-384": 384, + "siglip-base-patch16-256-i18n": 256, + "siglip-so400m-patch14-384": 384, +} + + +def get_siglip_config(model_name): + config = SiglipConfig() + + vocab_size = 250000 if "i18n" in model_name else 32000 + image_size = model_name_to_image_size[model_name] + patch_size = 16 if "patch16" in model_name else 14 + + # size of the architecture + config.vision_config.image_size = image_size + config.vision_config.patch_size = patch_size + config.text_config.vocab_size = vocab_size + + if "base" in model_name: + pass + elif "large" in model_name: + config.text_config.hidden_size = 1024 + config.text_config.intermediate_size = 4096 + config.text_config.num_hidden_layers = 24 + config.text_config.num_attention_heads = 16 + config.vision_config.hidden_size = 1024 + config.vision_config.intermediate_size = 4096 + config.vision_config.num_hidden_layers = 24 + config.vision_config.num_attention_heads = 16 + elif "so400m" in model_name: + config.text_config.hidden_size = 1152 + config.text_config.intermediate_size = 4304 + config.text_config.num_hidden_layers = 27 + config.text_config.num_attention_heads = 16 + config.vision_config.hidden_size = 1152 + config.vision_config.intermediate_size = 4304 + config.vision_config.num_hidden_layers = 27 + config.vision_config.num_attention_heads = 16 + else: + raise ValueError("Model not supported") + + return config + + +def create_rename_keys(config): + rename_keys = [] + # fmt: off + + # vision encoder + + rename_keys.append(("params/img/embedding/kernel", "vision_model.embeddings.patch_embedding.weight")) + rename_keys.append(("params/img/embedding/bias", "vision_model.embeddings.patch_embedding.bias")) + rename_keys.append(("params/img/pos_embedding", "vision_model.embeddings.position_embedding.weight")) + + for i in range(config.vision_config.num_hidden_layers): + 
rename_keys.append((f"params/img/Transformer/encoderblock_{i}/LayerNorm_0/scale", f"vision_model.encoder.layers.{i}.layer_norm1.weight")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/LayerNorm_0/bias", f"vision_model.encoder.layers.{i}.layer_norm1.bias")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/LayerNorm_1/scale", f"vision_model.encoder.layers.{i}.layer_norm2.weight")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/LayerNorm_1/bias", f"vision_model.encoder.layers.{i}.layer_norm2.bias")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MlpBlock_0/Dense_0/kernel", f"vision_model.encoder.layers.{i}.mlp.fc1.weight")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MlpBlock_0/Dense_0/bias", f"vision_model.encoder.layers.{i}.mlp.fc1.bias")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MlpBlock_0/Dense_1/kernel", f"vision_model.encoder.layers.{i}.mlp.fc2.weight")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MlpBlock_0/Dense_1/bias", f"vision_model.encoder.layers.{i}.mlp.fc2.bias")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MultiHeadDotProductAttention_0/key/kernel", f"vision_model.encoder.layers.{i}.self_attn.k_proj.weight")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MultiHeadDotProductAttention_0/key/bias", f"vision_model.encoder.layers.{i}.self_attn.k_proj.bias")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MultiHeadDotProductAttention_0/value/kernel", f"vision_model.encoder.layers.{i}.self_attn.v_proj.weight")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MultiHeadDotProductAttention_0/value/bias", f"vision_model.encoder.layers.{i}.self_attn.v_proj.bias")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MultiHeadDotProductAttention_0/query/kernel", f"vision_model.encoder.layers.{i}.self_attn.q_proj.weight")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MultiHeadDotProductAttention_0/query/bias", f"vision_model.encoder.layers.{i}.self_attn.q_proj.bias")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MultiHeadDotProductAttention_0/out/kernel", f"vision_model.encoder.layers.{i}.self_attn.out_proj.weight")) + rename_keys.append((f"params/img/Transformer/encoderblock_{i}/MultiHeadDotProductAttention_0/out/bias", f"vision_model.encoder.layers.{i}.self_attn.out_proj.bias")) + + rename_keys.append(("params/img/Transformer/encoder_norm/scale", "vision_model.post_layernorm.weight")) + rename_keys.append(("params/img/Transformer/encoder_norm/bias", "vision_model.post_layernorm.bias")) + + rename_keys.append(("params/img/MAPHead_0/probe", "vision_model.head.probe")) + rename_keys.append(("params/img/MAPHead_0/LayerNorm_0/scale", "vision_model.head.layernorm.weight")) + rename_keys.append(("params/img/MAPHead_0/LayerNorm_0/bias", "vision_model.head.layernorm.bias")) + rename_keys.append(("params/img/MAPHead_0/MlpBlock_0/Dense_0/kernel", "vision_model.head.mlp.fc1.weight")) + rename_keys.append(("params/img/MAPHead_0/MlpBlock_0/Dense_0/bias", "vision_model.head.mlp.fc1.bias")) + rename_keys.append(("params/img/MAPHead_0/MlpBlock_0/Dense_1/kernel", "vision_model.head.mlp.fc2.weight")) + rename_keys.append(("params/img/MAPHead_0/MlpBlock_0/Dense_1/bias", "vision_model.head.mlp.fc2.bias")) + rename_keys.append(("params/img/MAPHead_0/MultiHeadDotProductAttention_0/out/kernel", "vision_model.head.attention.out_proj.weight")) + 
rename_keys.append(("params/img/MAPHead_0/MultiHeadDotProductAttention_0/out/bias", "vision_model.head.attention.out_proj.bias")) + + # text encoder + + rename_keys.append(("params/txt/Embed_0/embedding", "text_model.embeddings.token_embedding.weight")) + rename_keys.append(("params/txt/pos_embedding", "text_model.embeddings.position_embedding.weight")) + + for i in range(config.text_config.num_hidden_layers): + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/LayerNorm_0/scale", f"text_model.encoder.layers.{i}.layer_norm1.weight")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/LayerNorm_0/bias", f"text_model.encoder.layers.{i}.layer_norm1.bias")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/LayerNorm_1/scale", f"text_model.encoder.layers.{i}.layer_norm2.weight")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/LayerNorm_1/bias", f"text_model.encoder.layers.{i}.layer_norm2.bias")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MlpBlock_0/Dense_0/kernel", f"text_model.encoder.layers.{i}.mlp.fc1.weight")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MlpBlock_0/Dense_0/bias", f"text_model.encoder.layers.{i}.mlp.fc1.bias")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MlpBlock_0/Dense_1/kernel", f"text_model.encoder.layers.{i}.mlp.fc2.weight")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MlpBlock_0/Dense_1/bias", f"text_model.encoder.layers.{i}.mlp.fc2.bias")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MultiHeadDotProductAttention_0/key/kernel", f"text_model.encoder.layers.{i}.self_attn.k_proj.weight")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MultiHeadDotProductAttention_0/key/bias", f"text_model.encoder.layers.{i}.self_attn.k_proj.bias")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MultiHeadDotProductAttention_0/value/kernel", f"text_model.encoder.layers.{i}.self_attn.v_proj.weight")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MultiHeadDotProductAttention_0/value/bias", f"text_model.encoder.layers.{i}.self_attn.v_proj.bias")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MultiHeadDotProductAttention_0/query/kernel", f"text_model.encoder.layers.{i}.self_attn.q_proj.weight")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MultiHeadDotProductAttention_0/query/bias", f"text_model.encoder.layers.{i}.self_attn.q_proj.bias")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MultiHeadDotProductAttention_0/out/kernel", f"text_model.encoder.layers.{i}.self_attn.out_proj.weight")) + rename_keys.append((f"params/txt/Encoder_0/encoderblock_{i}/MultiHeadDotProductAttention_0/out/bias", f"text_model.encoder.layers.{i}.self_attn.out_proj.bias")) + + rename_keys.append(("params/txt/Encoder_0/encoder_norm/scale", "text_model.final_layer_norm.weight")) + rename_keys.append(("params/txt/Encoder_0/encoder_norm/bias", "text_model.final_layer_norm.bias")) + rename_keys.append(("params/txt/head/kernel", "text_model.head.weight")) + rename_keys.append(("params/txt/head/bias", "text_model.head.bias")) + + # learned temperature and bias + rename_keys.append(("params/t", "logit_scale")) + rename_keys.append(("params/b", "logit_bias")) + + # fmt: on + return rename_keys + + +def rename_key(dct, old, new, config): + val = dct.pop(old) + + if ("out_proj" in new or "v_proj" in new or "k_proj" in new or "q_proj" in new) and "vision" in new: + val = val.reshape(-1, 
config.vision_config.hidden_size) + if ("out_proj" in new or "v_proj" in new or "k_proj" in new or "q_proj" in new) and "text" in new: + val = val.reshape(-1, config.text_config.hidden_size) + + if "patch_embedding.weight" in new: + val = val.transpose(3, 2, 0, 1) + elif new.endswith("weight") and "position_embedding" not in new and "token_embedding" not in new: + val = val.T + + if "position_embedding" in new and "vision" in new: + val = val.reshape(-1, config.vision_config.hidden_size) + if "position_embedding" in new and "text" in new: + val = val.reshape(-1, config.text_config.hidden_size) + + if new.endswith("bias"): + val = val.reshape(-1) + + dct[new] = torch.from_numpy(val) + + +def read_in_q_k_v_head(state_dict, config): + # read in individual input projection layers + key_proj_weight = ( + state_dict.pop("params/img/MAPHead_0/MultiHeadDotProductAttention_0/key/kernel") + .reshape(-1, config.vision_config.hidden_size) + .T + ) + key_proj_bias = state_dict.pop("params/img/MAPHead_0/MultiHeadDotProductAttention_0/key/bias").reshape(-1) + value_proj_weight = ( + state_dict.pop("params/img/MAPHead_0/MultiHeadDotProductAttention_0/value/kernel") + .reshape(-1, config.vision_config.hidden_size) + .T + ) + value_proj_bias = state_dict.pop("params/img/MAPHead_0/MultiHeadDotProductAttention_0/value/bias").reshape(-1) + query_proj_weight = ( + state_dict.pop("params/img/MAPHead_0/MultiHeadDotProductAttention_0/query/kernel") + .reshape(-1, config.vision_config.hidden_size) + .T + ) + query_proj_bias = state_dict.pop("params/img/MAPHead_0/MultiHeadDotProductAttention_0/query/bias").reshape(-1) + + # next, add them to the state dict as a single matrix + vector + state_dict["vision_model.head.attention.in_proj_weight"] = torch.from_numpy( + np.concatenate([query_proj_weight, key_proj_weight, value_proj_weight], axis=0) + ) + state_dict["vision_model.head.attention.in_proj_bias"] = torch.from_numpy( + np.concatenate([query_proj_bias, key_proj_bias, value_proj_bias], axis=0) + ) + + +# We will verify our results on an image of cute cats +def prepare_img(): + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + image = Image.open(requests.get(url, stream=True).raw) + return image + + +def flatten_nested_dict(params, parent_key="", sep="/"): + items = [] + + for k, v in params.items(): + new_key = parent_key + sep + k if parent_key else k + + if isinstance(v, collections.abc.MutableMapping): + items.extend(flatten_nested_dict(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) + + +@torch.no_grad() +def convert_siglip_checkpoint(model_name, pytorch_dump_folder_path, verify_logits=True, push_to_hub=False): + """ + Copy/paste/tweak model's weights to our SigLIP structure. 
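    Example invocation (illustrative; the hard-coded local checkpoint and sentencepiece vocab
    paths in this script must exist on disk):

        python convert_siglip_to_hf.py --model_name siglip-base-patch16-224 \
            --pytorch_dump_folder_path ./siglip-base-patch16-224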
+ """ + + # define default SigLIP configuration + config = get_siglip_config(model_name) + + # get checkpoint + checkpoint = model_name_to_checkpoint[model_name] + + # get vocab file + if "i18n" in model_name: + vocab_file = "/Users/nielsrogge/Documents/SigLIP/multilingual_vocab/sentencepiece.model" + else: + vocab_file = "/Users/nielsrogge/Documents/SigLIP/english_vocab/sentencepiece.model" + + # load original state dict + data = load(checkpoint) + state_dict = flatten_nested_dict(data) + + # remove and rename some keys + rename_keys = create_rename_keys(config) + for src, dest in rename_keys: + rename_key(state_dict, src, dest, config) + + # qkv matrices of attention pooling head need special treatment + read_in_q_k_v_head(state_dict, config) + + # load HuggingFace model + model = SiglipModel(config).eval() + model.load_state_dict(state_dict) + + # create processor + # important: make tokenizer not return attention_mask since original one doesn't require it + image_size = config.vision_config.image_size + size = {"height": image_size, "width": image_size} + image_processor = SiglipImageProcessor(size=size) + tokenizer = SiglipTokenizer(vocab_file=vocab_file, model_input_names=["input_ids"]) + processor = SiglipProcessor(image_processor=image_processor, tokenizer=tokenizer) + + # verify on dummy images and texts + url_1 = "https://cdn.openai.com/multimodal-neurons/assets/apple/apple-ipod.jpg" + image_1 = Image.open(requests.get(url_1, stream=True).raw).convert("RGB") + url_2 = "https://cdn.openai.com/multimodal-neurons/assets/apple/apple-blank.jpg" + image_2 = Image.open(requests.get(url_2, stream=True).raw).convert("RGB") + texts = ["an apple", "a picture of an apple"] + + inputs = processor(images=[image_1, image_2], text=texts, return_tensors="pt", padding="max_length") + + # verify input_ids against original ones + if image_size == 224: + filename = "siglip_pixel_values.pt" + elif image_size == 256: + filename = "siglip_pixel_values_256.pt" + elif image_size == 384: + filename = "siglip_pixel_values_384.pt" + elif image_size == 512: + filename = "siglip_pixel_values_512.pt" + else: + raise ValueError("Image size not supported") + + filepath = hf_hub_download(repo_id="nielsr/test-image", filename=filename, repo_type="dataset") + original_pixel_values = torch.load(filepath) + filepath = hf_hub_download(repo_id="nielsr/test-image", filename="siglip_input_ids.pt", repo_type="dataset") + original_input_ids = torch.load(filepath) + + if "i18n" not in model_name: + assert inputs.input_ids.tolist() == original_input_ids.tolist() + + print("Mean of original pixel values:", original_pixel_values.mean()) + print("Mean of new pixel values:", inputs.pixel_values.mean()) + + # note: we're testing with original pixel values here since we don't have exact pixel values + with torch.no_grad(): + outputs = model(input_ids=inputs.input_ids, pixel_values=original_pixel_values) + + # with torch.no_grad(): + # outputs = model(input_ids=inputs.input_ids, pixel_values=inputs.pixel_values) + + print(outputs.logits_per_image[:3, :3]) + + probs = torch.sigmoid(outputs.logits_per_image) # these are the probabilities + print(f"{probs[0][0]:.1%} that image 0 is '{texts[0]}'") + print(f"{probs[0][1]:.1%} that image 0 is '{texts[1]}'") + + if verify_logits: + if model_name == "siglip-base-patch16-224": + expected_slice = torch.tensor( + [[-2.9621, -2.1672], [-0.2713, 0.2910]], + ) + elif model_name == "siglip-base-patch16-256": + expected_slice = torch.tensor( + [[-3.1146, -1.9894], [-0.7312, 0.6387]], + ) + elif 
model_name == "siglip-base-patch16-384": + expected_slice = torch.tensor( + [[-2.8098, -2.1891], [-0.4242, 0.4102]], + ) + elif model_name == "siglip-base-patch16-512": + expected_slice = torch.tensor( + [[-2.7899, -2.2668], [-0.4295, -0.0735]], + ) + elif model_name == "siglip-large-patch16-256": + expected_slice = torch.tensor( + [[-1.5827, -0.5801], [-0.9153, 0.1363]], + ) + elif model_name == "siglip-large-patch16-384": + expected_slice = torch.tensor( + [[-2.1523, -0.2899], [-0.2959, 0.7884]], + ) + elif model_name == "siglip-so400m-patch14-384": + expected_slice = torch.tensor([[-1.2441, -0.6649], [-0.7060, 0.7374]]) + elif model_name == "siglip-base-patch16-256-i18n": + expected_slice = torch.tensor( + [[-0.9064, 0.1073], [-0.0299, 0.5304]], + ) + + assert torch.allclose(outputs.logits_per_image[:3, :3], expected_slice, atol=1e-4) + print("Looks ok!") + + if pytorch_dump_folder_path is not None: + Path(pytorch_dump_folder_path).mkdir(exist_ok=True) + print(f"Saving model {model_name} to {pytorch_dump_folder_path}") + model.save_pretrained(pytorch_dump_folder_path) + print(f"Saving processor to {pytorch_dump_folder_path}") + processor.save_pretrained(pytorch_dump_folder_path) + + if push_to_hub: + model.push_to_hub(f"nielsr/{model_name}") + processor.push_to_hub(f"nielsr/{model_name}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + # Required parameters + parser.add_argument( + "--model_name", + default="siglip-base-patch16-224", + type=str, + choices=model_name_to_checkpoint.keys(), + help="Name of the model you'd like to convert.", + ) + parser.add_argument( + "--pytorch_dump_folder_path", default=None, type=str, help="Path to the output PyTorch model directory." + ) + parser.add_argument( + "--verify_logits", + action="store_false", + help="Whether to verify logits against the original implementation.", + ) + parser.add_argument( + "--push_to_hub", action="store_true", help="Whether or not to push the converted model to the 🤗 hub." + ) + + args = parser.parse_args() + convert_siglip_checkpoint(args.model_name, args.pytorch_dump_folder_path, args.verify_logits, args.push_to_hub) diff --git a/dam/model/multimodal_encoder/siglip/image_processing_siglip.py b/dam/model/multimodal_encoder/siglip/image_processing_siglip.py new file mode 100644 index 0000000000000000000000000000000000000000..833ce33620b9b67c69fd7f963cff51d5821ec341 --- /dev/null +++ b/dam/model/multimodal_encoder/siglip/image_processing_siglip.py @@ -0,0 +1,302 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Image processor class for SigLIP.""" + +from typing import Dict, List, Optional, Union + +from transformers.image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from transformers.image_transforms import ( + center_crop, + resize, + rescale, + normalize, + to_channel_dimension_format, + get_resize_output_image_size, + get_channel_dimension_axis, + convert_to_rgb, +) +from transformers.image_utils import ( + IMAGENET_STANDARD_MEAN, + IMAGENET_STANDARD_STD, + ChannelDimension, + ImageInput, + PILImageResampling, + infer_channel_dimension_format, + # is_scaled_image, + make_list_of_images, + to_numpy_array, + valid_images, +) +from transformers.utils import TensorType, is_vision_available, logging +import numpy as np + +logger = logging.get_logger(__name__) + +def is_scaled_image(image: np.ndarray) -> bool: + """ + Checks to see whether the pixel values have already been rescaled to [0, 1]. + """ + if image.dtype == np.uint8: + return False + + # It's possible the image has pixel values in [0, 255] but is of floating type + return np.min(image) >= 0 and np.max(image) <= 1 + + +if is_vision_available(): + import PIL + + +class SiglipImageProcessor(BaseImageProcessor): + r""" + Constructs a SigLIP image processor. + + Args: + do_resize (`bool`, *optional*, defaults to `True`): + Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by + `do_resize` in the `preprocess` method. + size (`Dict[str, int]` *optional*, defaults to `{"height": 224, "width": 224}`): + Size of the image after resizing. Can be overridden by `size` in the `preprocess` method. + resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`): + Resampling filter to use if resizing the image. Can be overridden by `resample` in the `preprocess` method. + do_rescale (`bool`, *optional*, defaults to `True`): + Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in + the `preprocess` method. + rescale_factor (`int` or `float`, *optional*, defaults to `1/255`): + Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess` + method. + do_normalize (`bool`, *optional*, defaults to `True`): + Whether to normalize the image by the specified mean and standard deviation. Can be overridden by + `do_normalize` in the `preprocess` method. + image_mean (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`): + Mean to use if normalizing the image. This is a float or list of floats the length of the number of + channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. + image_std (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`): + Standard deviation to use if normalizing the image. This is a float or list of floats the length of the + number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method. + Can be overridden by the `image_std` parameter in the `preprocess` method. 
+ """ + + model_input_names = ["pixel_values"] + + def __init__( + self, + do_resize: bool = True, + size: Dict[str, int] = None, + resample: PILImageResampling = PILImageResampling.BICUBIC, + do_rescale: bool = True, + rescale_factor: Union[int, float] = 1 / 255, + do_normalize: bool = True, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = True, + **kwargs, + ) -> None: + super().__init__(**kwargs) + size = size if size is not None else {"shortest_edge": 384} + size = get_size_dict(size, default_to_square=False) + image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN + image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + + self.do_resize = do_resize + self.size = size + self.resample = resample + self.do_rescale = do_rescale + self.rescale_factor = rescale_factor + self.do_normalize = do_normalize + self.image_mean = image_mean + self.image_std = image_std + self.do_convert_rgb = do_convert_rgb + + def resize( + self, + image: np.ndarray, + size: Dict[str, int], + resample: PILImageResampling = PILImageResampling.BICUBIC, + data_format: Optional[Union[str, ChannelDimension]] = None, + **kwargs, + ) -> np.ndarray: + """ + Resize an image. The shortest edge of the image is resized to size["shortest_edge"], with the longest edge + resized to keep the input aspect ratio. + + Args: + image (`np.ndarray`): + Image to resize. + size (`Dict[str, int]`): + Size of the output image. + resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`): + Resampling filter to use when resiizing the image. + data_format (`str` or `ChannelDimension`, *optional*): + The channel dimension format of the image. If not provided, it will be the same as the input image. + """ + # size = get_size_dict(size, default_to_square=False) + default_to_square = True + if "shortest_edge" in size: + size = size["shortest_edge"] + default_to_square = False + elif "height" in size and "width" in size: + size = (size["height"], size["width"]) + else: + raise ValueError("Size must contain either 'shortest_edge' or 'height' and 'width'.") + output_size = get_resize_output_image_size(image, size=size, default_to_square=default_to_square) + return resize(image, size=output_size, resample=resample, data_format=data_format, **kwargs) + + def preprocess( + self, + images: ImageInput, + do_resize: bool = None, + size: Dict[str, int] = None, + resample: PILImageResampling = None, + do_rescale: bool = None, + rescale_factor: float = None, + do_normalize: bool = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = None, + return_tensors: Optional[Union[str, TensorType]] = None, + data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + **kwargs, + ) -> PIL.Image.Image: + """ + Preprocess an image or batch of images. + + Args: + images (`ImageInput`): + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. + do_resize (`bool`, *optional*, defaults to `self.do_resize`): + Whether to resize the image. + size (`Dict[str, int]`, *optional*, defaults to `self.size`): + Size of the image after resizing. 
+ resample (`int`, *optional*, defaults to `self.resample`): + Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`. Only + has an effect if `do_resize` is set to `True`. + do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): + Whether to rescale the image. + rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`): + Rescale factor to rescale the image by if `do_rescale` is set to `True`. + do_normalize (`bool`, *optional*, defaults to `self.do_normalize`): + Whether to normalize the image. + image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`): + Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`. + image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): + Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to + `True`. + return_tensors (`str` or `TensorType`, *optional*): + The type of tensors to return. Can be one of: + - Unset: Return a list of `np.ndarray`. + - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`. + - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`. + - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`. + - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`. + data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`): + The channel dimension format for the output image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - Unset: Use the channel dimension format of the input image. + input_data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the input image. If unset, the channel dimension format is inferred + from the input image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. + """ + do_resize = do_resize if do_resize is not None else self.do_resize + size = size if size is not None else self.size + size = get_size_dict(size, param_name="size", default_to_square=False) + resample = resample if resample is not None else self.resample + # do_center_crop = do_center_crop if do_center_crop is not None else self.do_center_crop + # crop_size = crop_size if crop_size is not None else self.crop_size + # crop_size = get_size_dict(crop_size, param_name="crop_size", default_to_square=True) + do_rescale = do_rescale if do_rescale is not None else self.do_rescale + rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor + do_normalize = do_normalize if do_normalize is not None else self.do_normalize + image_mean = image_mean if image_mean is not None else self.image_mean + image_std = image_std if image_std is not None else self.image_std + do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb + + + images = make_list_of_images(images) + + if not valid_images(images): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " + "torch.Tensor, tf.Tensor or jax.ndarray." 
+ ) + + if do_resize and size is None: + raise ValueError("Size must be specified if do_resize is True.") + + if do_rescale and rescale_factor is None: + raise ValueError("Rescale factor must be specified if do_rescale is True.") + + if do_normalize and (image_mean is None or image_std is None): + raise ValueError("Image mean and std must be specified if do_normalize is True.") + + # PIL RGBA images are converted to RGB + if do_convert_rgb: + images = [convert_to_rgb(image) for image in images] + + # All transformations expect numpy arrays. + images = [to_numpy_array(image) for image in images] + + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + + # if input_data_format is None: + # # We assume that all images have the same channel dimension format. + # input_data_format = infer_channel_dimension_format(images[0]) + + if do_resize: + images = [self.resize(image=image, size=size, resample=resample) for image in images] + + if do_rescale: + images = [rescale(image=image, scale=rescale_factor) for image in images] + + if do_normalize: + output_images = [] + for image in images: + if get_channel_dimension_axis(image) == 0: + image = image.transpose((1, 2, 0)) + if image.shape[-1] == 1: + image = np.dstack((image, image, image)) + output_images.append(image) + images = output_images + # for image in images: + # # print("image shape", image.shape) + # channel_axis = get_channel_dimension_axis(image) + # num_channels = image.shape[channel_axis] + # if num_channels != len(image_mean): + # print("image_mean", image_mean) + # print("channel_axis", channel_axis) + # print("num_channels", num_channels) + # print("image.shape", image.shape) + # raise ValueError( + # f"Number of channels in the image ({num_channels}) does not match the length of image mean " + # f"({len(image_mean)})." + # ) + + images = [normalize(image=image, mean=image_mean, std=image_std) for image in images] + + images = [to_channel_dimension_format(image, data_format) for image in images] + + + data = {"pixel_values": images} + return BatchFeature(data=data, tensor_type=return_tensors) diff --git a/dam/model/multimodal_encoder/siglip/modeling_siglip.py b/dam/model/multimodal_encoder/siglip/modeling_siglip.py new file mode 100644 index 0000000000000000000000000000000000000000..85691d37626768afdc886e4da0298a3d47709597 --- /dev/null +++ b/dam/model/multimodal_encoder/siglip/modeling_siglip.py @@ -0,0 +1,1268 @@ +# coding=utf-8 +# Copyright 2024 Google AI and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
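To make the preprocessing pipeline above concrete, here is a minimal sketch of pushing one image through `SiglipImageProcessor` with its defaults in this file (shortest edge resized to 384, rescale by 1/255, mean and std of 0.5, channels-first output). The import path follows the file location in this diff; the image is synthetic, and the exact output shape assumes the module's `transformers` imports resolve for the pinned version.

```python
import numpy as np
from PIL import Image

from dam.model.multimodal_encoder.siglip.image_processing_siglip import SiglipImageProcessor

# Defaults from __init__ above: size={"shortest_edge": 384}, rescale_factor=1/255, mean=std=0.5
processor = SiglipImageProcessor()

# Synthetic 640x480 RGB image; any PIL image or numpy array with values in [0, 255] works.
image = Image.fromarray(np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8))

batch = processor.preprocess(image, return_tensors="pt")
# Shortest edge 480 -> 384, long edge scaled to keep aspect ratio, then channels-first:
print(batch["pixel_values"].shape)  # expected: torch.Size([1, 3, 384, 512])
```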
+""" PyTorch Siglip model.""" + + +import math +import warnings +from dataclasses import dataclass +from typing import Any, Optional, Tuple, Union, List + +import numpy as np +import torch +import torch.utils.checkpoint +from torch import nn +from torch.nn.init import _calculate_fan_in_and_fan_out + +from transformers.activations import ACT2FN +# from ...modeling_attn_mask_utils import _prepare_4d_attention_mask +from transformers.modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling +from transformers.modeling_utils import PreTrainedModel +from transformers.utils import ( + ModelOutput, + add_start_docstrings, + add_start_docstrings_to_model_forward, + logging, + replace_return_docstrings, +) +from .configuration_siglip import SiglipConfig, SiglipTextConfig, SiglipVisionConfig + + +logger = logging.get_logger(__name__) + +# _CHECKPOINT_FOR_DOC = "google/siglip-base-patch16-224" + +# SIGLIP_PRETRAINED_MODEL_ARCHIVE_LIST = [ +# "google/siglip-base-patch16-224", +# # See all SigLIP models at https://huggingface.co/models?filter=siglip +# ] + + +def _trunc_normal_(tensor, mean, std, a, b): + # Cut & paste from PyTorch official master until it's in a few official releases - RW + # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " + "The distribution of values may be incorrect.", + stacklevel=2, + ) + + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + l = norm_cdf((a - mean) / std) + u = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * l - 1, 2 * u - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.0)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + + +def trunc_normal_tf_( + tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0, a: float = -2.0, b: float = 2.0 +) -> torch.Tensor: + """Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \\leq \text{mean} \\leq b`. + + NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the + bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0 + and the result is subsquently scaled and shifted by the mean and std args. 
+ + Args: + tensor: an n-dimensional `torch.Tensor` + mean: the mean of the normal distribution + std: the standard deviation of the normal distribution + a: the minimum cutoff value + b: the maximum cutoff value + """ + with torch.no_grad(): + _trunc_normal_(tensor, 0, 1.0, a, b) + tensor.mul_(std).add_(mean) + + +def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"): + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) + if mode == "fan_in": + denom = fan_in + elif mode == "fan_out": + denom = fan_out + elif mode == "fan_avg": + denom = (fan_in + fan_out) / 2 + + variance = scale / denom + + if distribution == "truncated_normal": + # constant is stddev of standard normal truncated to (-2, 2) + trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978) + elif distribution == "normal": + with torch.no_grad(): + tensor.normal_(std=math.sqrt(variance)) + elif distribution == "uniform": + bound = math.sqrt(3 * variance) + with torch.no_grad(): + tensor.uniform_(-bound, bound) + else: + raise ValueError(f"invalid distribution {distribution}") + + +def lecun_normal_(tensor): + variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal") + + +def default_flax_embed_init(tensor): + variance_scaling_(tensor, mode="fan_in", distribution="normal") + + +@dataclass +# Copied from transformers.models.clip.modeling_clip.CLIPVisionModelOutput with CLIP->Siglip +class SiglipVisionModelOutput(ModelOutput): + """ + Base class for vision model's outputs that also contains image embeddings of the pooling of the last hidden states. + + Args: + image_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim)` *optional* returned when model is initialized with `with_projection=True`): + The image embeddings obtained by applying the projection layer to the pooler_output. + last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the model. + hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, + + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + + image_embeds: Optional[torch.FloatTensor] = None + last_hidden_state: torch.FloatTensor = None + hidden_states: Optional[Tuple[torch.FloatTensor]] = None + attentions: Optional[Tuple[torch.FloatTensor]] = None + + +@dataclass +# Copied from transformers.models.clip.modeling_clip.CLIPTextModelOutput with CLIP->Siglip +class SiglipTextModelOutput(ModelOutput): + """ + Base class for text model's outputs that also contains a pooling of the last hidden states. 
+ + Args: + text_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim)` *optional* returned when model is initialized with `with_projection=True`): + The text embeddings obtained by applying the projection layer to the pooler_output. + last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the model. + hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, + + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + + text_embeds: Optional[torch.FloatTensor] = None + last_hidden_state: torch.FloatTensor = None + hidden_states: Optional[Tuple[torch.FloatTensor]] = None + attentions: Optional[Tuple[torch.FloatTensor]] = None + + +@dataclass +# Copied from transformers.models.clip.modeling_clip.CLIPOutput with CLIP->Siglip +class SiglipOutput(ModelOutput): + """ + Args: + loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `return_loss` is `True`): + Contrastive loss for image-text similarity. + logits_per_image:(`torch.FloatTensor` of shape `(image_batch_size, text_batch_size)`): + The scaled dot product scores between `image_embeds` and `text_embeds`. This represents the image-text + similarity scores. + logits_per_text:(`torch.FloatTensor` of shape `(text_batch_size, image_batch_size)`): + The scaled dot product scores between `text_embeds` and `image_embeds`. This represents the text-image + similarity scores. + text_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`): + The text embeddings obtained by applying the projection layer to the pooled output of [`SiglipTextModel`]. + image_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`): + The image embeddings obtained by applying the projection layer to the pooled output of [`SiglipVisionModel`]. + text_model_output(`BaseModelOutputWithPooling`): + The output of the [`SiglipTextModel`]. + vision_model_output(`BaseModelOutputWithPooling`): + The output of the [`SiglipVisionModel`]. 
+ """ + + loss: Optional[torch.FloatTensor] = None + logits_per_image: torch.FloatTensor = None + logits_per_text: torch.FloatTensor = None + text_embeds: torch.FloatTensor = None + image_embeds: torch.FloatTensor = None + text_model_output: BaseModelOutputWithPooling = None + vision_model_output: BaseModelOutputWithPooling = None + + def to_tuple(self) -> Tuple[Any]: + return tuple( + self[k] if k not in ["text_model_output", "vision_model_output"] else getattr(self, k).to_tuple() + for k in self.keys() + ) + + +class SiglipVisionEmbeddings(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.config = config + self.embed_dim = config.hidden_size + self.image_size = config.image_size + self.patch_size = config.patch_size + + self.patch_embedding = nn.Conv2d( + in_channels=config.num_channels, + out_channels=self.embed_dim, + kernel_size=self.patch_size, + stride=self.patch_size, + padding="valid", + ) + + if config.num_mask_channels: + # Mask should have the same output shape to be added. + # Currently we have bias in this embedding (so that mask vs no mask are different). + self.mask_patch_embedding = nn.Conv2d( + in_channels=config.num_mask_channels, + out_channels=self.embed_dim, + kernel_size=self.patch_size, + stride=self.patch_size, + padding="valid" + ) + self.mask_patch_embedding.use_zero_init = True + else: + self.mask_patch_embedding = None + + self.num_patches = (self.image_size // self.patch_size) ** 2 + self.num_positions = self.num_patches + self.position_embedding = nn.Embedding(self.num_positions, self.embed_dim) + self.register_buffer("position_ids", torch.arange(self.num_positions).expand((1, -1)), persistent=False) + + def forward(self, pixel_values: torch.FloatTensor, additional_position_embedding: Optional[torch.Tensor] = None, additional_embedding_mode: Optional[str] = None) -> torch.Tensor: + if self.mask_patch_embedding is None: + patch_embeds = self.patch_embedding(pixel_values) # shape = [*, width, grid, grid] + else: + # Comment this out if you want to encode both images without mask channel and with mask channel. + # However, if different samples in the batch have different number of channels, this is not applicable. + # assert pixel_values.size(1) == 4, f"Input does not have a mask channel, shape: {pixel_values.shape}" + patch_embeds = self.patch_embedding(pixel_values[:, :3, ...]) # shape = [*, width, grid, grid] + if pixel_values.size(1) == 4: + patch_embeds = patch_embeds + self.mask_patch_embedding(pixel_values[:, 3:4, ...]) + embeddings = patch_embeds.flatten(2).transpose(1, 2) + + if additional_position_embedding is not None: + if additional_embedding_mode == "add": + embeddings = embeddings + self.position_embedding(self.position_ids) + embeddings = embeddings + additional_position_embedding + elif additional_embedding_mode == "replace": + # The original positional embedding is not used (multiplied by zero to ensure all parameters are used to be safe) + embeddings = embeddings + self.position_embedding(self.position_ids) * 0. 
+ embeddings = embeddings + additional_position_embedding + else: + raise ValueError(f"additional_embedding_mode should be either 'add' or 'replace', got {additional_embedding_mode}") + else: + # Without additional position embedding + embeddings = embeddings + self.position_embedding(self.position_ids) + # print("No additional position embedding") + return embeddings + + +# Copied from transformers.models.clip.modeling_clip.CLIPTextEmbeddings with CLIP->Siglip +class SiglipTextEmbeddings(nn.Module): + def __init__(self, config: SiglipTextConfig): + super().__init__() + embed_dim = config.hidden_size + + self.token_embedding = nn.Embedding(config.vocab_size, embed_dim) + self.position_embedding = nn.Embedding(config.max_position_embeddings, embed_dim) + + # position_ids (1, len position emb) is contiguous in memory and exported when serialized + self.register_buffer( + "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False + ) + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + position_ids: Optional[torch.LongTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + ) -> torch.Tensor: + seq_length = input_ids.shape[-1] if input_ids is not None else inputs_embeds.shape[-2] + + if position_ids is None: + position_ids = self.position_ids[:, :seq_length] + + if inputs_embeds is None: + inputs_embeds = self.token_embedding(input_ids) + + position_embeddings = self.position_embedding(position_ids) + embeddings = inputs_embeds + position_embeddings + + return embeddings + + +class SiglipAttention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper""" + + # Copied from transformers.models.clip.modeling_clip.CLIPAttention.__init__ + def __init__(self, config): + super().__init__() + self.config = config + self.embed_dim = config.hidden_size + self.num_heads = config.num_attention_heads + self.head_dim = self.embed_dim // self.num_heads + if self.head_dim * self.num_heads != self.embed_dim: + raise ValueError( + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {self.num_heads})." 
+ ) + self.scale = self.head_dim**-0.5 + self.dropout = config.attention_dropout + + self.k_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.v_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.q_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.out_proj = nn.Linear(self.embed_dim, self.embed_dim) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel""" + + batch_size, q_len, _ = hidden_states.size() + + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view(batch_size, q_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(batch_size, q_len, self.num_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(batch_size, q_len, self.num_heads, self.head_dim).transpose(1, 2) + + k_v_seq_len = key_states.shape[-2] + attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) * self.scale + + if attn_weights.size() != (batch_size, self.num_heads, q_len, k_v_seq_len): + raise ValueError( + f"Attention weights should be of size {(batch_size, self.num_heads, q_len, k_v_seq_len)}, but is" + f" {attn_weights.size()}" + ) + + if attention_mask is not None: + if attention_mask.size() != (batch_size, 1, q_len, k_v_seq_len): + raise ValueError( + f"Attention mask should be of size {(batch_size, 1, q_len, k_v_seq_len)}, but is {attention_mask.size()}" + ) + attn_weights = attn_weights + attention_mask + + # upcast attention to fp32 + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) + attn_output = torch.matmul(attn_weights, value_states) + + if attn_output.size() != (batch_size, self.num_heads, q_len, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(batch_size, self.num_heads, q_len, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(batch_size, q_len, self.embed_dim) + + attn_output = self.out_proj(attn_output) + + return attn_output, attn_weights + + +# Copied from transformers.models.clip.modeling_clip.CLIPMLP with CLIP->Siglip +class SiglipMLP(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.activation_fn = ACT2FN[config.hidden_act] + self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) + self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.fc1(hidden_states) + hidden_states = self.activation_fn(hidden_states) + hidden_states = self.fc2(hidden_states) + return hidden_states + + +# Copied from transformers.models.clip.modeling_clip.CLIPEncoderLayer with CLIP->Siglip +class SiglipEncoderLayer(nn.Module): + def __init__(self, config: SiglipConfig): + super().__init__() + self.embed_dim = config.hidden_size + self.self_attn = SiglipAttention(config) + self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + self.mlp = SiglipMLP(config) + self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + + # Ignore copy + def forward( + self, + 
hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.FloatTensor]: + """ + Args: + hidden_states (`torch.FloatTensor`): + Input to the layer of shape `(batch, seq_len, embed_dim)`. + attention_mask (`torch.FloatTensor`): + Attention mask of shape `(batch, 1, q_len, k_v_seq_len)` where padding elements are indicated by very large negative values. + output_attentions (`bool`, *optional*, defaults to `False`): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + """ + residual = hidden_states + + hidden_states = self.layer_norm1(hidden_states) + hidden_states, attn_weights = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + output_attentions=output_attentions, + ) + hidden_states = residual + hidden_states + + residual = hidden_states + hidden_states = self.layer_norm2(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + + outputs = (hidden_states,) + + if output_attentions: + outputs += (attn_weights,) + + return outputs + + +class SiglipPreTrainedModel(PreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. + """ + + config_class = SiglipConfig + base_model_prefix = "siglip" + supports_gradient_checkpointing = True + + def _init_weights(self, module): + """Initialize the weights""" + if isinstance(module, SiglipVisionEmbeddings): + width = ( + self.config.vision_config.hidden_size + if isinstance(self.config, SiglipConfig) + else self.config.hidden_size + ) + nn.init.normal_(module.position_embedding.weight, std=1 / np.sqrt(width)) + elif isinstance(module, nn.Embedding): + default_flax_embed_init(module.weight) + elif isinstance(module, SiglipAttention): + nn.init.xavier_uniform_(module.q_proj.weight) + nn.init.xavier_uniform_(module.k_proj.weight) + nn.init.xavier_uniform_(module.v_proj.weight) + nn.init.xavier_uniform_(module.out_proj.weight) + nn.init.zeros_(module.q_proj.bias) + nn.init.zeros_(module.k_proj.bias) + nn.init.zeros_(module.v_proj.bias) + nn.init.zeros_(module.out_proj.bias) + elif isinstance(module, SiglipMLP): + nn.init.xavier_uniform_(module.fc1.weight) + nn.init.xavier_uniform_(module.fc2.weight) + nn.init.normal_(module.fc1.bias, std=1e-6) + nn.init.normal_(module.fc2.bias, std=1e-6) + elif isinstance(module, SiglipMultiheadAttentionPoolingHead): + nn.init.xavier_uniform_(module.probe.data) + nn.init.xavier_uniform_(module.attention.in_proj_weight.data) + nn.init.zeros_(module.attention.in_proj_bias.data) + elif isinstance(module, SiglipModel): + logit_scale_init = torch.log(torch.tensor(1.0)) + module.logit_scale.data.fill_(logit_scale_init) + module.logit_bias.data.zero_() + elif isinstance(module, nn.Conv2d) and getattr(module, "use_zero_init", False): + import deepspeed + param_list = [module.weight] + if module.bias is not None: + param_list += [module.bias] + # This is used in mask patch embedding + if is_deepspeed_zero3_enabled(): + with deepspeed.zero.GatheredParameters(param_list, modifier_rank=0): + for param in param_list: + nn.init.zeros_(param) + else: + for param in param_list: + nn.init.zeros_(param) + elif isinstance(module, (nn.Linear, nn.Conv2d)): + lecun_normal_(module.weight) + if module.bias is not None: + nn.init.zeros_(module.bias) + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + 
module.weight.data.fill_(1.0) + + +SIGLIP_START_DOCSTRING = r""" + This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage + and behavior. + + Parameters: + config ([`SiglipConfig`]): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights. +""" + +SIGLIP_TEXT_INPUTS_DOCSTRING = r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide + it. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, + config.max_position_embeddings - 1]`. + + [What are position IDs?](../glossary#position-ids) + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + +SIGLIP_VISION_INPUTS_DOCSTRING = r""" + Args: + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): + Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using + [`AutoImageProcessor`]. See [`CLIPImageProcessor.__call__`] for details. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + +SIGLIP_INPUTS_DOCSTRING = r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide + it. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. 
+ + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, + config.max_position_embeddings - 1]`. + + [What are position IDs?](../glossary#position-ids) + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): + Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using + [`AutoImageProcessor`]. See [`CLIPImageProcessor.__call__`] for details. + return_loss (`bool`, *optional*): + Whether or not to return the contrastive loss. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + + +# Copied from transformers.models.clip.modeling_clip.CLIPEncoder with CLIP->Siglip +class SiglipEncoder(nn.Module): + """ + Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a + [`SiglipEncoderLayer`]. + + Args: + config: SiglipConfig + """ + + def __init__(self, config: SiglipConfig): + super().__init__() + self.config = config + self.layers = nn.ModuleList([SiglipEncoderLayer(config) for _ in range(config.num_hidden_layers)]) + self.gradient_checkpointing = False + + # Ignore copy + def forward( + self, + inputs_embeds, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + context_provider_layers: Optional[nn.ModuleList] = None, + contexts: Optional[List[torch.Tensor]] = None, + cross_attention_mask: Optional[torch.Tensor] = None, + ) -> Union[Tuple, BaseModelOutput]: + r""" + Args: + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. + This is useful if you want more control over how to convert `input_ids` indices into associated vectors + than the model's internal embedding lookup matrix. + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors + for more detail. 
+ context_provider_layers (nn.ModuleList): ModuleList of context provider layers. + contexts: List of torch.Tensor for context (for KV in cross-attention). + cross_attention_mask (`torch.Tensor` of shape `(batch_size, q_sequence_length, kv_sequence_length)`, *optional*): mask for cross-attention. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + encoder_states = () if output_hidden_states else None + all_attentions = () if output_attentions else None + + hidden_states = inputs_embeds + for layer_index, encoder_layer in enumerate(self.layers): + if output_hidden_states: + encoder_states = encoder_states + (hidden_states,) + if self.gradient_checkpointing and self.training: + layer_outputs = self._gradient_checkpointing_func( + encoder_layer.__call__, + hidden_states, + attention_mask, + output_attentions, + ) + else: + layer_outputs = encoder_layer( + hidden_states, + attention_mask, + output_attentions=output_attentions, + ) + + hidden_states = layer_outputs[0] + + if output_attentions: + all_attentions = all_attentions + (layer_outputs[1],) + + if context_provider_layers: + # Right now contexts is passed as the encoder_hidden_states (the output hidden_states of the context ViT). + context_provider_layer = context_provider_layers[layer_index] + if context_provider_layer is not None: + if self.gradient_checkpointing and self.training: + layer_outputs = self._gradient_checkpointing_func( + context_provider_layer.__call__, + hidden_states, + contexts, + cross_attention_mask, + output_attentions, + ) + else: + layer_outputs = context_provider_layer( + hidden_states, + contexts, + cross_attention_mask, + output_attentions=output_attentions, + ) + + hidden_states = layer_outputs[0] + + if output_hidden_states: + encoder_states = encoder_states + (hidden_states,) + + if not return_dict: + return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None) + return BaseModelOutput( + last_hidden_state=hidden_states, hidden_states=encoder_states, attentions=all_attentions + ) + + +class SiglipTextTransformer(nn.Module): + def __init__(self, config: SiglipTextConfig): + super().__init__() + self.config = config + embed_dim = config.hidden_size + self.embeddings = SiglipTextEmbeddings(config) + self.encoder = SiglipEncoder(config) + self.final_layer_norm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) + + self.head = nn.Linear(embed_dim, embed_dim) + + @add_start_docstrings_to_model_forward(SIGLIP_TEXT_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=SiglipTextConfig) + def forward( + self, + input_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + **kwargs, + ) -> Union[Tuple, BaseModelOutputWithPooling]: + r""" + Returns: + + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None 
else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if input_ids is None: + raise ValueError("You have to specify input_ids") + + input_shape = input_ids.size() + input_ids = input_ids.view(-1, input_shape[-1]) + + hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids) + + # note: SigLIP's text model does not use a causal mask, unlike the original CLIP model. + # expand attention_mask + # if attention_mask is not None: + # # [batch_size, seq_len] -> [batch_size, 1, tgt_seq_len, src_seq_len] + # attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype) + + encoder_outputs = self.encoder( + inputs_embeds=hidden_states, + attention_mask=attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + **kwargs, + ) + + last_hidden_state = encoder_outputs[0] + last_hidden_state = self.final_layer_norm(last_hidden_state) + + # Assuming "sticky" EOS tokenization, last token is always EOS. + pooled_output = last_hidden_state[:, -1, :] + pooled_output = self.head(pooled_output) + + if not return_dict: + return (last_hidden_state, pooled_output) + encoder_outputs[1:] + + return BaseModelOutputWithPooling( + last_hidden_state=last_hidden_state, + pooler_output=pooled_output, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, + ) + + +@add_start_docstrings( + """The text model from SigLIP without any head or projection on top.""", + SIGLIP_START_DOCSTRING, +) +class SiglipTextModel(SiglipPreTrainedModel): + config_class = SiglipTextConfig + + _no_split_modules = ["SiglipTextEmbeddings", "SiglipEncoderLayer"] + + def __init__(self, config: SiglipTextConfig): + super().__init__(config) + self.text_model = SiglipTextTransformer(config) + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self) -> nn.Module: + return self.text_model.embeddings.token_embedding + + def set_input_embeddings(self, value): + self.text_model.embeddings.token_embedding = value + + @add_start_docstrings_to_model_forward(SIGLIP_TEXT_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=SiglipTextConfig) + def forward( + self, + input_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutputWithPooling]: + r""" + Returns: + + Examples: + + ```python + >>> from transformers import AutoTokenizer, SiglipTextModel + + >>> model = SiglipTextModel.from_pretrained("google/siglip-base-patch16-224") + >>> tokenizer = AutoTokenizer.from_pretrained("google/siglip-base-patch16-224") + + >>> # important: make sure to set padding="max_length" as that's how the model was trained + >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding="max_length", return_tensors="pt") + + >>> outputs = model(**inputs) + >>> last_hidden_state = outputs.last_hidden_state + >>> pooled_output = outputs.pooler_output # pooled (EOS token) states + ```""" + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + return self.text_model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + output_attentions=output_attentions, + 
output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + +class SiglipVisionTransformer(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.config = config + embed_dim = config.hidden_size + + self.embeddings = SiglipVisionEmbeddings(config) + self.encoder = SiglipEncoder(config) + self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) + self.head = SiglipMultiheadAttentionPoolingHead(config) + + @add_start_docstrings_to_model_forward(SIGLIP_VISION_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=SiglipVisionConfig) + def forward( + self, + pixel_values, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + **kwargs + ) -> Union[Tuple, BaseModelOutputWithPooling]: + r""" + Returns: + + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + hidden_states = self.embeddings(pixel_values) + + encoder_outputs = self.encoder( + inputs_embeds=hidden_states, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + **kwargs + ) + + last_hidden_state = encoder_outputs[0] + last_hidden_state = self.post_layernorm(last_hidden_state) + + pooled_output = self.head(last_hidden_state) + + if not return_dict: + return (last_hidden_state, pooled_output) + encoder_outputs[1:] + + return BaseModelOutputWithPooling( + last_hidden_state=last_hidden_state, + pooler_output=pooled_output, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, + ) + + +class SiglipMultiheadAttentionPoolingHead(nn.Module): + """Multihead Attention Pooling.""" + + def __init__(self, config: SiglipVisionConfig): + super().__init__() + + self.probe = nn.Parameter(torch.randn(1, 1, config.hidden_size)) + self.attention = torch.nn.MultiheadAttention(config.hidden_size, config.num_attention_heads, batch_first=True) + self.layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) + self.mlp = SiglipMLP(config) + + def forward(self, hidden_state): + batch_size = hidden_state.shape[0] + probe = self.probe.repeat(batch_size, 1, 1) + + hidden_state = self.attention(probe, hidden_state, hidden_state)[0] + + residual = hidden_state + hidden_state = self.layernorm(hidden_state) + hidden_state = residual + self.mlp(hidden_state) + + return hidden_state[:, 0] + + +@add_start_docstrings( + """The vision model from SigLIP without any head or projection on top.""", + SIGLIP_START_DOCSTRING, +) +class SiglipVisionModel(SiglipPreTrainedModel): + config_class = SiglipVisionConfig + main_input_name = "pixel_values" + + def __init__(self, config: SiglipVisionConfig): + super().__init__(config) + + self.vision_model = SiglipVisionTransformer(config) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self) -> nn.Module: + return self.vision_model.embeddings.patch_embedding + + @add_start_docstrings_to_model_forward(SIGLIP_VISION_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=SiglipVisionConfig) + def forward( + self, + pixel_values, + output_attentions: Optional[bool] = None, + 
output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + **kwargs + ) -> Union[Tuple, BaseModelOutputWithPooling]: + r""" + Returns: + + Examples: + + ```python + >>> from PIL import Image + >>> import requests + >>> from transformers import AutoProcessor, SiglipVisionModel + + >>> model = SiglipVisionModel.from_pretrained("google/siglip-base-patch16-224") + >>> processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224") + + >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" + >>> image = Image.open(requests.get(url, stream=True).raw) + + >>> inputs = processor(images=image, return_tensors="pt") + + >>> outputs = model(**inputs) + >>> last_hidden_state = outputs.last_hidden_state + >>> pooled_output = outputs.pooler_output # pooled features + ```""" + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + return self.vision_model( + pixel_values=pixel_values, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + **kwargs + ) + + +@add_start_docstrings(SIGLIP_START_DOCSTRING) +class SiglipModel(SiglipPreTrainedModel): + config_class = SiglipConfig + + def __init__(self, config: SiglipConfig): + super().__init__(config) + + if not isinstance(config.text_config, SiglipTextConfig): + raise ValueError( + "config.text_config is expected to be of type SiglipTextConfig but is of type" + f" {type(config.text_config)}." + ) + + if not isinstance(config.vision_config, SiglipVisionConfig): + raise ValueError( + "config.vision_config is expected to be of type SiglipVisionConfig but is of type" + f" {type(config.vision_config)}." + ) + + text_config = config.text_config + vision_config = config.vision_config + + self.text_model = SiglipTextTransformer(text_config) + self.vision_model = SiglipVisionTransformer(vision_config) + + self.logit_scale = nn.Parameter(torch.randn(1)) + self.logit_bias = nn.Parameter(torch.randn(1)) + + # Initialize weights and apply final processing + self.post_init() + + @add_start_docstrings_to_model_forward(SIGLIP_TEXT_INPUTS_DOCSTRING) + def get_text_features( + self, + input_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> torch.FloatTensor: + r""" + Returns: + text_features (`torch.FloatTensor` of shape `(batch_size, output_dim`): The text embeddings obtained by + applying the projection layer to the pooled output of [`SiglipTextModel`]. + + Examples: + + ```python + >>> from transformers import AutoTokenizer, AutoModel + >>> import torch + + >>> model = AutoModel.from_pretrained("google/siglip-base-patch16-224") + >>> tokenizer = AutoTokenizer.from_pretrained("google/siglip-base-patch16-224") + + >>> # important: make sure to set padding="max_length" as that's how the model was trained + >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding="max_length", return_tensors="pt") + >>> with torch.no_grad(): + ... text_features = model.get_text_features(**inputs) + ```""" + # Use SigLIP model's config for some fields (if specified) instead of those of vision & text components. 
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + text_outputs = self.text_model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + pooled_output = text_outputs[1] + + return pooled_output + + @add_start_docstrings_to_model_forward(SIGLIP_VISION_INPUTS_DOCSTRING) + def get_image_features( + self, + pixel_values: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> torch.FloatTensor: + r""" + Returns: + image_features (`torch.FloatTensor` of shape `(batch_size, output_dim`): The image embeddings obtained by + applying the projection layer to the pooled output of [`SiglipVisionModel`]. + + Examples: + + ```python + >>> from PIL import Image + >>> import requests + >>> from transformers import AutoProcessor, AutoModel + >>> import torch + + >>> model = AutoModel.from_pretrained("google/siglip-base-patch16-224") + >>> processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224") + + >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" + >>> image = Image.open(requests.get(url, stream=True).raw) + + >>> inputs = processor(images=image, return_tensors="pt") + + >>> with torch.no_grad(): + ... image_features = model.get_image_features(**inputs) + ```""" + # Use SiglipModel's config for some fields (if specified) instead of those of vision & text components. 
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + vision_outputs = self.vision_model( + pixel_values=pixel_values, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + pooled_output = vision_outputs[1] + + return pooled_output + + @add_start_docstrings_to_model_forward(SIGLIP_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=SiglipOutput, config_class=SiglipConfig) + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + return_loss: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SiglipOutput]: + r""" + Returns: + + Examples: + + ```python + >>> from PIL import Image + >>> import requests + >>> from transformers import AutoProcessor, AutoModel + >>> import torch + + >>> model = AutoModel.from_pretrained("google/siglip-base-patch16-224") + >>> processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224") + + >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" + >>> image = Image.open(requests.get(url, stream=True).raw) + + >>> texts = ["a photo of 2 cats", "a photo of 2 dogs"] + >>> inputs = processor(text=texts, images=image, return_tensors="pt") + + >>> with torch.no_grad(): + ... outputs = model(**inputs) + + >>> logits_per_image = outputs.logits_per_image + >>> probs = torch.sigmoid(logits_per_image) # these are the probabilities + >>> print(f"{probs[0][0]:.1%} that image 0 is '{texts[0]}'") + 31.9% that image 0 is 'a photo of 2 cats' + ```""" + # Use SigLIP model's config for some fields (if specified) instead of those of vision & text components. 
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + vision_outputs = self.vision_model( + pixel_values=pixel_values, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + text_outputs = self.text_model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + image_embeds = vision_outputs[1] + text_embeds = text_outputs[1] + + # normalized features + image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True) + text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True) + + # cosine similarity as logits + logits_per_text = torch.matmul(text_embeds, image_embeds.t()) * self.logit_scale.exp() + self.logit_bias + logits_per_image = logits_per_text.t() + + loss = None + if return_loss: + raise NotImplementedError("SigLIP loss to be implemented") + + if not return_dict: + output = (logits_per_image, logits_per_text, text_embeds, image_embeds, text_outputs, vision_outputs) + return ((loss,) + output) if loss is not None else output + + return SiglipOutput( + loss=loss, + logits_per_image=logits_per_image, + logits_per_text=logits_per_text, + text_embeds=text_embeds, + image_embeds=image_embeds, + text_model_output=text_outputs, + vision_model_output=vision_outputs, + ) diff --git a/dam/model/multimodal_encoder/siglip/processing_siglip.py b/dam/model/multimodal_encoder/siglip/processing_siglip.py new file mode 100644 index 0000000000000000000000000000000000000000..5c52951299af3dd8b08f23ae7ee17501130f09b2 --- /dev/null +++ b/dam/model/multimodal_encoder/siglip/processing_siglip.py @@ -0,0 +1,143 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Image/Text processor class for SigLIP. +""" + +from typing import List, Optional, Union + +from transformers.feature_extraction_utils import BatchFeature +from transformers.image_utils import ImageInput +from transformers.processing_utils import ProcessorMixin +from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy +from transformers.utils import TensorType + + +class SiglipProcessor(ProcessorMixin): + r""" + Constructs a Siglip processor which wraps a Siglip image processor and a Siglip tokenizer into a single processor. + + [`SiglipProcessor`] offers all the functionalities of [`SiglipImageProcessor`] and [`SiglipTokenizer`]. See the + [`~SiglipProcessor.__call__`] and [`~SiglipProcessor.decode`] for more information. + + Args: + image_processor ([`SiglipImageProcessor`]): + The image processor is a required input. 
+ tokenizer ([`SiglipTokenizer`]): + The tokenizer is a required input. + """ + + attributes = ["image_processor", "tokenizer"] + image_processor_class = "SiglipImageProcessor" + tokenizer_class = "SiglipTokenizer" + + def __init__(self, image_processor, tokenizer): + super().__init__(image_processor, tokenizer) + + def __call__( + self, + text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, + images: ImageInput = None, + padding: Union[bool, str, PaddingStrategy] = "max_length", + truncation: Union[bool, str, TruncationStrategy] = None, + max_length=None, + return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH, + ) -> BatchFeature: + """ + Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text` + and `kwargs` arguments to SiglipTokenizer's [`~SiglipTokenizer.__call__`] if `text` is not `None` to encode + the text. To prepare the image(s), this method forwards the `images` argument to + SiglipImageProcessor's [`~SiglipImageProcessor.__call__`] if `images` is not `None`. Please refer to the doctsring + of the above two methods for more information. + + Args: + text (`str`, `List[str]`, `List[List[str]]`): + The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings + (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set + `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). + images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`): + The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch + tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a + number of channels, H and W are image height and width. + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `max_length`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding + index) among: + - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum + acceptable input length for the model if that argument is not provided. + - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different + lengths). + max_length (`int`, *optional*): + Maximum length of the returned list and optionally padding length (see above). + truncation (`bool`, *optional*): + Activates truncation to cut input sequences longer than `max_length` to `max_length`. + return_tensors (`str` or [`~utils.TensorType`], *optional*): + If set, will return tensors of a particular framework. Acceptable values are: + + - `'tf'`: Return TensorFlow `tf.constant` objects. + - `'pt'`: Return PyTorch `torch.Tensor` objects. + - `'np'`: Return NumPy `np.ndarray` objects. + - `'jax'`: Return JAX `jnp.ndarray` objects. + + Returns: + [`BatchFeature`]: A [`BatchFeature`] with the following fields: + + - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. + - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when + `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not + `None`). 
+ - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`. + """ + + if text is None and images is None: + raise ValueError("You have to specify either text or images. Both cannot be none.") + + if text is not None: + encoding = self.tokenizer( + text, return_tensors=return_tensors, padding=padding, truncation=truncation, max_length=max_length + ) + + if images is not None: + image_features = self.image_processor(images, return_tensors=return_tensors) + + if text is not None and images is not None: + encoding["pixel_values"] = image_features.pixel_values + return encoding + elif text is not None: + return encoding + else: + return BatchFeature(data=dict(**image_features), tensor_type=return_tensors) + + def decode(self, *args, **kwargs): + """ + This method forwards all its arguments to SiglipTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to + the docstring of this method for more information. + """ + return self.tokenizer.decode(*args, **kwargs) + + def batch_decode(self, *args, **kwargs): + """ + This method forwards all its arguments to SiglipTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please + refer to the docstring of this method for more information. + """ + return self.tokenizer.batch_decode(*args, **kwargs) + + @property + # Copied from transformers.models.clip.processing_clip.CLIPProcessor.model_input_names with CLIP->Siglip, T5->Siglip + def model_input_names(self): + tokenizer_input_names = self.tokenizer.model_input_names + image_processor_input_names = self.image_processor.model_input_names + return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names)) diff --git a/dam/model/multimodal_encoder/siglip/tokenization_siglip.py b/dam/model/multimodal_encoder/siglip/tokenization_siglip.py new file mode 100644 index 0000000000000000000000000000000000000000..b743f706cca5069d00b1c2fa8f7c8e95ab6dde34 --- /dev/null +++ b/dam/model/multimodal_encoder/siglip/tokenization_siglip.py @@ -0,0 +1,389 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
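Before moving on to the tokenizer below, here is a minimal, hedged usage sketch for the `SiglipProcessor` defined above. The checkpoint name and image URL mirror the `SiglipModel` docstring example earlier and are assumptions for illustration, not part of this repository's runtime code:

```python
from PIL import Image
import requests
from transformers import AutoProcessor

# Assumed public checkpoint, as in the SiglipModel docstring above.
processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Text is padded to "max_length" by default (see __call__ above); images are handled by
# SiglipImageProcessor. Both end up in a single BatchFeature of PyTorch tensors.
inputs = processor(text=["a photo of 2 cats"], images=image, return_tensors="pt")
print(inputs["input_ids"].shape, inputs["pixel_values"].shape)
```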
+""" Tokenization class for SigLIP model.""" + +import os +import re +import string +import warnings +from shutil import copyfile +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple + +import sentencepiece as spm + +from transformers.convert_slow_tokenizer import import_protobuf +from transformers.tokenization_utils import PreTrainedTokenizer +from transformers.tokenization_utils_base import AddedToken + + +if TYPE_CHECKING: + from transformers.tokenization_utils_base import TextInput +from transformers.utils import logging, requires_backends + + +logger = logging.get_logger(__name__) + +VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} + +PRETRAINED_VOCAB_FILES_MAP = { + "vocab_file": { + "google/siglip-base-patch16-224": "https://huggingface.co/google/siglip-base-patch16-224/resolve/main/spiece.model", + } +} + +PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { + "google/siglip-base-patch16-224": 256, +} + +SPIECE_UNDERLINE = "▁" + + +class SiglipTokenizer(PreTrainedTokenizer): + """ + Construct a Siglip tokenizer. Based on [SentencePiece](https://github.com/google/sentencepiece). + + This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to + this superclass for more information regarding those methods. + + Args: + vocab_file (`str`): + [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that + contains the vocabulary necessary to instantiate a tokenizer. + eos_token (`str`, *optional*, defaults to `""`): + The end of sequence token. + unk_token (`str`, *optional*, defaults to `""`): + The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this + token instead. + pad_token (`str`, *optional*, defaults to `""`): + The token used for padding, for example when batching sequences of different lengths. + additional_special_tokens (`List[str]`, *optional*): + Additional special tokens used by the tokenizer. + sp_model_kwargs (`dict`, *optional*): + Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for + SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things, + to set: + + - `enable_sampling`: Enable subword regularization. + - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout. + + - `nbest_size = {0,1}`: No sampling is performed. + - `nbest_size > 1`: samples from the nbest_size results. + - `nbest_size < 0`: assuming that nbest_size is infinite and samples from the all hypothesis (lattice) + using forward-filtering-and-backward-sampling algorithm. + + - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for + BPE-dropout. + model_max_length (`int`, *optional*, defaults to 64): + The maximum length (in number of tokens) for model inputs. + do_lower_case (`bool`, *optional*, defaults to `True`): + Whether or not to lowercase the input when tokenizing. 
+ """ + + vocab_files_names = VOCAB_FILES_NAMES + pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP + max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES + model_input_names = ["input_ids", "attention_mask"] + + def __init__( + self, + vocab_file, + eos_token="", + unk_token="", + pad_token="", + additional_special_tokens=None, + sp_model_kwargs: Optional[Dict[str, Any]] = None, + model_max_length=64, + do_lower_case=True, + **kwargs, + ) -> None: + requires_backends(self, "protobuf") + + pad_token = ( + AddedToken(pad_token, rstrip=True, lstrip=True, normalized=False, special=True) + if isinstance(pad_token, str) + else pad_token + ) + unk_token = ( + AddedToken(unk_token, rstrip=True, lstrip=True, normalized=False, special=True) + if isinstance(unk_token, str) + else unk_token + ) + eos_token = ( + AddedToken(eos_token, rstrip=True, lstrip=True, normalized=False, special=True) + if isinstance(eos_token, str) + else eos_token + ) + + self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs + + self.do_lower_case = do_lower_case + self.vocab_file = vocab_file + + self.sp_model = self.get_spm_processor() + self.vocab_file = vocab_file + + super().__init__( + eos_token=eos_token, + unk_token=unk_token, + pad_token=pad_token, + additional_special_tokens=additional_special_tokens, + sp_model_kwargs=self.sp_model_kwargs, + model_max_length=model_max_length, + do_lower_case=do_lower_case, + **kwargs, + ) + + def get_spm_processor(self): + tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs) + with open(self.vocab_file, "rb") as f: + sp_model = f.read() + model_pb2 = import_protobuf() + model = model_pb2.ModelProto.FromString(sp_model) + normalizer_spec = model_pb2.NormalizerSpec() + normalizer_spec.add_dummy_prefix = False + model.normalizer_spec.MergeFrom(normalizer_spec) + sp_model = model.SerializeToString() + tokenizer.LoadFromSerializedProto(sp_model) + return tokenizer + + @property + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.vocab_size + def vocab_size(self): + return self.sp_model.get_piece_size() + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.get_vocab + def get_vocab(self): + vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} + vocab.update(self.added_tokens_encoder) + return vocab + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.get_special_tokens_mask + def get_special_tokens_mask( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False + ) -> List[int]: + """ + Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding + special tokens using the tokenizer `prepare_for_model` method. + + Args: + token_ids_0 (`List[int]`): + List of IDs. + token_ids_1 (`List[int]`, *optional*): + Optional second list of IDs for sequence pairs. + already_has_special_tokens (`bool`, *optional*, defaults to `False`): + Whether or not the token list is already formatted with special tokens for the model. + + Returns: + `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. 
+ """ + if already_has_special_tokens: + return super().get_special_tokens_mask( + token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True + ) + + # normal case: some special tokens + if token_ids_1 is None: + return ([0] * len(token_ids_0)) + [1] + return ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1] + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer._add_eos_if_not_present + def _add_eos_if_not_present(self, token_ids: List[int]) -> List[int]: + """Do not add eos again if user already added it.""" + if len(token_ids) > 0 and token_ids[-1] == self.eos_token_id: + warnings.warn( + f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated" + " eos tokens being added." + ) + return token_ids + else: + return token_ids + [self.eos_token_id] + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.create_token_type_ids_from_sequences + def create_token_type_ids_from_sequences( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None + ) -> List[int]: + """ + Create a mask from the two sequences passed to be used in a sequence-pair classification task. T5 does not make + use of token type ids, therefore a list of zeros is returned. + + Args: + token_ids_0 (`List[int]`): + List of IDs. + token_ids_1 (`List[int]`, *optional*): + Optional second list of IDs for sequence pairs. + + Returns: + `List[int]`: List of zeros. + """ + eos = [self.eos_token_id] + + if token_ids_1 is None: + return len(token_ids_0 + eos) * [0] + return len(token_ids_0 + eos + token_ids_1 + eos) * [0] + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.build_inputs_with_special_tokens + def build_inputs_with_special_tokens( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None + ) -> List[int]: + """ + Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and + adding special tokens. A sequence has the following format: + + - single sequence: `X ` + - pair of sequences: `A B ` + + Args: + token_ids_0 (`List[int]`): + List of IDs to which the special tokens will be added. + token_ids_1 (`List[int]`, *optional*): + Optional second list of IDs for sequence pairs. + + Returns: + `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens. + """ + token_ids_0 = self._add_eos_if_not_present(token_ids_0) + if token_ids_1 is None: + return token_ids_0 + else: + token_ids_1 = self._add_eos_if_not_present(token_ids_1) + return token_ids_0 + token_ids_1 + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.__getstate__ + def __getstate__(self): + state = self.__dict__.copy() + state["sp_model"] = None + return state + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.__setstate__ + def __setstate__(self, d): + self.__dict__ = d + + # for backward compatibility + if not hasattr(self, "sp_model_kwargs"): + self.sp_model_kwargs = {} + + self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs) + self.sp_model.Load(self.vocab_file) + + def remove_punctuation(self, text: str) -> str: + return text.translate(str.maketrans("", "", string.punctuation)) + + # source: https://github.com/google-research/big_vision/blob/3b8e5ab6ad4f96e32b32826f9e1b8fd277914f9c/big_vision/evaluators/proj/image_text/prompt_engineering.py#L94 + def canonicalize_text(self, text, *, keep_punctuation_exact_string=None): + """Returns canonicalized `text` (puncuation removed). 
+ + Args: + text (`str`): + String to be canonicalized. + keep_punctuation_exact_string (`str`, *optional*): + If provided, then this exact string is kept. For example providing '{}' will keep any occurrences of '{}' + (but will still remove '{' and '}' that appear separately). + """ + if keep_punctuation_exact_string: + text = keep_punctuation_exact_string.join( + self.remove_punctuation(part) for part in text.split(keep_punctuation_exact_string) + ) + else: + text = self.remove_punctuation(text) + text = re.sub(r"\s+", " ", text) + text = text.strip() + + return text + + def tokenize(self, text: "TextInput", add_special_tokens=False, **kwargs) -> List[str]: + """ + Converts a string to a list of tokens. + """ + tokens = super().tokenize(SPIECE_UNDERLINE + text.replace(SPIECE_UNDERLINE, " "), **kwargs) + + if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens: + tokens = tokens[1:] + return tokens + + @property + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.unk_token_length + def unk_token_length(self): + return len(self.sp_model.encode(str(self.unk_token))) + + def _tokenize(self, text, **kwargs): + """ + Returns a tokenized string. + + We de-activated the `add_dummy_prefix` option, thus the sentencepiece internals will always strip any + SPIECE_UNDERLINE. + + For example: `self.sp_model.encode(f"{SPIECE_UNDERLINE}Hey", out_type = str)` will give `['H', 'e', 'y']` instead of `['▁He', 'y']`. + + Thus we always encode `f"{unk_token}text"` and strip the `unk_token`. Here is an example with `unk_token = ""` and `unk_token_length = 4`. + `self.tokenizer.sp_model.encode(" Hey", out_type = str)[4:]`. + """ + text = self.canonicalize_text(text, keep_punctuation_exact_string=None) + tokens = self.sp_model.encode(text, out_type=str) + + # 1. Encode string + prefix ex: " Hey" + tokens = self.sp_model.encode(self.unk_token + text, out_type=str) + # 2. 
Remove self.unk_token from ['<','unk','>', '▁Hey'] + return tokens[self.unk_token_length :] if len(tokens) >= self.unk_token_length else tokens + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer._convert_token_to_id + def _convert_token_to_id(self, token): + """Converts a token (str) in an id using the vocab.""" + return self.sp_model.piece_to_id(token) + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer._convert_id_to_token + def _convert_id_to_token(self, index): + """Converts an index (integer) in a token (str) using the vocab.""" + token = self.sp_model.IdToPiece(index) + return token + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.convert_tokens_to_string + def convert_tokens_to_string(self, tokens): + """Converts a sequence of tokens (string) in a single string.""" + current_sub_tokens = [] + # since we manually add the prefix space, we have to remove it + tokens[0] = tokens[0].lstrip(SPIECE_UNDERLINE) + out_string = "" + prev_is_special = False + for token in tokens: + # make sure that special tokens are not decoded using sentencepiece model + if token in self.all_special_tokens: + if not prev_is_special: + out_string += " " + out_string += self.sp_model.decode(current_sub_tokens) + token + prev_is_special = True + current_sub_tokens = [] + else: + current_sub_tokens.append(token) + prev_is_special = False + out_string += self.sp_model.decode(current_sub_tokens) + return out_string.strip() + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.save_vocabulary + def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: + if not os.path.isdir(save_directory): + logger.error(f"Vocabulary path ({save_directory}) should be a directory") + return + out_vocab_file = os.path.join( + save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] + ) + + if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file): + copyfile(self.vocab_file, out_vocab_file) + elif not os.path.isfile(self.vocab_file): + with open(out_vocab_file, "wb") as fi: + content_spiece_model = self.sp_model.serialized_model_proto() + fi.write(content_spiece_model) + + return (out_vocab_file,) diff --git a/dam/model/multimodal_encoder/siglip_encoder.py b/dam/model/multimodal_encoder/siglip_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..64dd2e607f6119cb1e49d5ee390b14407b77ad6d --- /dev/null +++ b/dam/model/multimodal_encoder/siglip_encoder.py @@ -0,0 +1,25 @@ +import torch +from .vision_encoder import VisionTower + +from transformers import AutoConfig, PretrainedConfig, AutoModel +from .siglip import ( + SiglipVisionConfig, + SiglipVisionModel, + SiglipImageProcessor, +) + + +class SiglipVisionTower(VisionTower): + def __init__(self, model_name_or_path: str, config: PretrainedConfig, state_dict=None): + super().__init__(model_name_or_path, config) + self.image_processor = SiglipImageProcessor.from_pretrained(model_name_or_path) + self.vision_tower = SiglipVisionModel.from_pretrained( + # TODO(ligeng): why pass config here leading to errors? 
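+            # `config.model_dtype` is stored as a string (e.g. "torch.float16"), so the eval()
+            # below turns it back into the actual torch dtype before loading the weights.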
+ model_name_or_path, torch_dtype=eval(config.model_dtype), state_dict=state_dict + ) + self.is_loaded = True + + +AutoConfig.register("siglip_vision_model", SiglipVisionConfig, exist_ok=True) +AutoModel.register(SiglipVisionConfig, SiglipVisionModel, exist_ok=True) + diff --git a/dam/model/multimodal_encoder/vision_encoder.py b/dam/model/multimodal_encoder/vision_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..91dc255a1a530092a79a1626b75723b5dc87638b --- /dev/null +++ b/dam/model/multimodal_encoder/vision_encoder.py @@ -0,0 +1,156 @@ +# This file is modified from https://github.com/haotian-liu/LLaVA/ + +from abc import abstractmethod + +import torch +import torch.nn as nn +from accelerate.hooks import add_hook_to_module +from transformers import AutoConfig, PreTrainedModel +from transformers.image_processing_utils import BaseImageProcessor +from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled + + +class VisionTower(nn.Module): + def __init__(self, vision_tower, args, delay_load=False): + super().__init__() + + self.is_loaded = False + + self.vision_tower_name = vision_tower + self.select_layer = getattr(args, "mm_vision_select_layer", -2) + self.select_feature = getattr(args, "mm_vision_select_feature", "patch") + + self.cfg_only = None + + def feature_select(self, image_forward_outs): + image_features = image_forward_outs.hidden_states[self.select_layer] + if self.select_feature == "patch": + image_features = image_features[:, 1:] + elif self.select_feature == "cls_patch": + image_features = image_features + else: + raise ValueError(f"Unexpected select feature: {self.select_feature}") + return image_features + + def _maybe_resize_pos_embeds( + self, + model: PreTrainedModel, + image_processor: BaseImageProcessor, + resolution: int = -1, + interpolate_mode: str = "linear", + ): + if resolution in [model.config.image_size, -1]: + return + print(f"Resizing vision model's position embeddings to support higher vision resolution: from {model.config.image_size} to {resolution} ...") + embeddings = model.vision_model.embeddings + patch_size = embeddings.patch_size + num_new_tokens = int((resolution // patch_size) ** 2) + + old_embeddings = embeddings.position_embedding + match interpolate_mode: + case "linear": + ## Step 1: Calculate the corresponding patch ID (pid) in the current resolution (M patches) based on the target resolution (N patches). Formula: pid = pid / N * M + ## Step 2: Obtain new embeddings by interpolating between the embeddings of the two nearest calculated patch IDs. 
Formula: new_embeds = (pid - floor(pid)) * embeds[ceil(pid)] + (ceil(pid) - pid) * embeds[floor(pid)] + import torch + import torch.nn as nn + + if is_deepspeed_zero3_enabled(): + import deepspeed + + with deepspeed.zero.GatheredParameters([old_embeddings.weight], modifier_rank=None): + old_num_tokens, old_embedding_dim = old_embeddings.weight.size() + else: + old_num_tokens, old_embedding_dim = old_embeddings.weight.size() + new_embeddings = nn.Embedding( + num_new_tokens, + old_embedding_dim, + dtype=old_embeddings.weight.dtype, + device=old_embeddings.weight.device, + ) + mapped_indices = ( + torch.arange(num_new_tokens).to(old_embeddings.weight.device) + / (num_new_tokens - 1) + * (old_num_tokens - 1) + ) + floor_indices = torch.clamp(mapped_indices.floor().long(), min=0, max=old_num_tokens - 1) + ceil_indices = torch.clamp(mapped_indices.ceil().long(), min=0, max=old_num_tokens - 1) + if is_deepspeed_zero3_enabled(): + params = [old_embeddings.weight, new_embeddings.weight] + with deepspeed.zero.GatheredParameters(params, modifier_rank=0): + interpolated_embeds = (mapped_indices - floor_indices)[:, None] * old_embeddings.weight.data[ + ceil_indices, : + ] + (ceil_indices - mapped_indices)[:, None] * old_embeddings.weight.data[floor_indices, :] + else: + interpolated_embeds = (mapped_indices - floor_indices)[:, None] * old_embeddings.weight.data[ + ceil_indices, : + ] + (ceil_indices - mapped_indices)[:, None] * old_embeddings.weight.data[floor_indices, :] + new_embeddings.weight.data = interpolated_embeds + case _: + raise NotImplementedError + + if hasattr(old_embeddings, "_hf_hook"): + hook = old_embeddings._hf_hook + add_hook_to_module(new_embeddings, hook) + new_embeddings.requires_grad_(old_embeddings.weight.requires_grad) + ## update vision encoder's configurations + model.config.image_size = resolution + if hasattr(image_processor, "crop_size"): + # CLIP vision tower + image_processor.crop_size = resolution + else: + # SIGLIP vision tower + assert hasattr(image_processor, "size") + image_processor.size = {"height": resolution, "width": resolution} + ## TODO define a '_reinitialize' method for VisionTower + embeddings.position_embedding = new_embeddings + embeddings.image_size = resolution + embeddings.num_patches = embeddings.num_positions = num_new_tokens + embeddings.position_ids = ( + torch.arange(embeddings.num_positions).expand((1, -1)).to(old_embeddings.weight.device) + ) + + def forward(self, images, **kwargs): + if type(images) is list: + image_features = [] + for image in images: + image_forward_out = self.vision_tower( + image.to(device=self.device, dtype=self.dtype).unsqueeze(0), + output_hidden_states=True, **kwargs, + ) + image_feature = self.feature_select(image_forward_out).to(image.dtype) + image_features.append(image_feature) + else: + image_forward_outs = self.vision_tower( + images.to(device=self.device, dtype=self.dtype), + output_hidden_states=True, **kwargs, + ) + image_features = self.feature_select(image_forward_outs).to(images.dtype) + + return image_features + + @property + def dummy_feature(self): + return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype) + + @property + def dtype(self): + return self.vision_tower.dtype + + @property + def device(self): + return self.vision_tower.device + + @property + def config(self): + if self.is_loaded: + return self.vision_tower.config + else: + return self.cfg_only + + @property + def hidden_size(self): + return self.config.hidden_size + + @property + def num_patches(self): + return 
(self.config.image_size // self.config.patch_size) ** 2 diff --git a/dam/model/multimodal_encoder/visualize_features.py b/dam/model/multimodal_encoder/visualize_features.py new file mode 100644 index 0000000000000000000000000000000000000000..2686d32dd8924648a56bda948a7ee9a9c918bdad --- /dev/null +++ b/dam/model/multimodal_encoder/visualize_features.py @@ -0,0 +1,352 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import argparse +from collections import defaultdict +import gc +import math +import os +from PIL import Image +import random +from tqdm import tqdm +from typing import Any, Dict, Iterable, List, Tuple + +#import cv2 +import numpy as np +import torch +from torch import nn +import torch.distributed as dist +from torch.utils.data import DataLoader +import torch.nn.functional as F +from torchvision.utils import make_grid + +from einops import rearrange + +from datasets import load_dataset_builder, load_dataset +from datasets.distributed import split_dataset_by_node + +#from common import rank_print, load_model, get_standard_transform, collate +# +#try: +# import wandb +#except ImportError: +# wandb = None + + +LAYER_STATS = dict() + + +@torch.inference_mode() +def main(rank: int = 0, world_size: int = 1): + ''' + Computes the RankMe (http://arxiv.org/abs/2210.02885) and LiDAR (http://arxiv.org/abs/2312.04000) + estimates of the rank of the produced embeddings. While RADIO doesn't train in a multi-view setting + which is an assumption of LiDAR, the metric does integrate an important concept of the invariance of the + summary features to different view/augmentations of the same image. + ''' + + local_rank = rank % torch.cuda.device_count() + torch.cuda.set_device(local_rank) + cv2.setNumThreads(1) + + device = torch.device('cuda', local_rank) + parser = argparse.ArgumentParser(description='Compute SSL embedding rank estimates') + parser.add_argument('-v', '--model-version', default='radio_v2', + help='Which radio model to load.' + ) + parser.add_argument('-d', '--dataset', default='imagenet-1k', + help='The name of the dataset to classify' + ) + parser.add_argument('--split', default='validation', + help='The dataset split to use.' + ) + parser.add_argument('-n', default=10, type=int, help='The number of samples to load') + parser.add_argument('-r', '--resolution', nargs='+', type=int, default=None, + help='The input image resolution.' + ' If one value is specified, the shortest dimension is resized to this.' + ' If two, the image is center cropped.' + ' If not specified, center cropped 378px is used.' + ' Default: The RADIO model\'s preferred resolution.' + ) + parser.add_argument('--resize-multiple', type=int, default=None, + help='Resize images with dimensions a multiple of this value.' + ' This should be equal to the patch size of a ViT (e.g. RADIOv1)' + ) + parser.add_argument('--batch-size', type=int, default=16, + help='The batch size. If the input is variable sized, then this argument becomes a maximum.' 
+ ) + parser.add_argument('--workers', default=8, type=int, help='Number of loader workers to use') + parser.add_argument('--vitdet-window-size', default=None, type=int, help='Enable ViTDet at the specific window size') + parser.add_argument('--output-dir', default='vis_denoise', type=str) + parser.add_argument('--adaptor-name', default=None, type=str, help='Generate features from a teacher adaptor') + + args, _ = parser.parse_known_args() + + torch.manual_seed(42 + rank) + np.random.seed(42 + rank) + random.seed(42 + rank) + + rank_print('Loading model...') + model, preprocessor, info = load_model(args.model_version, vitdet_window_size=args.vitdet_window_size, adaptor_name=args.adaptor_name) + model.to(device=device).eval() + if isinstance(preprocessor, nn.Module): + preprocessor.to(device).eval() + rank_print('Done') + + rank_print('Loading dataset...') + ds_builder = load_dataset_builder(args.dataset, trust_remote_code=True) + + if args.resolution is None: + args.resolution = (model.preferred_resolution.height, model.preferred_resolution.width) + + patch_size = model.patch_size + + if args.resize_multiple is None: + args.resize_multiple = getattr(model, 'min_resolution_step', model.patch_size) + + transform = get_standard_transform(args.resolution, args.resize_multiple) + dataset = ds_builder.as_dataset(split=args.split) + dataset = dataset.to_iterable_dataset(num_shards=world_size * max(1, args.workers)) + dataset = split_dataset_by_node(dataset, rank=rank, world_size=world_size) + dataset = dataset.map(lambda ex: dict(image=transform(ex['image']), label=torch.as_tensor(ex['label'], dtype=torch.int64))) + + loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, collate_fn=collate, + pin_memory=args.workers > 0, + drop_last=False, + ) + rank_print('Done') + rank_print(f'Description: {ds_builder.info.description}') + + dirs = dict( + orig=os.path.join(args.output_dir, 'orig'), + viz=os.path.join(args.output_dir, 'viz'), + sbs=os.path.join(args.output_dir, 'sbs'), + ) + + for d in dirs.values(): + os.makedirs(d, exist_ok=True) + + ctr = 0 + for batches in loader: + if ctr >= args.n: + break + + for images, _ in batches: + images = images.to(device=device, non_blocking=True) + + all_feat = [] + with torch.autocast(device.type, dtype=torch.bfloat16): + p_images = preprocessor(images) + + output = model(p_images) + if args.adaptor_name: + all_feat = [ + output['backbone'].features, + output[args.adaptor_name].features, + ] + else: + all_feat = [output[1]] + + all_feat = torch.stack(all_feat, dim=1) + + num_rows = images.shape[-2] // patch_size + num_cols = images.shape[-1] // patch_size + + all_feat = rearrange(all_feat, 'b m (h w) c -> b m h w c', h=num_rows, w=num_cols).float() + + for i, feats in enumerate(all_feat): + colored = [] + for features in feats: + color = get_pca_map(features, images.shape[-2:]) + colored.append(color) + + orig = cv2.cvtColor(images[i].permute(1, 2, 0).cpu().numpy(), cv2.COLOR_RGB2BGR) + + cv2.imwrite(f'{dirs["orig"]}/vis_{ctr}.jpg', orig * 255) + cv2.imwrite(f'{dirs["viz"]}/vis_{ctr}.jpg', colored[-1] * 255) + + op = np.concatenate([orig] + colored, axis=1) * 255 + + cv2.imwrite(f'{dirs["sbs"]}/vis_{ctr}.jpg', op) + ctr += 1 + + +def get_robust_pca(features: torch.Tensor, m: float = 2, remove_first_component=False): + # features: (N, C) + # m: a hyperparam controlling how many std dev outside for outliers + assert len(features.shape) == 2, "features should be (N, C)" + reduction_mat = torch.pca_lowrank(features, 
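+    # torch.pca_lowrank returns (U, S, V); indexing [2] keeps V, a (C, 3) projection matrix
+    # (q=3) that maps each C-dim feature onto its top-3 principal components, later used as RGB.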
q=3, niter=20)[2] + colors = features @ reduction_mat + if remove_first_component: + colors_min = colors.min(dim=0).values + colors_max = colors.max(dim=0).values + tmp_colors = (colors - colors_min) / (colors_max - colors_min) + fg_mask = tmp_colors[..., 0] < 0.2 + reduction_mat = torch.pca_lowrank(features[fg_mask], q=3, niter=20)[2] + colors = features @ reduction_mat + else: + fg_mask = torch.ones_like(colors[:, 0]).bool() + d = torch.abs(colors[fg_mask] - torch.median(colors[fg_mask], dim=0).values) + mdev = torch.median(d, dim=0).values + s = d / mdev + try: + rins = colors[fg_mask][s[:, 0] < m, 0] + gins = colors[fg_mask][s[:, 1] < m, 1] + bins = colors[fg_mask][s[:, 2] < m, 2] + rgb_min = torch.tensor([rins.min(), gins.min(), bins.min()]) + rgb_max = torch.tensor([rins.max(), gins.max(), bins.max()]) + except: + rins = colors + gins = colors + bins = colors + rgb_min = torch.tensor([rins.min(), gins.min(), bins.min()]) + rgb_max = torch.tensor([rins.max(), gins.max(), bins.max()]) + + return reduction_mat, rgb_min.to(reduction_mat), rgb_max.to(reduction_mat) + + +def get_pca_map( + feature_map: torch.Tensor, + img_size, + interpolation="bicubic", + return_pca_stats=False, + pca_stats=None, +): + """ + feature_map: (1, h, w, C) is the feature map of a single image. + """ + feature_map = feature_map.float() + if feature_map.shape[0] != 1: + # make it (1, h, w, C) + feature_map = feature_map[None] + if pca_stats is None: + reduct_mat, color_min, color_max = get_robust_pca( + feature_map.reshape(-1, feature_map.shape[-1]) + ) + else: + reduct_mat, color_min, color_max = pca_stats + pca_color = feature_map @ reduct_mat + pca_color = (pca_color - color_min) / (color_max - color_min) + pca_color = F.interpolate( + pca_color.permute(0, 3, 1, 2), + size=img_size, + mode=interpolation, + ).permute(0, 2, 3, 1) + pca_color = pca_color.clamp(0, 1) + pca_color = pca_color.cpu().numpy().squeeze(0) + if return_pca_stats: + return pca_color, (reduct_mat, color_min, color_max) + return pca_color + + +def get_scale_map( + scalar_map: torch.Tensor, + img_size, + interpolation="nearest", +): + """ + scalar_map: (1, h, w, C) is the feature map of a single image. 
+ """ + if scalar_map.shape[0] != 1: + scalar_map = scalar_map[None] + scalar_map = (scalar_map - scalar_map.min()) / ( + scalar_map.max() - scalar_map.min() + 1e-6 + ) + scalar_map = F.interpolate( + scalar_map.permute(0, 3, 1, 2), + size=img_size, + mode=interpolation, + ).permute(0, 2, 3, 1) + # cmap = plt.get_cmap("viridis") + # scalar_map = cmap(scalar_map)[..., :3] + # make it 3 channels + scalar_map = torch.cat([scalar_map] * 3, dim=-1) + scalar_map = scalar_map.cpu().numpy().squeeze(0) + return scalar_map + + +def get_similarity_map(features: torch.Tensor, img_size=(224, 224)): + """ + compute the similarity map of the central patch to the rest of the image + """ + assert len(features.shape) == 4, "features should be (1, C, H, W)" + H, W, C = features.shape[1:] + center_patch_feature = features[0, H // 2, W // 2, :] + center_patch_feature_normalized = center_patch_feature / center_patch_feature.norm() + center_patch_feature_normalized = center_patch_feature_normalized.unsqueeze(1) + # Reshape and normalize the entire feature tensor + features_flat = features.view(-1, C) + features_normalized = features_flat / features_flat.norm(dim=1, keepdim=True) + + similarity_map_flat = features_normalized @ center_patch_feature_normalized + # Reshape the flat similarity map back to the spatial dimensions (H, W) + similarity_map = similarity_map_flat.view(H, W) + + # Normalize the similarity map to be in the range [0, 1] for visualization + similarity_map = (similarity_map - similarity_map.min()) / ( + similarity_map.max() - similarity_map.min() + ) + # we don't want the center patch to be the most similar + similarity_map[H // 2, W // 2] = -1.0 + similarity_map = ( + F.interpolate( + similarity_map.unsqueeze(0).unsqueeze(0), + size=img_size, + mode="bilinear", + ) + .squeeze(0) + .squeeze(0) + ) + + similarity_map_np = similarity_map.cpu().numpy() + negative_mask = similarity_map_np < 0 + + colormap = plt.get_cmap("turbo") + + # Apply the colormap directly to the normalized similarity map and multiply by 255 to get RGB values + similarity_map_rgb = colormap(similarity_map_np)[..., :3] + similarity_map_rgb[negative_mask] = [1.0, 0.0, 0.0] + return similarity_map_rgb + + +def get_cluster_map( + feature_map: torch.Tensor, + img_size, + num_clusters=10, +) -> torch.Tensor: + kmeans = KMeans(n_clusters=num_clusters, distance=CosineSimilarity, verbose=False) + if feature_map.shape[0] != 1: + # make it (1, h, w, C) + feature_map = feature_map[None] + labels = kmeans.fit_predict( + feature_map.reshape(1, -1, feature_map.shape[-1]) + ).float() + labels = ( + F.interpolate( + labels.reshape(1, *feature_map.shape[:-1]), size=img_size, mode="nearest" + ) + .squeeze() + .cpu() + .numpy() + ).astype(int) + cmap = plt.get_cmap("rainbow", num_clusters) + cluster_map = cmap(labels)[..., :3] + return cluster_map.reshape(img_size[0], img_size[1], 3) + + +if __name__ == '__main__': + rank = 0 + world_size = 1 + + # if 'WORLD_SIZE' in os.environ: + # dist.init_process_group(backend='nccl') + # rank = dist.get_rank() + # world_size = dist.get_world_size() + + main(rank, world_size) diff --git a/dam/model/multimodal_projector/__pycache__/base_projector.cpython-310.pyc b/dam/model/multimodal_projector/__pycache__/base_projector.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..79e97b982781bdc77b2977b753b76d25f9516333 Binary files /dev/null and b/dam/model/multimodal_projector/__pycache__/base_projector.cpython-310.pyc differ diff --git 
a/dam/model/multimodal_projector/__pycache__/builder.cpython-310.pyc b/dam/model/multimodal_projector/__pycache__/builder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8a594d43020794429e8849e46f9b1cba8393d83 Binary files /dev/null and b/dam/model/multimodal_projector/__pycache__/builder.cpython-310.pyc differ diff --git a/dam/model/multimodal_projector/base_projector.py b/dam/model/multimodal_projector/base_projector.py new file mode 100644 index 0000000000000000000000000000000000000000..ed2ae1bedc15144940e0089c1a672563d1e43415 --- /dev/null +++ b/dam/model/multimodal_projector/base_projector.py @@ -0,0 +1,100 @@ +import torch.nn as nn +import re +import torch +from transformers import AutoConfig, AutoModel, PretrainedConfig, PreTrainedModel + + +class IdentityMap(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, *args, **kwargs): + return x + + @property + def config(self): + return {"mm_projector_type": "identity"} + + +class SimpleResBlock(nn.Module): + def __init__(self, channels): + super().__init__() + self.pre_norm = nn.LayerNorm(channels) + + self.proj = nn.Sequential( + nn.Linear(channels, channels), nn.GELU(), nn.Linear(channels, channels) + ) + + def forward(self, x): + x = self.pre_norm(x) + return x + self.proj(x) + + +class DownSampleBlock(nn.Module): + + def forward(self, x): + vit_embeds = x + h = w = int(vit_embeds.shape[1] ** 0.5) + vit_embeds = vit_embeds.reshape(vit_embeds.shape[0], h, w, -1) + vit_embeds = self.flat_square(vit_embeds) + vit_embeds = vit_embeds.reshape(vit_embeds.shape[0], -1, vit_embeds.shape[-1]) + return vit_embeds + + def flat_square(self, x): + n, w, h, c = x.size() + if w % 2 == 1: + x = torch.concat([x, torch.zeros((n, 1, h, c), dtype=x.dtype).to(x.device)], dim=1).contiguous() + n, w, h, c = x.size() + if h % 2 == 1: + x = torch.concat([x, torch.zeros((n, w, 1, c), dtype=x.dtype).to(x.device)], dim=2).contiguous() + n, w, h, c = x.size() + x = x.view(n, w, int(h / 2), int(c * 2)) + x = x.permute(0, 2, 1, 3).contiguous() + x = x.view(n, int(h / 2), int(w / 2), int(c * 4)) + return x + +class MultimodalProjectorConfig(PretrainedConfig): + model_type = "v2l_projector" + + def __init__(self, mm_projector_type: str=None, **kwargs): + super().__init__() + self.mm_projector_type = mm_projector_type + + +class MultimodalProjector(PreTrainedModel): + config_class = MultimodalProjectorConfig + + def __init__( + self, mm_projector_cfg: MultimodalProjectorConfig, config: PretrainedConfig + ): + super().__init__(mm_projector_cfg) + mm_projector_type = mm_projector_cfg.mm_projector_type + if mm_projector_type == "identity": + self.layers = IdentityMap() + elif mm_projector_type == "linear": + self.layers = nn.Linear(config.mm_hidden_size, config.hidden_size) + elif mm_projector_type == "mlp_downsample": + self.layers = nn.Sequential( + DownSampleBlock(), + nn.LayerNorm(config.mm_hidden_size * 4), + nn.Linear(config.mm_hidden_size * 4, config.hidden_size), + nn.GELU(), + nn.Linear(config.hidden_size, config.hidden_size) + ) + else: + mlp_gelu_match = re.match(r"^mlp(\d+)x_gelu$", mm_projector_type) + if mlp_gelu_match: + mlp_depth = int(mlp_gelu_match.group(1)) + modules = [nn.Linear(config.mm_hidden_size, config.hidden_size)] + for _ in range(1, mlp_depth): + modules.append(nn.GELU()) + modules.append(nn.Linear(config.hidden_size, config.hidden_size)) + self.layers = nn.Sequential(*modules) + else: + raise ValueError(f"Unknown projector type: {mm_projector_type}") + + def forward(self, x, 
*args, **kwargs): + return self.layers(x) + +AutoConfig.register("v2l_projector", MultimodalProjectorConfig) +AutoModel.register(MultimodalProjectorConfig, MultimodalProjector) \ No newline at end of file diff --git a/dam/model/multimodal_projector/builder.py b/dam/model/multimodal_projector/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..f2854975b3a0f36eb242920503d30a81710603e9 --- /dev/null +++ b/dam/model/multimodal_projector/builder.py @@ -0,0 +1,30 @@ +# This file is modified from https://github.com/haotian-liu/LLaVA/ + +import os +import torch + +from .base_projector import MultimodalProjectorConfig, MultimodalProjector +from transformers import PretrainedConfig, PreTrainedModel + + +def build_mm_projector( + model_type_or_path: str, config: PretrainedConfig +) -> PreTrainedModel: + if model_type_or_path is None: + return None + + ## load from pretrained model + if config.resume_path: + assert os.path.exists( + model_type_or_path + ), f"Resume mm projector path {model_type_or_path} does not exist!" + return MultimodalProjector.from_pretrained( + model_type_or_path, config, torch_dtype=eval(config.model_dtype) + ) + ## build from scratch + else: + mm_projector_cfg = MultimodalProjectorConfig(model_type_or_path) + mm_projector = MultimodalProjector(mm_projector_cfg, config).to( + eval(config.model_dtype) + ) + return mm_projector diff --git a/dam/model/utils.py b/dam/model/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..47c74caac63c585dadc0c39f509d38409477d87f --- /dev/null +++ b/dam/model/utils.py @@ -0,0 +1,98 @@ +# Copyright 2024 NVIDIA CORPORATION & AFFILIATES +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
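Before the utilities below, a quick shape sketch for the `mlp_downsample` projector defined above. This is only a sketch, assuming `dam/model/multimodal_projector/base_projector.py` is importable from the repository root; the tensor sizes are made up for illustration:

```python
import torch
# Assumes the base_projector module above is importable via this package path.
from dam.model.multimodal_projector.base_projector import DownSampleBlock

vit_embeds = torch.randn(2, 576, 1024)  # (batch, 24x24 patches, mm_hidden_size) -- illustrative sizes
out = DownSampleBlock()(vit_embeds)
print(out.shape)  # torch.Size([2, 144, 4096]): 4x fewer tokens, 4x wider channels
```

The widened channel dimension is exactly the `config.mm_hidden_size * 4` consumed by the `LayerNorm` and first `Linear` in the `mlp_downsample` branch.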
+# +# SPDX-License-Identifier: Apache-2.0 +# This file is modified from https://github.com/haotian-liu/LLaVA/ +import os, os.path as osp +from transformers import AutoConfig +from transformers import PretrainedConfig +from huggingface_hub import snapshot_download, repo_exists +from huggingface_hub.utils import validate_repo_id, HFValidationError + +def get_model_config(config): + # `mask_encoder_cfg` and `context_provider_cfg` are optional + default_keys = ["llm_cfg", "vision_tower_cfg", "mm_projector_cfg", "mask_encoder_cfg", "context_provider_cfg"] + + if hasattr(config, "_name_or_path") and len(config._name_or_path) >= 2: + root_path = config._name_or_path + else: + root_path = config.resume_path + + # download from huggingface + if root_path is not None and not osp.exists(root_path): + try: + valid_hf_repo = repo_exists(root_path) + except HFValidationError as e: + valid_hf_repo = False + if valid_hf_repo: + root_path = snapshot_download(root_path) + + return_list = [] + for key in default_keys: + cfg = getattr(config, key, None) + if isinstance(cfg, dict): + try: + return_list.append(os.path.join(root_path, key[:-4])) + except: + raise ValueError(f"Cannot find resume path in config for {key}!") + elif isinstance(cfg, PretrainedConfig): + return_list.append(os.path.join(root_path, key[:-4])) + elif isinstance(cfg, str): + return_list.append(cfg) + elif cfg is None: + # We still return even if the cfg is None or does not exist + return_list.append(cfg) + + return return_list + + +def is_mm_model(model_path): + """ + Check if the model at the given path is a visual language model. + + Args: + model_path (str): The path to the model. + + Returns: + bool: True if the model is an MM model, False otherwise. + """ + config = AutoConfig.from_pretrained(model_path) + architectures = config.architectures + for architecture in architectures: + if "llava" in architecture.lower(): + return True + return False + + +def auto_upgrade(config): + cfg = AutoConfig.from_pretrained(config) + if "llava" in config and "llava" not in cfg.model_type: + assert cfg.model_type == "llama" + print( + "You are using newer LLaVA code base, while the checkpoint of v0 is from older code base." + ) + print( + "You must upgrade the checkpoint to the new code base (this can be done automatically)." + ) + confirm = input("Please confirm that you want to upgrade the checkpoint. 
[Y/N]") + if confirm.lower() in ["y", "yes"]: + print("Upgrading checkpoint...") + assert len(cfg.architectures) == 1 + setattr(cfg.__class__, "model_type", "llava") + cfg.architectures[0] = "LlavaLlamaForCausalLM" + cfg.save_pretrained(config) + print("Checkpoint upgraded.") + else: + print("Checkpoint upgrade aborted.") + exit(1) diff --git a/dist/examples/1.jpg b/dist/examples/1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f80eb273dacf05b3c6054ca08cef64628161067b --- /dev/null +++ b/dist/examples/1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5062538fc67074179eb884fb1d514854af6e759bc8ac623f94035835472937e +size 221810 diff --git a/dist/examples/10.jpg b/dist/examples/10.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6644fd3133ff430035467a9271e4995f580dcf68 --- /dev/null +++ b/dist/examples/10.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f222d08703255914ed6cbda7d0c5fd8b772d7b975f3ffd73ee47f24f7eaabe +size 490556 diff --git a/dist/examples/11.jpg b/dist/examples/11.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a8bba89114d6ecd9d3bc306832dab61e98f0a1f6 --- /dev/null +++ b/dist/examples/11.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5fe5c49581143bd2b7a46cda4ad845aab653525a745becc0fb138121e2f18b +size 581820 diff --git a/dist/examples/12.jpg b/dist/examples/12.jpg new file mode 100644 index 0000000000000000000000000000000000000000..74ad0bcbc6ffd840cf697e2e155beb61ba1fe491 --- /dev/null +++ b/dist/examples/12.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0b7ffd8be90c36c1cb9089b45a0ad0025e1f3e0f492ba443ce4617816a4293 +size 807826 diff --git a/dist/examples/13.jpg b/dist/examples/13.jpg new file mode 100644 index 0000000000000000000000000000000000000000..383379ad4b9d9728067ffb9da26c0f1d67bfc583 --- /dev/null +++ b/dist/examples/13.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a6f7231ef75268579c8bf1147f4a93743e252f268a0e16b71be589267d9b93 +size 395727 diff --git a/dist/examples/14.jpg b/dist/examples/14.jpg new file mode 100644 index 0000000000000000000000000000000000000000..67520d5fa6d0c0f82e22a7cddc422582c51efc87 --- /dev/null +++ b/dist/examples/14.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a935a30e764ac5bb68bafb97f3e41a666683c6e1bffa1cb85f24a3b6ea60308d +size 478925 diff --git a/dist/examples/15.jpg b/dist/examples/15.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6290ef3999652ea8440c242cadc8e7ec574e1dfd --- /dev/null +++ b/dist/examples/15.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b29517d3a6457bf8bd0b83a80cbeb24c2466bf3e5804bd503ebe60e430d784 +size 448722 diff --git a/dist/examples/16.jpg b/dist/examples/16.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1dd6b79d4b8b501252e94a81bfd63815e1cc65f1 --- /dev/null +++ b/dist/examples/16.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:127d96b506fceef4d5cda79115fa153a8c7bc566100e72a0f24331f1e6e6bfa5 +size 586775 diff --git a/dist/examples/17.jpg b/dist/examples/17.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ec2a7a81a3309dcc230a3b00948c0d3d0573d065 --- /dev/null +++ b/dist/examples/17.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9744b2d6d964b869519ba38bd5f9ddf04c9965272e965b94987651fd136d8cd +size 337456 
diff --git a/dist/examples/18.jpg b/dist/examples/18.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f377c64d7db74f697fcf97483faccab7feae2ee3 --- /dev/null +++ b/dist/examples/18.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a838a228e75c7418c087cc3afc7682447bc43fc69be48879eef749abe8c432e0 +size 399792 diff --git a/dist/examples/19.jpg b/dist/examples/19.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d0a6e1e314a1ec875684b78b577f5f7c50a3897c --- /dev/null +++ b/dist/examples/19.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e1d00cf80c608e900afbefe5d1bd70ce7e8f221c252e5e931297a4051a93df +size 740774 diff --git a/dist/examples/2.jpg b/dist/examples/2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9870800e31a802d152f4b569fceb1753e970f423 --- /dev/null +++ b/dist/examples/2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9011049db02799c9bf68ba228445968a4dc2d097df8f3559c4e18a8a09a4f7f +size 500692 diff --git a/dist/examples/20.jpg b/dist/examples/20.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3dae2d9b9f28523c49d3ec890540d73a0f581934 --- /dev/null +++ b/dist/examples/20.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a8d9c57310b74ba148a22ae0d0073987126e1c91d73b97761a87bc309a4527 +size 315608 diff --git a/dist/examples/21.jpg b/dist/examples/21.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a8bba89114d6ecd9d3bc306832dab61e98f0a1f6 --- /dev/null +++ b/dist/examples/21.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5fe5c49581143bd2b7a46cda4ad845aab653525a745becc0fb138121e2f18b +size 581820 diff --git a/dist/examples/3.jpg b/dist/examples/3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a6b898f4558d34b4a3fcd44dcffda58bbea2b942 --- /dev/null +++ b/dist/examples/3.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c5159bf7114d08967f95475176670043115b157bf700efa34190260cd917662 +size 1025438 diff --git a/dist/examples/4.jpg b/dist/examples/4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..231d5237cdd8700383ab39bc4c2fe12180fc5d7b --- /dev/null +++ b/dist/examples/4.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39174b4188bc6d928cf0153f0d3a3224e15c9823f8cdc99b4ad6627067741bb8 +size 707645 diff --git a/dist/examples/5.jpg b/dist/examples/5.jpg new file mode 100644 index 0000000000000000000000000000000000000000..db9215dfbaefa5f1c64c03dd1b928de1c6117ff8 --- /dev/null +++ b/dist/examples/5.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02c393a23aadd1304497e3a9b41144df166d1cfda33ea3e00eed94e27da3aa4 +size 1372251 diff --git a/dist/examples/6.jpg b/dist/examples/6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..552b2fb89d997e0352feab99fd493c892a746a4d --- /dev/null +++ b/dist/examples/6.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:580b829320a4d09f101b0abc9c03ef65476a6697f6f6f2b1c5aa785b3af6ee9c +size 158279 diff --git a/dist/examples/7.jpg b/dist/examples/7.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cffbe5ccb6baa239a3a970655dac89cd31e73274 --- /dev/null +++ b/dist/examples/7.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286b3a5693322edf01870a561e35016ed46a7cb4b9194c58e2f3526eab1f9efc 
+size 376329 diff --git a/dist/examples/8.jpg b/dist/examples/8.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b357441a21b0798d1f27a3a1f3812966acb678dc --- /dev/null +++ b/dist/examples/8.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da217a04901a4461d1bc76c08ef20be72f3b4c4161e69689ecd72f27e6deef5 +size 277103 diff --git a/dist/examples/9.jpg b/dist/examples/9.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2e24986628df0542b3a0a872860487a433ff2199 --- /dev/null +++ b/dist/examples/9.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdb5acb53dfc78e74008d113b22f5a2fb1e2c7b33cb8eadf4983d709bfe366ba +size 335061 diff --git a/dist/index.html b/dist/index.html new file mode 100644 index 0000000000000000000000000000000000000000..2f653d76c418cd727f1ed986507d65ada26b2a87 --- /dev/null +++ b/dist/index.html @@ -0,0 +1 @@ +Describe Anything Demo
\ No newline at end of file diff --git a/dist/js/bundle.81bfe2a9b31f7f903131.min.js b/dist/js/bundle.81bfe2a9b31f7f903131.min.js new file mode 100644 index 0000000000000000000000000000000000000000..cae5ed0f884bd1ac65f68f9fff69f5d428b1f6fb --- /dev/null +++ b/dist/js/bundle.81bfe2a9b31f7f903131.min.js @@ -0,0 +1,3 @@ +/*! For license information please see bundle.81bfe2a9b31f7f903131.min.js.LICENSE.txt */ +(()=>{var __webpack_modules__={49:(e,t,n)=>{"use strict";n.d(t,{A:()=>s});var r=n(354),o=n.n(r),i=n(314),a=n.n(i)()(o());a.push([e.id,"*, ::before, ::after {\n --tw-border-spacing-x: 0;\n --tw-border-spacing-y: 0;\n --tw-translate-x: 0;\n --tw-translate-y: 0;\n --tw-rotate: 0;\n --tw-skew-x: 0;\n --tw-skew-y: 0;\n --tw-scale-x: 1;\n --tw-scale-y: 1;\n --tw-pan-x: ;\n --tw-pan-y: ;\n --tw-pinch-zoom: ;\n --tw-scroll-snap-strictness: proximity;\n --tw-gradient-from-position: ;\n --tw-gradient-via-position: ;\n --tw-gradient-to-position: ;\n --tw-ordinal: ;\n --tw-slashed-zero: ;\n --tw-numeric-figure: ;\n --tw-numeric-spacing: ;\n --tw-numeric-fraction: ;\n --tw-ring-inset: ;\n --tw-ring-offset-width: 0px;\n --tw-ring-offset-color: #fff;\n --tw-ring-color: rgba(59, 130, 246, 0.5);\n --tw-ring-offset-shadow: 0 0 rgba(0,0,0,0);\n --tw-ring-shadow: 0 0 rgba(0,0,0,0);\n --tw-shadow: 0 0 rgba(0,0,0,0);\n --tw-shadow-colored: 0 0 rgba(0,0,0,0);\n --tw-blur: ;\n --tw-brightness: ;\n --tw-contrast: ;\n --tw-grayscale: ;\n --tw-hue-rotate: ;\n --tw-invert: ;\n --tw-saturate: ;\n --tw-sepia: ;\n --tw-drop-shadow: ;\n --tw-backdrop-blur: ;\n --tw-backdrop-brightness: ;\n --tw-backdrop-contrast: ;\n --tw-backdrop-grayscale: ;\n --tw-backdrop-hue-rotate: ;\n --tw-backdrop-invert: ;\n --tw-backdrop-opacity: ;\n --tw-backdrop-saturate: ;\n --tw-backdrop-sepia: ;\n --tw-contain-size: ;\n --tw-contain-layout: ;\n --tw-contain-paint: ;\n --tw-contain-style: ;\n}\n\n::backdrop {\n --tw-border-spacing-x: 0;\n --tw-border-spacing-y: 0;\n --tw-translate-x: 0;\n --tw-translate-y: 0;\n --tw-rotate: 0;\n --tw-skew-x: 0;\n --tw-skew-y: 0;\n --tw-scale-x: 1;\n --tw-scale-y: 1;\n --tw-pan-x: ;\n --tw-pan-y: ;\n --tw-pinch-zoom: ;\n --tw-scroll-snap-strictness: proximity;\n --tw-gradient-from-position: ;\n --tw-gradient-via-position: ;\n --tw-gradient-to-position: ;\n --tw-ordinal: ;\n --tw-slashed-zero: ;\n --tw-numeric-figure: ;\n --tw-numeric-spacing: ;\n --tw-numeric-fraction: ;\n --tw-ring-inset: ;\n --tw-ring-offset-width: 0px;\n --tw-ring-offset-color: #fff;\n --tw-ring-color: rgba(59, 130, 246, 0.5);\n --tw-ring-offset-shadow: 0 0 rgba(0,0,0,0);\n --tw-ring-shadow: 0 0 rgba(0,0,0,0);\n --tw-shadow: 0 0 rgba(0,0,0,0);\n --tw-shadow-colored: 0 0 rgba(0,0,0,0);\n --tw-blur: ;\n --tw-brightness: ;\n --tw-contrast: ;\n --tw-grayscale: ;\n --tw-hue-rotate: ;\n --tw-invert: ;\n --tw-saturate: ;\n --tw-sepia: ;\n --tw-drop-shadow: ;\n --tw-backdrop-blur: ;\n --tw-backdrop-brightness: ;\n --tw-backdrop-contrast: ;\n --tw-backdrop-grayscale: ;\n --tw-backdrop-hue-rotate: ;\n --tw-backdrop-invert: ;\n --tw-backdrop-opacity: ;\n --tw-backdrop-saturate: ;\n --tw-backdrop-sepia: ;\n --tw-contain-size: ;\n --tw-contain-layout: ;\n --tw-contain-paint: ;\n --tw-contain-style: ;\n}/*\n! tailwindcss v3.4.14 | MIT License | https://tailwindcss.com\n*//*\n1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)\n2. Allow adding a border to an element by just adding a border-width. 
(https://github.com/tailwindcss/tailwindcss/pull/116)\n*/\n\n*,\n::before,\n::after {\n box-sizing: border-box; /* 1 */\n border-width: 0; /* 2 */\n border-style: solid; /* 2 */\n border-color: #e5e7eb; /* 2 */\n}\n\n::before,\n::after {\n --tw-content: '';\n}\n\n/*\n1. Use a consistent sensible line-height in all browsers.\n2. Prevent adjustments of font size after orientation changes in iOS.\n3. Use a more readable tab size.\n4. Use the user's configured `sans` font-family by default.\n5. Use the user's configured `sans` font-feature-settings by default.\n6. Use the user's configured `sans` font-variation-settings by default.\n7. Disable tap highlights on iOS\n*/\n\nhtml,\n:host {\n line-height: 1.5; /* 1 */\n -webkit-text-size-adjust: 100%; /* 2 */\n -moz-tab-size: 4; /* 3 */\n -o-tab-size: 4;\n tab-size: 4; /* 3 */\n font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell, Noto Sans, sans-serif, \"Apple Color Emoji\", \"Segoe UI Emoji\", \"Segoe UI Symbol\", \"Noto Color Emoji\"; /* 4 */\n font-feature-settings: normal; /* 5 */\n font-variation-settings: normal; /* 6 */\n -webkit-tap-highlight-color: transparent; /* 7 */\n}\n\n/*\n1. Remove the margin in all browsers.\n2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.\n*/\n\nbody {\n margin: 0; /* 1 */\n line-height: inherit; /* 2 */\n}\n\n/*\n1. Add the correct height in Firefox.\n2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)\n3. Ensure horizontal rules are visible by default.\n*/\n\nhr {\n height: 0; /* 1 */\n color: inherit; /* 2 */\n border-top-width: 1px; /* 3 */\n}\n\n/*\nAdd the correct text decoration in Chrome, Edge, and Safari.\n*/\n\nabbr:where([title]) {\n text-decoration: underline;\n -webkit-text-decoration: underline dotted currentColor;\n text-decoration: underline dotted currentColor;\n}\n\n/*\nRemove the default font size and weight for headings.\n*/\n\nh1,\nh2,\nh3,\nh4,\nh5,\nh6 {\n font-size: inherit;\n font-weight: inherit;\n}\n\n/*\nReset links to optimize for opt-in styling instead of opt-out.\n*/\n\na {\n color: inherit;\n text-decoration: inherit;\n}\n\n/*\nAdd the correct font weight in Edge and Safari.\n*/\n\nb,\nstrong {\n font-weight: bolder;\n}\n\n/*\n1. Use the user's configured `mono` font-family by default.\n2. Use the user's configured `mono` font-feature-settings by default.\n3. Use the user's configured `mono` font-variation-settings by default.\n4. Correct the odd `em` font sizing in all browsers.\n*/\n\ncode,\nkbd,\nsamp,\npre {\n font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, \"Liberation Mono\", \"Courier New\", monospace; /* 1 */\n font-feature-settings: normal; /* 2 */\n font-variation-settings: normal; /* 3 */\n font-size: 1em; /* 4 */\n}\n\n/*\nAdd the correct font size in all browsers.\n*/\n\nsmall {\n font-size: 80%;\n}\n\n/*\nPrevent `sub` and `sup` elements from affecting the line height in all browsers.\n*/\n\nsub,\nsup {\n font-size: 75%;\n line-height: 0;\n position: relative;\n vertical-align: baseline;\n}\n\nsub {\n bottom: -0.25em;\n}\n\nsup {\n top: -0.5em;\n}\n\n/*\n1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)\n2. Correct table border color inheritance in all Chrome and Safari. 
(https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)\n3. Remove gaps between table borders by default.\n*/\n\ntable {\n text-indent: 0; /* 1 */\n border-color: inherit; /* 2 */\n border-collapse: collapse; /* 3 */\n}\n\n/*\n1. Change the font styles in all browsers.\n2. Remove the margin in Firefox and Safari.\n3. Remove default padding in all browsers.\n*/\n\nbutton,\ninput,\noptgroup,\nselect,\ntextarea {\n font-family: inherit; /* 1 */\n font-feature-settings: inherit; /* 1 */\n font-variation-settings: inherit; /* 1 */\n font-size: 100%; /* 1 */\n font-weight: inherit; /* 1 */\n line-height: inherit; /* 1 */\n letter-spacing: inherit; /* 1 */\n color: inherit; /* 1 */\n margin: 0; /* 2 */\n padding: 0; /* 3 */\n}\n\n/*\nRemove the inheritance of text transform in Edge and Firefox.\n*/\n\nbutton,\nselect {\n text-transform: none;\n}\n\n/*\n1. Correct the inability to style clickable types in iOS and Safari.\n2. Remove default button styles.\n*/\n\nbutton,\ninput:where([type='button']),\ninput:where([type='reset']),\ninput:where([type='submit']) {\n -webkit-appearance: button; /* 1 */\n background-color: transparent; /* 2 */\n background-image: none; /* 2 */\n}\n\n/*\nUse the modern Firefox focus style for all focusable elements.\n*/\n\n:-moz-focusring {\n outline: auto;\n}\n\n/*\nRemove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)\n*/\n\n:-moz-ui-invalid {\n box-shadow: none;\n}\n\n/*\nAdd the correct vertical alignment in Chrome and Firefox.\n*/\n\nprogress {\n vertical-align: baseline;\n}\n\n/*\nCorrect the cursor style of increment and decrement buttons in Safari.\n*/\n\n::-webkit-inner-spin-button,\n::-webkit-outer-spin-button {\n height: auto;\n}\n\n/*\n1. Correct the odd appearance in Chrome and Safari.\n2. Correct the outline style in Safari.\n*/\n\n[type='search'] {\n -webkit-appearance: textfield; /* 1 */\n outline-offset: -2px; /* 2 */\n}\n\n/*\nRemove the inner padding in Chrome and Safari on macOS.\n*/\n\n::-webkit-search-decoration {\n -webkit-appearance: none;\n}\n\n/*\n1. Correct the inability to style clickable types in iOS and Safari.\n2. Change font properties to `inherit` in Safari.\n*/\n\n::-webkit-file-upload-button {\n -webkit-appearance: button; /* 1 */\n font: inherit; /* 2 */\n}\n\n/*\nAdd the correct display in Chrome and Safari.\n*/\n\nsummary {\n display: list-item;\n}\n\n/*\nRemoves the default spacing and border for appropriate elements.\n*/\n\nblockquote,\ndl,\ndd,\nh1,\nh2,\nh3,\nh4,\nh5,\nh6,\nhr,\nfigure,\np,\npre {\n margin: 0;\n}\n\nfieldset {\n margin: 0;\n padding: 0;\n}\n\nlegend {\n padding: 0;\n}\n\nol,\nul,\nmenu {\n list-style: none;\n margin: 0;\n padding: 0;\n}\n\n/*\nReset default styling for dialogs.\n*/\ndialog {\n padding: 0;\n}\n\n/*\nPrevent resizing textareas horizontally by default.\n*/\n\ntextarea {\n resize: vertical;\n}\n\n/*\n1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)\n2. 
Set the default placeholder color to the user's configured gray 400 color.\n*/\n\ninput::-moz-placeholder, textarea::-moz-placeholder {\n opacity: 1; /* 1 */\n color: #9ca3af; /* 2 */\n}\n\ninput::placeholder,\ntextarea::placeholder {\n opacity: 1; /* 1 */\n color: #9ca3af; /* 2 */\n}\n\n/*\nSet the default cursor for buttons.\n*/\n\nbutton,\n[role=\"button\"] {\n cursor: pointer;\n}\n\n/*\nMake sure disabled buttons don't get the pointer cursor.\n*/\n:disabled {\n cursor: default;\n}\n\n/*\n1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)\n2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)\n This can trigger a poorly considered lint error in some tools but is included by design.\n*/\n\nimg,\nsvg,\nvideo,\ncanvas,\naudio,\niframe,\nembed,\nobject {\n display: block; /* 1 */\n vertical-align: middle; /* 2 */\n}\n\n/*\nConstrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)\n*/\n\nimg,\nvideo {\n max-width: 100%;\n height: auto;\n}\n\n/* Make elements with the HTML hidden attribute stay hidden by default */\n[hidden]:where(:not([hidden=\"until-found\"])) {\n display: none;\n}\n.container {\n width: 100%;\n}\n@media (min-width: 640px) {\n\n .container {\n max-width: 640px;\n }\n}\n@media (min-width: 768px) {\n\n .container {\n max-width: 768px;\n }\n}\n@media (min-width: 1024px) {\n\n .container {\n max-width: 1024px;\n }\n}\n@media (min-width: 1280px) {\n\n .container {\n max-width: 1280px;\n }\n}\n@media (min-width: 1536px) {\n\n .container {\n max-width: 1536px;\n }\n}\n.pointer-events-none {\n pointer-events: none;\n}\n.fixed {\n position: fixed;\n}\n.absolute {\n position: absolute;\n}\n.relative {\n position: relative;\n}\n.inset-0 {\n top: 0px;\n right: 0px;\n bottom: 0px;\n left: 0px;\n}\n.left-0 {\n left: 0px;\n}\n.right-0 {\n right: 0px;\n}\n.right-4 {\n right: 1rem;\n}\n.top-1\\/2 {\n top: 50%;\n}\n.top-4 {\n top: 1rem;\n}\n.z-10 {\n z-index: 10;\n}\n.z-50 {\n z-index: 50;\n}\n.mx-4 {\n margin-left: 1rem;\n margin-right: 1rem;\n}\n.mb-2 {\n margin-bottom: 0.5rem;\n}\n.mb-4 {\n margin-bottom: 1rem;\n}\n.mb-6 {\n margin-bottom: 1.5rem;\n}\n.mb-8 {\n margin-bottom: 2rem;\n}\n.mt-4 {\n margin-top: 1rem;\n}\n.flex {\n display: flex;\n}\n.hidden {\n display: none;\n}\n.h-16 {\n height: 4rem;\n}\n.h-6 {\n height: 1.5rem;\n}\n.h-8 {\n height: 2rem;\n}\n.h-\\[150px\\] {\n height: 150px;\n}\n.h-full {\n height: 100%;\n}\n.h-screen {\n height: 100vh;\n}\n.max-h-\\[calc\\(100vh-300px\\)\\] {\n max-height: calc(100vh - 300px);\n}\n.max-h-full {\n max-height: 100%;\n}\n.w-16 {\n width: 4rem;\n}\n.w-6 {\n width: 1.5rem;\n}\n.w-8 {\n width: 2rem;\n}\n.w-\\[200px\\] {\n width: 200px;\n}\n.w-full {\n width: 100%;\n}\n.max-w-\\[2200px\\] {\n max-width: 2200px;\n}\n.max-w-full {\n max-width: 100%;\n}\n.max-w-md {\n max-width: 28rem;\n}\n.flex-1 {\n flex: 1 1 0%;\n}\n.flex-grow {\n flex-grow: 1;\n}\n.-translate-y-1\\/2 {\n --tw-translate-y: -50%;\n transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));\n}\n.cursor-not-allowed {\n cursor: not-allowed;\n}\n.cursor-pointer {\n cursor: pointer;\n}\n.resize {\n resize: both;\n}\n.flex-col {\n flex-direction: column;\n}\n.flex-wrap {\n flex-wrap: wrap;\n}\n.items-center {\n 
align-items: center;\n}\n.justify-center {\n justify-content: center;\n}\n.justify-between {\n justify-content: space-between;\n}\n.gap-2 {\n gap: 0.5rem;\n}\n.gap-4 {\n gap: 1rem;\n}\n.gap-6 {\n gap: 1.5rem;\n}\n.gap-8 {\n gap: 2rem;\n}\n.space-y-4 > :not([hidden]) ~ :not([hidden]) {\n --tw-space-y-reverse: 0;\n margin-top: calc(1rem * (1 - var(--tw-space-y-reverse)));\n margin-top: calc(1rem * calc(1 - var(--tw-space-y-reverse)));\n margin-bottom: calc(1rem * var(--tw-space-y-reverse));\n}\n.overflow-hidden {\n overflow: hidden;\n}\n.rounded {\n border-radius: 0.25rem;\n}\n.rounded-full {\n border-radius: 9999px;\n}\n.rounded-lg {\n border-radius: 0.5rem;\n}\n.rounded-sm {\n border-radius: 0.125rem;\n}\n.border-4 {\n border-width: 4px;\n}\n.border-b {\n border-bottom-width: 1px;\n}\n.border-dashed {\n border-style: dashed;\n}\n.border-blue-500 {\n --tw-border-opacity: 1;\n border-color: rgba(59, 130, 246, var(--tw-border-opacity));\n}\n.border-gray-300 {\n --tw-border-opacity: 1;\n border-color: rgba(209, 213, 219, var(--tw-border-opacity));\n}\n.bg-black {\n --tw-bg-opacity: 1;\n background-color: rgba(0, 0, 0, var(--tw-bg-opacity));\n}\n.bg-blue-50 {\n --tw-bg-opacity: 1;\n background-color: rgba(239, 246, 255, var(--tw-bg-opacity));\n}\n.bg-blue-500 {\n --tw-bg-opacity: 1;\n background-color: rgba(59, 130, 246, var(--tw-bg-opacity));\n}\n.bg-gray-500 {\n --tw-bg-opacity: 1;\n background-color: rgba(107, 114, 128, var(--tw-bg-opacity));\n}\n.bg-red-100 {\n --tw-bg-opacity: 1;\n background-color: rgba(254, 226, 226, var(--tw-bg-opacity));\n}\n.bg-red-600 {\n --tw-bg-opacity: 1;\n background-color: rgba(220, 38, 38, var(--tw-bg-opacity));\n}\n.bg-white {\n --tw-bg-opacity: 1;\n background-color: rgba(255, 255, 255, var(--tw-bg-opacity));\n}\n.bg-opacity-50 {\n --tw-bg-opacity: 0.5;\n}\n.bg-opacity-75 {\n --tw-bg-opacity: 0.75;\n}\n.object-contain {\n -o-object-fit: contain;\n object-fit: contain;\n}\n.object-cover {\n -o-object-fit: cover;\n object-fit: cover;\n}\n.p-4 {\n padding: 1rem;\n}\n.p-6 {\n padding: 1.5rem;\n}\n.p-8 {\n padding: 2rem;\n}\n.px-16 {\n padding-left: 4rem;\n padding-right: 4rem;\n}\n.px-4 {\n padding-left: 1rem;\n padding-right: 1rem;\n}\n.px-8 {\n padding-left: 2rem;\n padding-right: 2rem;\n}\n.py-2 {\n padding-top: 0.5rem;\n padding-bottom: 0.5rem;\n}\n.pl-5 {\n padding-left: 1.25rem;\n}\n.pr-5 {\n padding-right: 1.25rem;\n}\n.text-left {\n text-align: left;\n}\n.text-center {\n text-align: center;\n}\n.text-3xl {\n font-size: 1.875rem;\n line-height: 2.25rem;\n}\n.text-lg {\n font-size: 1.125rem;\n line-height: 1.75rem;\n}\n.text-sm {\n font-size: 0.875rem;\n line-height: 1.25rem;\n}\n.font-bold {\n font-weight: 700;\n}\n.font-medium {\n font-weight: 500;\n}\n.font-semibold {\n font-weight: 600;\n}\n.text-gray-300 {\n --tw-text-opacity: 1;\n color: rgba(209, 213, 219, var(--tw-text-opacity));\n}\n.text-gray-500 {\n --tw-text-opacity: 1;\n color: rgba(107, 114, 128, var(--tw-text-opacity));\n}\n.text-gray-600 {\n --tw-text-opacity: 1;\n color: rgba(75, 85, 99, var(--tw-text-opacity));\n}\n.text-gray-800 {\n --tw-text-opacity: 1;\n color: rgba(31, 41, 55, var(--tw-text-opacity));\n}\n.text-gray-900 {\n --tw-text-opacity: 1;\n color: rgba(17, 24, 39, var(--tw-text-opacity));\n}\n.text-red-600 {\n --tw-text-opacity: 1;\n color: rgba(220, 38, 38, var(--tw-text-opacity));\n}\n.text-white {\n --tw-text-opacity: 1;\n color: rgba(255, 255, 255, var(--tw-text-opacity));\n}\n.underline {\n text-decoration-line: underline;\n}\n.opacity-40 {\n opacity: 0.4;\n}\n.shadow-lg 
{\n --tw-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -4px rgba(0, 0, 0, 0.1);\n --tw-shadow-colored: 0 10px 15px -3px var(--tw-shadow-color), 0 4px 6px -4px var(--tw-shadow-color);\n box-shadow: 0 0 rgba(0,0,0,0), 0 0 rgba(0,0,0,0), var(--tw-shadow);\n box-shadow: var(--tw-ring-offset-shadow, 0 0 rgba(0,0,0,0)), var(--tw-ring-shadow, 0 0 rgba(0,0,0,0)), var(--tw-shadow);\n}\n.shadow-xl {\n --tw-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 8px 10px -6px rgba(0, 0, 0, 0.1);\n --tw-shadow-colored: 0 20px 25px -5px var(--tw-shadow-color), 0 8px 10px -6px var(--tw-shadow-color);\n box-shadow: 0 0 rgba(0,0,0,0), 0 0 rgba(0,0,0,0), var(--tw-shadow);\n box-shadow: var(--tw-ring-offset-shadow, 0 0 rgba(0,0,0,0)), var(--tw-ring-shadow, 0 0 rgba(0,0,0,0)), var(--tw-shadow);\n}\n.filter {\n filter: var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow);\n}\n.transition-colors {\n transition-property: color, background-color, border-color, text-decoration-color, fill, stroke;\n transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);\n transition-duration: 150ms;\n}\n.transition-opacity {\n transition-property: opacity;\n transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);\n transition-duration: 150ms;\n}\n\n.fixed {\n position: fixed;\n}\n\n.inset-0 {\n top: 0;\n right: 0;\n bottom: 0;\n left: 0;\n}\n\n.bg-opacity-75 {\n --tw-bg-opacity: 0.75;\n}\n\n.z-50 {\n z-index: 50;\n}\n\n.description-container {\n margin: 20px;\n display: flex;\n gap: 20px;\n height: 140px;\n}\n\n.description-box {\n flex: 1;\n background-color: #f5f5f5;\n border-radius: 4px;\n padding: 15px;\n margin-bottom: 0;\n color: #333;\n overflow-y: auto;\n}\n\n.description-box.describing, .description-box.ready {\n background-color: #e9ecef;\n color: #6c757d;\n font-style: italic;\n }\n\n.description-controls {\n display: flex;\n flex-direction: column;\n justify-content: space-between;\n width: 200px;\n gap: 10px;\n}\n\n.description-controls button {\n padding: 8px 16px;\n border: none;\n border-radius: 4px;\n background-color: #007bff;\n color: white;\n cursor: pointer;\n white-space: nowrap;\n }\n\n.description-controls button:disabled {\n background-color: #cccccc;\n cursor: not-allowed;\n }\n\n.description-controls button:hover:not(:disabled) {\n background-color: #0056b3;\n }\n\n.description-controls button.reset-button {\n background-color: #007bff;\n }\n\n.description-controls button.reset-button:hover:not(:disabled) {\n background-color: #0056b3;\n }\n\n.description-controls button.reset-button:disabled {\n background-color: #cccccc;\n }\n\n#root {\n display: flex;\n flex-direction: column;\n height: 100vh;\n}\n\n.stage-container {\n flex: 1;\n min-height: 0;\n display: flex;\n align-items: center;\n justify-content: center;\n overflow: hidden;\n}\n\n.hover\\:bg-blue-700:hover {\n --tw-bg-opacity: 1;\n background-color: rgba(29, 78, 216, var(--tw-bg-opacity));\n}\n\n.hover\\:bg-red-700:hover {\n --tw-bg-opacity: 1;\n background-color: rgba(185, 28, 28, var(--tw-bg-opacity));\n}\n\n.hover\\:text-gray-800:hover {\n --tw-text-opacity: 1;\n color: rgba(31, 41, 55, var(--tw-text-opacity));\n}\n\n.hover\\:opacity-80:hover {\n opacity: 0.8;\n}\n\n@media (min-width: 640px) {\n\n .sm\\:flex-row {\n flex-direction: row;\n }\n\n .sm\\:space-x-8 > :not([hidden]) ~ :not([hidden]) {\n --tw-space-x-reverse: 0;\n margin-right: calc(2rem * var(--tw-space-x-reverse));\n margin-left: calc(2rem * (1 - 
var(--tw-space-x-reverse)));\n margin-left: calc(2rem * calc(1 - var(--tw-space-x-reverse)));\n }\n\n .sm\\:text-left {\n text-align: left;\n }\n}\n","",{version:3,sources:["webpack://./src/assets/scss/App.scss"],names:[],mappings:"AAAA;EAAA,wBAAc;EAAd,wBAAc;EAAd,mBAAc;EAAd,mBAAc;EAAd,cAAc;EAAd,cAAc;EAAd,cAAc;EAAd,eAAc;EAAd,eAAc;EAAd,aAAc;EAAd,aAAc;EAAd,kBAAc;EAAd,sCAAc;EAAd,8BAAc;EAAd,6BAAc;EAAd,4BAAc;EAAd,eAAc;EAAd,oBAAc;EAAd,sBAAc;EAAd,uBAAc;EAAd,wBAAc;EAAd,kBAAc;EAAd,2BAAc;EAAd,4BAAc;EAAd,wCAAc;EAAd,0CAAc;EAAd,mCAAc;EAAd,8BAAc;EAAd,sCAAc;EAAd,YAAc;EAAd,kBAAc;EAAd,gBAAc;EAAd,iBAAc;EAAd,kBAAc;EAAd,cAAc;EAAd,gBAAc;EAAd,aAAc;EAAd,mBAAc;EAAd,qBAAc;EAAd,2BAAc;EAAd,yBAAc;EAAd,0BAAc;EAAd,2BAAc;EAAd,uBAAc;EAAd,wBAAc;EAAd,yBAAc;EAAd,sBAAc;EAAd,oBAAc;EAAd,sBAAc;EAAd,qBAAc;EAAd;AAAc;;AAAd;EAAA,wBAAc;EAAd,wBAAc;EAAd,mBAAc;EAAd,mBAAc;EAAd,cAAc;EAAd,cAAc;EAAd,cAAc;EAAd,eAAc;EAAd,eAAc;EAAd,aAAc;EAAd,aAAc;EAAd,kBAAc;EAAd,sCAAc;EAAd,8BAAc;EAAd,6BAAc;EAAd,4BAAc;EAAd,eAAc;EAAd,oBAAc;EAAd,sBAAc;EAAd,uBAAc;EAAd,wBAAc;EAAd,kBAAc;EAAd,2BAAc;EAAd,4BAAc;EAAd,wCAAc;EAAd,0CAAc;EAAd,mCAAc;EAAd,8BAAc;EAAd,sCAAc;EAAd,YAAc;EAAd,kBAAc;EAAd,gBAAc;EAAd,iBAAc;EAAd,kBAAc;EAAd,cAAc;EAAd,gBAAc;EAAd,aAAc;EAAd,mBAAc;EAAd,qBAAc;EAAd,2BAAc;EAAd,yBAAc;EAAd,0BAAc;EAAd,2BAAc;EAAd,uBAAc;EAAd,wBAAc;EAAd,yBAAc;EAAd,sBAAc;EAAd,oBAAc;EAAd,sBAAc;EAAd,qBAAc;EAAd;AAAc,CAAd;;CAAc,CAAd;;;CAAc;;AAAd;;;EAAA,sBAAc,EAAd,MAAc;EAAd,eAAc,EAAd,MAAc;EAAd,mBAAc,EAAd,MAAc;EAAd,qBAAc,EAAd,MAAc;AAAA;;AAAd;;EAAA,gBAAc;AAAA;;AAAd;;;;;;;;CAAc;;AAAd;;EAAA,gBAAc,EAAd,MAAc;EAAd,8BAAc,EAAd,MAAc;EAAd,gBAAc,EAAd,MAAc;EAAd,cAAc;KAAd,WAAc,EAAd,MAAc;EAAd,8LAAc,EAAd,MAAc;EAAd,6BAAc,EAAd,MAAc;EAAd,+BAAc,EAAd,MAAc;EAAd,wCAAc,EAAd,MAAc;AAAA;;AAAd;;;CAAc;;AAAd;EAAA,SAAc,EAAd,MAAc;EAAd,oBAAc,EAAd,MAAc;AAAA;;AAAd;;;;CAAc;;AAAd;EAAA,SAAc,EAAd,MAAc;EAAd,cAAc,EAAd,MAAc;EAAd,qBAAc,EAAd,MAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,0BAAc;EAAd,sDAAc;UAAd,8CAAc;AAAA;;AAAd;;CAAc;;AAAd;;;;;;EAAA,kBAAc;EAAd,oBAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,cAAc;EAAd,wBAAc;AAAA;;AAAd;;CAAc;;AAAd;;EAAA,mBAAc;AAAA;;AAAd;;;;;CAAc;;AAAd;;;;EAAA,+GAAc,EAAd,MAAc;EAAd,6BAAc,EAAd,MAAc;EAAd,+BAAc,EAAd,MAAc;EAAd,cAAc,EAAd,MAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,cAAc;AAAA;;AAAd;;CAAc;;AAAd;;EAAA,cAAc;EAAd,cAAc;EAAd,kBAAc;EAAd,wBAAc;AAAA;;AAAd;EAAA,eAAc;AAAA;;AAAd;EAAA,WAAc;AAAA;;AAAd;;;;CAAc;;AAAd;EAAA,cAAc,EAAd,MAAc;EAAd,qBAAc,EAAd,MAAc;EAAd,yBAAc,EAAd,MAAc;AAAA;;AAAd;;;;CAAc;;AAAd;;;;;EAAA,oBAAc,EAAd,MAAc;EAAd,8BAAc,EAAd,MAAc;EAAd,gCAAc,EAAd,MAAc;EAAd,eAAc,EAAd,MAAc;EAAd,oBAAc,EAAd,MAAc;EAAd,oBAAc,EAAd,MAAc;EAAd,uBAAc,EAAd,MAAc;EAAd,cAAc,EAAd,MAAc;EAAd,SAAc,EAAd,MAAc;EAAd,UAAc,EAAd,MAAc;AAAA;;AAAd;;CAAc;;AAAd;;EAAA,oBAAc;AAAA;;AAAd;;;CAAc;;AAAd;;;;EAAA,0BAAc,EAAd,MAAc;EAAd,6BAAc,EAAd,MAAc;EAAd,sBAAc,EAAd,MAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,aAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,gBAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,wBAAc;AAAA;;AAAd;;CAAc;;AAAd;;EAAA,YAAc;AAAA;;AAAd;;;CAAc;;AAAd;EAAA,6BAAc,EAAd,MAAc;EAAd,oBAAc,EAAd,MAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,wBAAc;AAAA;;AAAd;;;CAAc;;AAAd;EAAA,0BAAc,EAAd,MAAc;EAAd,aAAc,EAAd,MAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,kBAAc;AAAA;;AAAd;;CAAc;;AAAd;;;;;;;;;;;;;EAAA,SAAc;AAAA;;AAAd;EAAA,SAAc;EAAd,UAAc;AAAA;;AAAd;EAAA,UAAc;AAAA;;AAAd;;;EAAA,gBAAc;EAAd,SAAc;EAAd,UAAc;AAAA;;AAAd;;CAAc;AAAd;EAAA,UAAc;AAAA;;AAAd;;CAAc;;AAAd;EAAA,gBAAc;AAAA;;AAAd;;;CAAc;;AAAd;EAAA,UAAc,EAAd,MAAc;EAAd,cAAc,EAAd,MAAc;AAAA;;AAAd;;EAAA,UAAc,EAAd,MAAc;EAAd,cAAc,EAAd,MAAc;AAAA;;AAAd;;CAAc;;AAAd;;EAAA,eAAc;AAAA;;AAAd;;CAAc;AAAd;EAAA,eAAc;AAAA;;AAAd;;;;CAAc;;AAAd;;;;;;;;EAAA,cAAc,EAAd,MAAc;EAAd,sBAAc,EAAd,MAAc;AAAA;;AAAd;;CAAc;;AAAd;;EAAA,eAAc;EAAd
,YAAc;AAAA;;AAAd,wEAAc;AAAd;EAAA,aAAc;AAAA;AACd;EAAA;AAAoB;AAApB;;EAAA;IAAA;EAAoB;AAAA;AAApB;;EAAA;IAAA;EAAoB;AAAA;AAApB;;EAAA;IAAA;EAAoB;AAAA;AAApB;;EAAA;IAAA;EAAoB;AAAA;AAApB;;EAAA;IAAA;EAAoB;AAAA;AACpB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,QAAmB;EAAnB,UAAmB;EAAnB,WAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,iBAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,sBAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,uBAAmB;EAAnB,wDAAmB;EAAnB,4DAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,sBAAmB;EAAnB;AAAmB;AAAnB;EAAA,sBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,sBAAmB;KAAnB;AAAmB;AAAnB;EAAA,oBAAmB;KAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,kBAAmB;EAAnB;AAAmB;AAAnB;EAAA,mBAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,mBAAmB;EAAnB;AAAmB;AAAnB;EAAA,mBAAmB;EAAnB;AAAmB;AAAnB;EAAA,mBAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,oBAAmB;EAAnB;AAAmB;AAAnB;EAAA,oBAAmB;EAAnB;AAAmB;AAAnB;EAAA,oBAAmB;EAAnB;AAAmB;AAAnB;EAAA,oBAAmB;EAAnB;AAAmB;AAAnB;EAAA,oBAAmB;EAAnB;AAAmB;AAAnB;EAAA,oBAAmB;EAAnB;AAAmB;AAAnB;EAAA,oBAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,mFAAmB;EAAnB,mGAAmB;EAAnB,kEAAmB;EAAnB;AAAmB;AAAnB;EAAA,oFAAmB;EAAnB,oGAAmB;EAAnB,kEAAmB;EAAnB;AAAmB;AAAnB;EAAA;AAAmB;AAAnB;EAAA,+FAAmB;EAAnB,wDAAmB;EAAnB;AAAmB;AAAnB;EAAA,4BAAmB;EAAnB,wDAAmB;EAAnB;AAAmB;;AAEnB;EACE,eAAe;AACjB;;AAEA;EACE,MAAM;EACN,QAAQ;EACR,SAAS;EACT,OAAO;AACT;;AAEA;EACE,qBAAqB;AACvB;;AAEA;EACE,WAAW;AACb;;AAEA;EACE,YAAY;EACZ,aAAa;EACb,SAAS;EACT,aAAa;AACf;;AAEA;EACE,OAAO;EACP,yBAAyB;EACzB,kBAAkB;EAClB,aAAa;EACb,gBAAgB;EAChB,WAAW;EACX,gBAAgB;AAOlB;;AALE;IACE,yBAAyB;IACzB,cAAc;IACd,kBAAkB;EACpB;;AAGF;EACE,aAAa;EACb,sBAAsB;EACtB,8BAA8B;EAC9B,YAAY;EACZ,SAAS;AAgCX;;AA9BE;IACE,iBAAiB;IACjB,YAAY;IACZ,kBAAkB;IAClB,yBAAyB;IACzB,YAAY;IACZ,eAAe;IACf,mBAAmB;EAsBrB;;AApBE;MACE,yBAAyB;MACzB,mBAAmB;IACrB;;AAEA;MACE,yBAAyB;IAC3B;;AAEA;QACI,yBAAyB;IAS7B;;AAPE;QACE,yBAAyB;MAC3B;;AAEA;QACE,yBAAyB;MAC3B;;AAKN;EACE,aAAa;EACb,sBAAsB;EACtB,aAAa;AACf;;AAEA;EACE,OAAO;EACP,aAAa;EACb,aAAa;EACb,mBAAmB;EACnB,uBAAuB;EACvB,gBAAgB;AAClB;;AAlGA;EAAA,kBAmGA;EAnGA;AAmGA;;AAnGA;EAAA,kBAmGA;EAnGA;AAmGA;;AAnGA;EAAA,oBAmGA;EAnGA;AAmGA;;AAnGA;EAAA;AAmGA;;AAnGA;;EAAA;IAAA;EAmGA;;EAnGA;IAAA,uBAmGA;IAnGA,oDAmGA;IAnGA,yDAmGA;IAnGA;EAmGA;;EAnGA;IAAA;EAmGA;AAAA",sourcesContent:["@tailwind base;\n@tailwind components;\n@tailwind 
utilities;\n\n.fixed {\n position: fixed;\n}\n\n.inset-0 {\n top: 0;\n right: 0;\n bottom: 0;\n left: 0;\n}\n\n.bg-opacity-75 {\n --tw-bg-opacity: 0.75;\n}\n\n.z-50 {\n z-index: 50;\n}\n\n.description-container {\n margin: 20px;\n display: flex;\n gap: 20px;\n height: 140px;\n}\n\n.description-box {\n flex: 1;\n background-color: #f5f5f5;\n border-radius: 4px;\n padding: 15px;\n margin-bottom: 0;\n color: #333;\n overflow-y: auto;\n \n &.describing, &.ready {\n background-color: #e9ecef;\n color: #6c757d;\n font-style: italic;\n }\n}\n\n.description-controls {\n display: flex;\n flex-direction: column;\n justify-content: space-between;\n width: 200px;\n gap: 10px;\n \n button {\n padding: 8px 16px;\n border: none;\n border-radius: 4px;\n background-color: #007bff;\n color: white;\n cursor: pointer;\n white-space: nowrap;\n \n &:disabled {\n background-color: #cccccc;\n cursor: not-allowed;\n }\n \n &:hover:not(:disabled) {\n background-color: #0056b3;\n }\n\n &.reset-button {\n background-color: #007bff;\n \n &:hover:not(:disabled) {\n background-color: #0056b3;\n }\n \n &:disabled {\n background-color: #cccccc;\n }\n }\n }\n}\n\n#root {\n display: flex;\n flex-direction: column;\n height: 100vh;\n}\n\n.stage-container {\n flex: 1;\n min-height: 0;\n display: flex;\n align-items: center;\n justify-content: center;\n overflow: hidden;\n}\n"],sourceRoot:""}]);const s=a},314:e=>{"use strict";e.exports=function(e){var t=[];return t.toString=function(){return this.map((function(t){var n="",r=void 0!==t[5];return t[4]&&(n+="@supports (".concat(t[4],") {")),t[2]&&(n+="@media ".concat(t[2]," {")),r&&(n+="@layer".concat(t[5].length>0?" ".concat(t[5]):""," {")),n+=e(t),r&&(n+="}"),t[2]&&(n+="}"),t[4]&&(n+="}"),n})).join("")},t.i=function(e,n,r,o,i){"string"==typeof e&&(e=[[null,e,void 0]]);var a={};if(r)for(var s=0;s0?" 
".concat(c[5]):""," {").concat(c[1],"}")),c[5]=i),n&&(c[2]?(c[1]="@media ".concat(c[2]," {").concat(c[1],"}"),c[2]=n):c[2]=n),o&&(c[4]?(c[1]="@supports (".concat(c[4],") {").concat(c[1],"}"),c[4]=o):c[4]="".concat(o)),t.push(c))}},t}},354:e=>{"use strict";e.exports=function(e){var t=e[1],n=e[3];if(!n)return t;if("function"==typeof btoa){var r=btoa(unescape(encodeURIComponent(JSON.stringify(n)))),o="sourceMappingURL=data:application/json;charset=utf-8;base64,".concat(r),i="/*# ".concat(o," */");return[t].concat([i]).join("\n")}return[t].join("\n")}},450:(e,t,n)=>{"use strict";n.r(t),n.d(t,{InferenceSession:()=>h,Tensor:()=>d,env:()=>a,registerBackend:()=>i});const r={},o=[],i=(e,t,n)=>{if(!t||"function"!=typeof t.init||"function"!=typeof t.createSessionHandler)throw new TypeError("not a valid backend");{const i=r[e];if(void 0===i)r[e]={backend:t,priority:n};else{if(i.priority>n)return;if(i.priority===n&&i.backend!==t)throw new Error(`cannot register backend "${e}" using priority ${n}`)}if(n>=0){const t=o.indexOf(e);-1!==t&&o.splice(t,1);for(let t=0;t{let t=1;for(let n=0;n{const o=document.createElement("canvas"),i=o.getContext("2d");if(!e||!i)return r();const a=new Image;a.crossOrigin="Anonymous",a.src=e,a.onload=()=>{o.width=a.width,o.height=a.height,i.drawImage(a,0,0,o.width,o.height);const e=i.getImageData(0,0,o.width,o.height);if(void 0!==t){if(void 0!==t.height&&t.height!==o.height)throw new Error("Image input config height doesn't match ImageBitmap height");if(s.height=o.height,void 0!==t.width&&t.width!==o.width)throw new Error("Image input config width doesn't match ImageBitmap width");s.width=o.width}else s.height=o.height,s.width=o.width;n(f.bufferToTensor(e.data,s))}}));throw new Error("Input data provided is not supported - aborted tensor creation")}{const n="RGBA";let r,o;if(void 0!==t&&void 0!==t.resizedWidth&&void 0!==t.resizedHeight?(r=t.resizedHeight,o=t.resizedWidth):(r=e.height,o=e.width),void 0!==t){if(s=t,void 0!==t.bitmapFormat&&t.bitmapFormat!==n)throw new Error("Image input config format must be RGBA for ImageData");s.bitmapFormat="RGBA"}else s.bitmapFormat="RGBA";if(s.height=r,s.width=o,void 0!==t){const t=document.createElement("canvas");t.width=o,t.height=r;const n=t.getContext("2d");if(null==n)throw new Error("Can not access image data");n.putImageData(e,0,0),a=n.getImageData(0,0,o,r).data}else a=e.data}}if(void 0!==a)return f.bufferToTensor(a,s);throw new Error("Input data provided is not supported - aborted tensor creation")}toImageData(e){var t,n;const r=document.createElement("canvas").getContext("2d");let o;if(null==r)throw new Error("Can not access image data");{const i=this.dims[3],a=this.dims[2],s=this.dims[1],u=void 0!==e&&void 0!==e.format?e.format:"RGB",l=void 0!==e&&void 0!==(null===(t=e.norm)||void 0===t?void 0:t.mean)?e.norm.mean:255,c=void 0!==e&&void 0!==(null===(n=e.norm)||void 0===n?void 0:n.bias)?e.norm.bias:0,f=a*i;if(void 0!==e){if(void 0!==e.height&&e.height!==a)throw new Error("Image output config height doesn't match tensor height");if(void 0!==e.width&&e.width!==i)throw new Error("Image output config width doesn't match tensor width");if(void 0!==e.format&&4===s&&"RGBA"!==e.format||3===s&&"RGB"!==e.format&&"BGR"!==e.format)throw new Error("Tensor format doesn't match input tensor dims")}const d=4;let p=0,h=1,g=2,m=3,b=0,y=f,A=2*f,v=-1;"RGBA"===u?(b=0,y=f,A=2*f,v=3*f):"RGB"===u?(b=0,y=f,A=2*f):"RBG"===u&&(b=0,A=f,y=2*f),o=r.createImageData(i,a);for(let e=0;e=r.byteLength)throw new RangeError(`'byteOffset' is out of range [0, 
${r.byteLength}).`);if(u=e.byteLength-o,"number"==typeof n){if(u=n,!Number.isSafeInteger(u))throw new RangeError("'byteLength' must be an integer.");if(u<=0||o+u>r.byteLength)throw new RangeError(`'byteLength' is out of range (0, ${r.byteLength-o}].`);if("object"==typeof i&&null!==i)s=i;else if(void 0!==i)throw new TypeError("'options' must be an object.")}else if(void 0!==n)throw new TypeError("'byteLength' must be a number.")}else if(void 0!==t)throw new TypeError("'options' must be an object.");a=new Uint8Array(r,o,u)}}const u=(s.executionProviders||[]).map((e=>"string"==typeof e?e:e.name)),l=await(async e=>{const t=0===e.length?o:e,n=[];for(const e of t){const t=r[e];if(t){if(t.initialized)return t.backend;if(t.aborted)continue;const r=!!t.initPromise;try{return r||(t.initPromise=t.backend.init()),await t.initPromise,t.initialized=!0,t.backend}catch(o){r||n.push({name:e,err:o}),t.aborted=!0}finally{delete t.initPromise}}}throw new Error(`no available backend found. ERR: ${n.map((e=>`[${e.name}] ${e.err}`)).join(", ")}`)})(u),c=await l.createSessionHandler(a,s);return new p(c)}startProfiling(){this.handler.startProfiling()}endProfiling(){this.handler.endProfiling()}get inputNames(){return this.handler.inputNames}get outputNames(){return this.handler.outputNames}}const h=p},264:(module,__unused_webpack_exports,__webpack_require__)=>{var process=__webpack_require__(606),e;self,e=__WEBPACK_EXTERNAL_MODULE__1670__=>(()=>{var __webpack_modules__={3474:(e,t,n)=>{var r,o=(r=(r="undefined"!=typeof document&&document.currentScript?document.currentScript.src:void 0)||"/index.js",function(e){function t(){return D.buffer!=B&&W(D.buffer),$}function o(){return D.buffer!=B&&W(D.buffer),R}function i(){return D.buffer!=B&&W(D.buffer),L}function a(){return D.buffer!=B&&W(D.buffer),F}function s(){return D.buffer!=B&&W(D.buffer),M}var u,l,c;e=e||{},u||(u=void 0!==e?e:{}),u.ready=new Promise((function(e,t){l=e,c=t}));var f,d,p,h,g,m,b=Object.assign({},u),y="./this.program",A=(e,t)=>{throw t},v="object"==typeof window,w="function"==typeof importScripts,_="object"==typeof process&&"object"==typeof process.versions&&"string"==typeof process.versions.node,x=u.ENVIRONMENT_IS_PTHREAD||!1,T="";function E(e){return u.locateFile?u.locateFile(e,T):T+e}if(_){let t;T=w?n(908).dirname(T)+"/":"//",m=()=>{g||(h=n(1384),g=n(908))},f=function(e,t){return m(),e=g.normalize(e),h.readFileSync(e,t?void 0:"utf8")},p=e=>((e=f(e,!0)).buffer||(e=new Uint8Array(e)),e),d=(e,t,n)=>{m(),e=g.normalize(e),h.readFile(e,(function(e,r){e?n(e):t(r.buffer)}))},1{if(J())throw process.exitCode=e,t;t instanceof le||P("exiting due to exception: "+t),process.exit(e)},u.inspect=function(){return"[Emscripten Module object]"};try{t=n(9925)}catch(e){throw console.error('The "worker_threads" module is not supported in this node.js build - perhaps a newer version is needed?'),e}n.g.Worker=t.Worker}else(v||w)&&(w?T=self.location.href:"undefined"!=typeof document&&document.currentScript&&(T=document.currentScript.src),r&&(T=r),T=0!==T.indexOf("blob:")?T.substr(0,T.replace(/[?#].*/,"").lastIndexOf("/")+1):"",_||(f=e=>{var t=new XMLHttpRequest;return t.open("GET",e,!1),t.send(null),t.responseText},w&&(p=e=>{var t=new XMLHttpRequest;return t.open("GET",e,!1),t.responseType="arraybuffer",t.send(null),new Uint8Array(t.response)}),d=(e,t,n)=>{var r=new XMLHttpRequest;r.open("GET",e,!0),r.responseType="arraybuffer",r.onload=()=>{200==r.status||0==r.status&&r.response?t(r.response):n()},r.onerror=n,r.send(null)}));_&&"undefined"==typeof 
performance&&(n.g.performance=n(6953).performance);var S=console.log.bind(console),O=console.warn.bind(console);_&&(m(),S=e=>h.writeSync(1,e+"\n"),O=e=>h.writeSync(2,e+"\n"));var k,I=u.print||S,P=u.printErr||O;Object.assign(u,b),b=null,u.thisProgram&&(y=u.thisProgram),u.quit&&(A=u.quit),u.wasmBinary&&(k=u.wasmBinary);var C=u.noExitRuntime||!1;"object"!=typeof WebAssembly&&ie("no native wasm support detected");var D,N,B,$,R,L,F,M,j=!1,U="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0;function z(e,t,n){var r=(t>>>=0)+n;for(n=t;e[n]&&!(n>=r);)++n;if(16(o=224==(240&o)?(15&o)<<12|i<<6|a:(7&o)<<18|i<<12|a<<6|63&e[t++])?r+=String.fromCharCode(o):(o-=65536,r+=String.fromCharCode(55296|o>>10,56320|1023&o))}}else r+=String.fromCharCode(o)}return r}function V(e,t){return(e>>>=0)?z(o(),e,t):""}function G(e,t,n,r){if(!(0>>=0;r=n+r-1;for(var i=0;i=a&&(a=65536+((1023&a)<<10)|1023&e.charCodeAt(++i)),127>=a){if(n>=r)break;t[n++>>>0]=a}else{if(2047>=a){if(n+1>=r)break;t[n++>>>0]=192|a>>6}else{if(65535>=a){if(n+2>=r)break;t[n++>>>0]=224|a>>12}else{if(n+3>=r)break;t[n++>>>0]=240|a>>18,t[n++>>>0]=128|a>>12&63}t[n++>>>0]=128|a>>6&63}t[n++>>>0]=128|63&a}}return t[n>>>0]=0,n-o}function H(e){for(var t=0,n=0;n=r?t++:2047>=r?t+=2:55296<=r&&57343>=r?(t+=4,++n):t+=3}return t}function W(e){B=e,u.HEAP8=$=new Int8Array(e),u.HEAP16=new Int16Array(e),u.HEAP32=L=new Int32Array(e),u.HEAPU8=R=new Uint8Array(e),u.HEAPU16=new Uint16Array(e),u.HEAPU32=F=new Uint32Array(e),u.HEAPF32=new Float32Array(e),u.HEAPF64=M=new Float64Array(e)}x&&(B=u.buffer);var q=u.INITIAL_MEMORY||16777216;if(x)D=u.wasmMemory,B=u.buffer;else if(u.wasmMemory)D=u.wasmMemory;else if(!((D=new WebAssembly.Memory({initial:q/65536,maximum:65536,shared:!0})).buffer instanceof SharedArrayBuffer))throw P("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),_&&console.log("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and also use a recent version)"),Error("bad memory");D&&(B=D.buffer),q=B.byteLength,W(B);var X,Y=[],K=[],Q=[],Z=[];function J(){return C||!1}function ee(){var e=u.preRun.shift();Y.unshift(e)}var te,ne=0,re=null,oe=null;function ie(e){throw x?postMessage({cmd:"onAbort",arg:e}):u.onAbort&&u.onAbort(e),P(e="Aborted("+e+")"),j=!0,e=new WebAssembly.RuntimeError(e+". 
Build with -sASSERTIONS for more info."),c(e),e}function ae(){return te.startsWith("data:application/octet-stream;base64,")}function se(){var e=te;try{if(e==te&&k)return new Uint8Array(k);if(p)return p(e);throw"both async and sync fetching of the wasm failed"}catch(e){ie(e)}}te="ort-wasm-threaded.wasm",ae()||(te=E(te));var ue={};function le(e){this.name="ExitStatus",this.message="Program terminated with exit("+e+")",this.status=e}function ce(e){(e=he.Vb[e])||ie(),he.mc(e)}function fe(e){var t=he.Cc();if(!t)return 6;he.ac.push(t),he.Vb[e.Ub]=t,t.Ub=e.Ub;var n={cmd:"run",start_routine:e.Ic,arg:e.zc,pthread_ptr:e.Ub};return t.$b=()=>{n.time=performance.now(),t.postMessage(n,e.Nc)},t.loaded&&(t.$b(),delete t.$b),0}function de(e){if(x)return qe(1,1,e);J()||(he.oc(),u.onExit&&u.onExit(e),j=!0),A(e,new le(e))}function pe(e,t){if(!t&&x)throw me(e),"unwind";J()||x||(bt(),ge(Q),mt(0),rt[1].length&&ot(1,10),rt[2].length&&ot(2,10),he.oc()),de(e)}var he={Yb:[],ac:[],qc:[],Vb:{},fc:function(){x&&he.Ec()},Pc:function(){},Ec:function(){he.receiveObjectTransfer=he.Gc,he.threadInitTLS=he.pc,he.setExitStatus=he.nc,C=!1},nc:function(){},oc:function(){for(var e of Object.values(he.Vb))he.mc(e);for(e of he.Yb)e.terminate();he.Yb=[]},mc:function(e){var t=e.Ub;delete he.Vb[t],he.Yb.push(e),he.ac.splice(he.ac.indexOf(e),1),e.Ub=0,_t(t)},Gc:function(){},pc:function(){he.qc.forEach((e=>e()))},Fc:function(e,t){e.onmessage=n=>{var r=(n=n.data).cmd;if(e.Ub&&(he.Bc=e.Ub),n.targetThread&&n.targetThread!=pt()){var o=he.Vb[n.Qc];o?o.postMessage(n,n.transferList):P('Internal error! Worker sent a message "'+r+'" to target pthread '+n.targetThread+", but that thread no longer exists!")}else"processProxyingQueue"===r?Ue(n.queue):"spawnThread"===r?fe(n):"cleanupThread"===r?ce(n.thread):"killThread"===r?(n=n.thread,r=he.Vb[n],delete he.Vb[n],r.terminate(),_t(n),he.ac.splice(he.ac.indexOf(r),1),r.Ub=0):"cancelThread"===r?he.Vb[n.thread].postMessage({cmd:"cancel"}):"loaded"===r?(e.loaded=!0,t&&t(e),e.$b&&(e.$b(),delete e.$b)):"print"===r?I("Thread "+n.threadId+": "+n.text):"printErr"===r?P("Thread "+n.threadId+": "+n.text):"alert"===r?alert("Thread "+n.threadId+": "+n.text):"setimmediate"===n.target?e.postMessage(n):"onAbort"===r?u.onAbort&&u.onAbort(n.arg):r&&P("worker sent an unknown command "+r);he.Bc=void 0},e.onerror=e=>{throw P("worker sent an error! 
"+e.filename+":"+e.lineno+": "+e.message),e},_&&(e.on("message",(function(t){e.onmessage({data:t})})),e.on("error",(function(t){e.onerror(t)})),e.on("detachedExit",(function(){}))),e.postMessage({cmd:"load",urlOrBlob:u.mainScriptUrlOrBlob||r,wasmMemory:D,wasmModule:N})},yc:function(){var e=E("ort-wasm-threaded.worker.js");he.Yb.push(new Worker(e))},Cc:function(){return 0==he.Yb.length&&(he.yc(),he.Fc(he.Yb[0])),he.Yb.pop()}};function ge(e){for(;0>2>>>0];e=i()[e+48>>2>>>0],Et(t,t-e),Ot(t)};var be=[];function ye(e){var t=be[e];return t||(e>=be.length&&(be.length=e+1),be[e]=t=X.get(e)),t}u.invokeEntryPoint=function(e,t){e=ye(e)(t),J()?he.nc(e):xt(e)};var Ae,ve,we=[],_e=0,xe=0;function Te(e){this.Zb=e,this.Sb=e-24,this.xc=function(e){a()[this.Sb+4>>2>>>0]=e},this.bc=function(){return a()[this.Sb+4>>2>>>0]},this.wc=function(e){a()[this.Sb+8>>2>>>0]=e},this.Dc=function(){return a()[this.Sb+8>>2>>>0]},this.rc=function(){i()[this.Sb>>2>>>0]=0},this.hc=function(e){e=e?1:0,t()[(this.Sb+12|0)>>>0]=e},this.uc=function(){return 0!=t()[(this.Sb+12|0)>>>0]},this.ic=function(e){e=e?1:0,t()[(this.Sb+13|0)>>>0]=e},this.kc=function(){return 0!=t()[(this.Sb+13|0)>>>0]},this.fc=function(e,t){this.cc(0),this.xc(e),this.wc(t),this.rc(),this.hc(!1),this.ic(!1)},this.sc=function(){Atomics.add(i(),this.Sb>>2,1)},this.Hc=function(){return 1===Atomics.sub(i(),this.Sb>>2,1)},this.cc=function(e){a()[this.Sb+16>>2>>>0]=e},this.tc=function(){return a()[this.Sb+16>>2>>>0]},this.vc=function(){if(Pt(this.bc()))return a()[this.Zb>>2>>>0];var e=this.tc();return 0!==e?e:this.Zb}}function Ee(e){return gt(new Te(e).Sb)}function Se(e,t,n,r){return x?qe(3,1,e,t,n,r):Oe(e,t,n,r)}function Oe(e,t,n,r){if("undefined"==typeof SharedArrayBuffer)return P("Current environment does not support SharedArrayBuffer, pthreads are not available!"),6;var o=[];return x&&0===o.length?Se(e,t,n,r):(e={Ic:n,Ub:e,zc:r,Nc:o},x?(e.Oc="spawnThread",postMessage(e,o),0):fe(e))}function ke(e,t,n){return x?qe(4,1,e,t,n):0}function Ie(e,t){if(x)return qe(5,1,e,t)}function Pe(e,t){if(x)return qe(6,1,e,t)}function Ce(e,t,n){if(x)return qe(7,1,e,t,n)}function De(e,t,n){return x?qe(8,1,e,t,n):0}function Ne(e,t){if(x)return qe(9,1,e,t)}function Be(e,t,n){if(x)return qe(10,1,e,t,n)}function $e(e,t,n,r){if(x)return qe(11,1,e,t,n,r)}function Re(e,t,n,r){if(x)return qe(12,1,e,t,n,r)}function Le(e,t,n,r){if(x)return qe(13,1,e,t,n,r)}function Fe(e){if(x)return qe(14,1,e)}function Me(e,t){if(x)return qe(15,1,e,t)}function je(e,t,n){if(x)return qe(16,1,e,t,n)}function Ue(e){Atomics.store(i(),e>>2,1),pt()&&wt(e),Atomics.compareExchange(i(),e>>2,1,0)}function ze(e){return a()[e>>>2]+4294967296*i()[e+4>>>2]}function Ve(e,t,n,r,o,i){return x?qe(17,1,e,t,n,r,o,i):-52}function Ge(e,t,n,r,o,i){if(x)return qe(18,1,e,t,n,r,o,i)}function He(e){var n=H(e)+1,r=ht(n);return r&&G(e,t(),r,n),r}function We(e,t,n){function r(e){return(e=e.toTimeString().match(/\(([A-Za-z ]+)\)$/))?e[1]:"GMT"}if(x)return qe(19,1,e,t,n);var o=(new Date).getFullYear(),s=new Date(o,0,1),u=new Date(o,6,1);o=s.getTimezoneOffset();var l=u.getTimezoneOffset(),c=Math.max(o,l);i()[e>>2>>>0]=60*c,i()[t>>2>>>0]=Number(o!=l),e=r(s),t=r(u),e=He(e),t=He(t),l>2>>>0]=e,a()[n+4>>2>>>0]=t):(a()[n>>2>>>0]=t,a()[n+4>>2>>>0]=e)}function qe(e,t){var n=arguments.length-2,r=arguments;return function(e){var t=St();return e=e(),Ot(t),e}((()=>{for(var o=kt(8*n),i=o>>3,a=0;a>>0]=u}return vt(e,n,o,t)}))}u.executeNotifiedProxyingQueue=Ue,ve=_?()=>{var e=process.hrtime();return 
1e3*e[0]+e[1]/1e6}:x?()=>performance.now()-u.__performance_now_clock_drift:()=>performance.now();var Xe,Ye=[],Ke={};function Qe(){if(!Xe){var e,t={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:("object"==typeof navigator&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:y||"./this.program"};for(e in Ke)void 0===Ke[e]?delete t[e]:t[e]=Ke[e];var n=[];for(e in t)n.push(e+"="+t[e]);Xe=n}return Xe}function Ze(e,n){if(x)return qe(20,1,e,n);var r=0;return Qe().forEach((function(o,i){var s=n+r;for(i=a()[e+4*i>>2>>>0]=s,s=0;s>>0]=o.charCodeAt(s);t()[(0|i)>>>0]=0,r+=o.length+1})),0}function Je(e,t){if(x)return qe(21,1,e,t);var n=Qe();a()[e>>2>>>0]=n.length;var r=0;return n.forEach((function(e){r+=e.length+1})),a()[t>>2>>>0]=r,0}function et(e){return x?qe(22,1,e):52}function tt(e,t,n,r){return x?qe(23,1,e,t,n,r):52}function nt(e,t,n,r,o){return x?qe(24,1,e,t,n,r,o):70}var rt=[null,[],[]];function ot(e,t){var n=rt[e];0===t||10===t?((1===e?I:P)(z(n,0)),n.length=0):n.push(t)}function it(e,t,n,r){if(x)return qe(25,1,e,t,n,r);for(var i=0,s=0;s>2>>>0],l=a()[t+4>>2>>>0];t+=8;for(var c=0;c>>0]);i+=l}return a()[r>>2>>>0]=i,0}var at=0;function st(e){return 0==e%4&&(0!=e%100||0==e%400)}var ut=[31,29,31,30,31,30,31,31,30,31,30,31],lt=[31,28,31,30,31,30,31,31,30,31,30,31];function ct(e,n,r,o){function a(e,t,n){for(e="number"==typeof e?e.toString():e||"";e.lengthe?-1:0r-e.getDate())){e.setDate(e.getDate()+t);break}t-=r-e.getDate()+1,e.setDate(1),11>n?e.setMonth(n+1):(e.setMonth(0),e.setFullYear(e.getFullYear()+1))}return n=new Date(e.getFullYear()+1,0,4),t=l(new Date(e.getFullYear(),0,4)),n=l(n),0>=u(t,e)?0>=u(n,e)?e.getFullYear()+1:e.getFullYear():e.getFullYear()-1}var f=i()[o+40>>2>>>0];for(var d in o={Lc:i()[o>>2>>>0],Kc:i()[o+4>>2>>>0],dc:i()[o+8>>2>>>0],jc:i()[o+12>>2>>>0],ec:i()[o+16>>2>>>0],Xb:i()[o+20>>2>>>0],Tb:i()[o+24>>2>>>0],Wb:i()[o+28>>2>>>0],Rc:i()[o+32>>2>>>0],Jc:i()[o+36>>2>>>0],Mc:f?V(f):""},r=V(r),f={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"})r=r.replace(new RegExp(d,"g"),f[d]);var p="Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "),h="January February March April May June July August September October November December".split(" ");for(d in f={"%a":function(e){return p[e.Tb].substring(0,3)},"%A":function(e){return p[e.Tb]},"%b":function(e){return h[e.ec].substring(0,3)},"%B":function(e){return h[e.ec]},"%C":function(e){return s((e.Xb+1900)/100|0,2)},"%d":function(e){return s(e.jc,2)},"%e":function(e){return a(e.jc,2," ")},"%g":function(e){return c(e).toString().substring(2)},"%G":function(e){return c(e)},"%H":function(e){return s(e.dc,2)},"%I":function(e){return 0==(e=e.dc)?e=12:12e.dc?"AM":"PM"},"%S":function(e){return s(e.Lc,2)},"%t":function(){return"\t"},"%u":function(e){return e.Tb||7},"%U":function(e){return s(Math.floor((e.Wb+7-e.Tb)/7),2)},"%V":function(e){var t=Math.floor((e.Wb+7-(e.Tb+6)%7)/7);if(2>=(e.Tb+371-e.Wb-2)%7&&t++,t)53==t&&(4==(n=(e.Tb+371-e.Wb)%7)||3==n&&st(e.Xb)||(t=1));else{t=52;var n=(e.Tb+7-e.Wb-1)%7;(4==n||5==n&&st(e.Xb%400-1))&&t++}return s(t,2)},"%w":function(e){return e.Tb},"%W":function(e){return 
s(Math.floor((e.Wb+7-(e.Tb+6)%7)/7),2)},"%y":function(e){return(e.Xb+1900).toString().substring(2)},"%Y":function(e){return e.Xb+1900},"%z":function(e){var t=0<=(e=e.Jc);return e=Math.abs(e)/60,(t?"+":"-")+String("0000"+(e/60*100+e%60)).slice(-4)},"%Z":function(e){return e.Mc},"%%":function(){return"%"}},r=r.replace(/%%/g,"\0\0"),f)r.includes(d)&&(r=r.replace(new RegExp(d,"g"),f[d](o)));return d=function(e){var t=Array(H(e)+1);return G(e,t,0,t.length),t}(r=r.replace(/\0\0/g,"%")),d.length>n?0:(function(e,n){t().set(e,n>>>0)}(d,e),d.length-1)}he.fc();var ft=[null,de,me,Se,ke,Ie,Pe,Ce,De,Ne,Be,$e,Re,Le,Fe,Me,je,Ve,Ge,We,Ze,Je,et,tt,nt,it],dt={b:function(e){return ht(e+24)+24},n:function(e){return(e=new Te(e)).uc()||(e.hc(!0),_e--),e.ic(!1),we.push(e),e.sc(),e.vc()},ma:function(e){throw P("Unexpected exception thrown, this is not properly supported - aborting"),j=!0,e},x:function(){Tt(0);var e=we.pop();if(e.Hc()&&!e.kc()){var t=e.Dc();t&&ye(t)(e.Zb),Ee(e.Zb)}xe=0},e:function(){var e=xe;if(!e)return at=0;var t=new Te(e);t.cc(e);var n=t.bc();if(!n)return at=0,e;for(var r=Array.prototype.slice.call(arguments),o=0;oUe(r)));else if(x)postMessage({targetThread:e,cmd:"processProxyingQueue",queue:r});else{if(!(e=he.Vb[e]))return;e.postMessage({cmd:"processProxyingQueue",queue:r})}return 1},Ea:function(){return-1},Pa:function(e,t){e=new Date(1e3*ze(e)),i()[t>>2>>>0]=e.getUTCSeconds(),i()[t+4>>2>>>0]=e.getUTCMinutes(),i()[t+8>>2>>>0]=e.getUTCHours(),i()[t+12>>2>>>0]=e.getUTCDate(),i()[t+16>>2>>>0]=e.getUTCMonth(),i()[t+20>>2>>>0]=e.getUTCFullYear()-1900,i()[t+24>>2>>>0]=e.getUTCDay(),e=(e.getTime()-Date.UTC(e.getUTCFullYear(),0,1,0,0,0,0))/864e5|0,i()[t+28>>2>>>0]=e},Qa:function(e,t){e=new Date(1e3*ze(e)),i()[t>>2>>>0]=e.getSeconds(),i()[t+4>>2>>>0]=e.getMinutes(),i()[t+8>>2>>>0]=e.getHours(),i()[t+12>>2>>>0]=e.getDate(),i()[t+16>>2>>>0]=e.getMonth(),i()[t+20>>2>>>0]=e.getFullYear()-1900,i()[t+24>>2>>>0]=e.getDay();var n=new Date(e.getFullYear(),0,1),r=(e.getTime()-n.getTime())/864e5|0;i()[t+28>>2>>>0]=r,i()[t+36>>2>>>0]=-60*e.getTimezoneOffset(),r=new Date(e.getFullYear(),6,1).getTimezoneOffset(),e=0|(r!=(n=n.getTimezoneOffset())&&e.getTimezoneOffset()==Math.min(n,r)),i()[t+32>>2>>>0]=e},Ra:function(e){var t=new Date(i()[e+20>>2>>>0]+1900,i()[e+16>>2>>>0],i()[e+12>>2>>>0],i()[e+8>>2>>>0],i()[e+4>>2>>>0],i()[e>>2>>>0],0),n=i()[e+32>>2>>>0],r=t.getTimezoneOffset(),o=new Date(t.getFullYear(),0,1),a=new Date(t.getFullYear(),6,1).getTimezoneOffset(),s=o.getTimezoneOffset(),u=Math.min(s,a);return 0>n?i()[e+32>>2>>>0]=Number(a!=s&&u==r):0>2>>>0]=t.getDay(),n=(t.getTime()-o.getTime())/864e5|0,i()[e+28>>2>>>0]=n,i()[e>>2>>>0]=t.getSeconds(),i()[e+4>>2>>>0]=t.getMinutes(),i()[e+8>>2>>>0]=t.getHours(),i()[e+12>>2>>>0]=t.getDate(),i()[e+16>>2>>>0]=t.getMonth(),t.getTime()/1e3|0},Aa:Ve,Ba:Ge,Sa:function e(t,n,r){e.Ac||(e.Ac=!0,We(t,n,r))},y:function(){ie("")},U:function(){if(!_&&!w){var e="Blocking on the main thread is very dangerous, see https://emscripten.org/docs/porting/pthreads.html#blocking-on-the-main-browser-thread";Ae||(Ae={}),Ae[e]||(Ae[e]=1,_&&(e="warning: "+e),P(e))}},ra:function(){return 4294901760},B:ve,Ia:function(e,t,n){o().copyWithin(e>>>0,t>>>0,t+n>>>0)},F:function(){return _?n(3993).cpus().length:navigator.hardwareConcurrency},Da:function(e,t,n){Ye.length=t,n>>=3;for(var r=0;r>>0];return(0>e?ue[-e-1]:ft[e]).apply(null,Ye)},qa:function(e){var t=o().length;if((e>>>=0)<=t||4294901760=n;n*=2){var r=t*(1+.2/n);r=Math.min(r,e+100663296);var 
i=Math;r=Math.max(e,r),i=i.min.call(i,4294901760,r+(65536-r%65536)%65536);e:{try{D.grow(i-B.byteLength+65535>>>16),W(D.buffer);var a=1;break e}catch(e){}a=void 0}if(a)return!0}return!1},Na:function(){throw"unwind"},Ga:Ze,Ha:Je,J:pe,I:et,S:tt,ga:nt,R:it,d:function(){return at},na:function e(r,o){e.lc||(e.lc=function(){if("object"==typeof crypto&&"function"==typeof crypto.getRandomValues){var e=new Uint8Array(1);return()=>(crypto.getRandomValues(e),e[0])}if(_)try{var t=n(Object(function(){var e=new Error("Cannot find module 'crypto'");throw e.code="MODULE_NOT_FOUND",e}()));return()=>t.randomBytes(1)[0]}catch(e){}return()=>ie("randomDevice")}());for(var i=0;i>>0]=e.lc();return 0},ia:function(e,t,n){var r=St();try{return ye(e)(t,n)}catch(e){if(Ot(r),e!==e+0)throw e;Tt(1,0)}},ja:function(e,t,n){var r=St();try{return ye(e)(t,n)}catch(e){if(Ot(r),e!==e+0)throw e;Tt(1,0)}},K:function(e){var t=St();try{return ye(e)()}catch(e){if(Ot(t),e!==e+0)throw e;Tt(1,0)}},f:function(e,t){var n=St();try{return ye(e)(t)}catch(e){if(Ot(n),e!==e+0)throw e;Tt(1,0)}},P:function(e,t,n){var r=St();try{return ye(e)(t,n)}catch(e){if(Ot(r),e!==e+0)throw e;Tt(1,0)}},Q:function(e,t,n){var r=St();try{return ye(e)(t,n)}catch(e){if(Ot(r),e!==e+0)throw e;Tt(1,0)}},k:function(e,t,n){var r=St();try{return ye(e)(t,n)}catch(e){if(Ot(r),e!==e+0)throw e;Tt(1,0)}},p:function(e,t,n,r){var o=St();try{return ye(e)(t,n,r)}catch(e){if(Ot(o),e!==e+0)throw e;Tt(1,0)}},q:function(e,t,n,r,o){var i=St();try{return ye(e)(t,n,r,o)}catch(e){if(Ot(i),e!==e+0)throw e;Tt(1,0)}},N:function(e,t,n,r,o,i){var a=St();try{return ye(e)(t,n,r,o,i)}catch(e){if(Ot(a),e!==e+0)throw e;Tt(1,0)}},s:function(e,t,n,r,o,i){var a=St();try{return ye(e)(t,n,r,o,i)}catch(e){if(Ot(a),e!==e+0)throw e;Tt(1,0)}},w:function(e,t,n,r,o,i,a){var s=St();try{return ye(e)(t,n,r,o,i,a)}catch(e){if(Ot(s),e!==e+0)throw e;Tt(1,0)}},L:function(e,t,n,r,o,i,a,s){var u=St();try{return ye(e)(t,n,r,o,i,a,s)}catch(e){if(Ot(u),e!==e+0)throw e;Tt(1,0)}},E:function(e,t,n,r,o,i,a,s,u,l,c,f){var d=St();try{return ye(e)(t,n,r,o,i,a,s,u,l,c,f)}catch(e){if(Ot(d),e!==e+0)throw e;Tt(1,0)}},aa:function(e,t,n,r,o,i,a,s){var u=St();try{return Mt(e,t,n,r,o,i,a,s)}catch(e){if(Ot(u),e!==e+0)throw e;Tt(1,0)}},_:function(e,t,n,r,o,i,a){var s=St();try{return Dt(e,t,n,r,o,i,a)}catch(e){if(Ot(s),e!==e+0)throw e;Tt(1,0)}},Z:function(e,t,n,r,o){var i=St();try{return jt(e,t,n,r,o)}catch(e){if(Ot(i),e!==e+0)throw e;Tt(1,0)}},ca:function(e,t,n,r){var o=St();try{return Lt(e,t,n,r)}catch(e){if(Ot(o),e!==e+0)throw e;Tt(1,0)}},$:function(e){var t=St();try{return Ct(e)}catch(e){if(Ot(t),e!==e+0)throw e;Tt(1,0)}},ba:function(e,t){var n=St();try{return Ft(e,t)}catch(e){if(Ot(n),e!==e+0)throw e;Tt(1,0)}},Y:function(e,t,n){var r=St();try{return Nt(e,t,n)}catch(e){if(Ot(r),e!==e+0)throw e;Tt(1,0)}},g:function(e){var t=St();try{ye(e)()}catch(e){if(Ot(t),e!==e+0)throw e;Tt(1,0)}},r:function(e,t){var n=St();try{ye(e)(t)}catch(e){if(Ot(n),e!==e+0)throw e;Tt(1,0)}},i:function(e,t,n){var r=St();try{ye(e)(t,n)}catch(e){if(Ot(r),e!==e+0)throw e;Tt(1,0)}},ha:function(e,t,n,r){var o=St();try{ye(e)(t,n,r)}catch(e){if(Ot(o),e!==e+0)throw e;Tt(1,0)}},m:function(e,t,n,r){var o=St();try{ye(e)(t,n,r)}catch(e){if(Ot(o),e!==e+0)throw e;Tt(1,0)}},v:function(e,t,n,r,o){var i=St();try{ye(e)(t,n,r,o)}catch(e){if(Ot(i),e!==e+0)throw e;Tt(1,0)}},u:function(e,t,n,r,o,i){var a=St();try{ye(e)(t,n,r,o,i)}catch(e){if(Ot(a),e!==e+0)throw e;Tt(1,0)}},O:function(e,t,n,r,o,i,a){var s=St();try{ye(e)(t,n,r,o,i,a)}catch(e){if(Ot(s),e!==e+0)throw 
e;Tt(1,0)}},A:function(e,t,n,r,o,i,a,s){var u=St();try{ye(e)(t,n,r,o,i,a,s)}catch(e){if(Ot(u),e!==e+0)throw e;Tt(1,0)}},ka:function(e,t,n,r,o,i,a,s,u){var l=St();try{ye(e)(t,n,r,o,i,a,s,u)}catch(e){if(Ot(l),e!==e+0)throw e;Tt(1,0)}},C:function(e,t,n,r,o,i,a,s,u,l,c){var f=St();try{ye(e)(t,n,r,o,i,a,s,u,l,c)}catch(e){if(Ot(f),e!==e+0)throw e;Tt(1,0)}},D:function(e,t,n,r,o,i,a,s,u,l,c,f,d,p,h,g){var m=St();try{ye(e)(t,n,r,o,i,a,s,u,l,c,f,d,p,h,g)}catch(e){if(Ot(m),e!==e+0)throw e;Tt(1,0)}},fa:function(e,t,n,r,o,i,a,s){var u=St();try{Bt(e,t,n,r,o,i,a,s)}catch(e){if(Ot(u),e!==e+0)throw e;Tt(1,0)}},da:function(e,t,n,r,o,i,a,s,u,l,c,f){var d=St();try{Rt(e,t,n,r,o,i,a,s,u,l,c,f)}catch(e){if(Ot(d),e!==e+0)throw e;Tt(1,0)}},ea:function(e,t,n,r,o,i){var a=St();try{$t(e,t,n,r,o,i)}catch(e){if(Ot(a),e!==e+0)throw e;Tt(1,0)}},o:function(e){return e},a:D||u.wasmMemory,G:function(e){at=e},la:ct,z:function(e,t,n,r){return ct(e,t,n,r)}};!function(){function e(e,t){u.asm=e.exports,he.qc.push(u.asm.sb),X=u.asm.ub,K.unshift(u.asm.Va),N=t,x||(ne--,u.monitorRunDependencies&&u.monitorRunDependencies(ne),0==ne&&(null!==re&&(clearInterval(re),re=null),oe&&(e=oe,oe=null,e())))}function t(t){e(t.instance,t.module)}function n(e){return function(){if(!k&&(v||w)){if("function"==typeof fetch&&!te.startsWith("file://"))return fetch(te,{credentials:"same-origin"}).then((function(e){if(!e.ok)throw"failed to load wasm binary file at '"+te+"'";return e.arrayBuffer()})).catch((function(){return se()}));if(d)return new Promise((function(e,t){d(te,(function(t){e(new Uint8Array(t))}),t)}))}return Promise.resolve().then((function(){return se()}))}().then((function(e){return WebAssembly.instantiate(e,r)})).then((function(e){return e})).then(e,(function(e){P("failed to asynchronously prepare wasm: "+e),ie(e)}))}var r={a:dt};if(x||(ne++,u.monitorRunDependencies&&u.monitorRunDependencies(ne)),u.instantiateWasm)try{return u.instantiateWasm(r,e)}catch(e){return P("Module.instantiateWasm callback failed with error: "+e),!1}(k||"function"!=typeof WebAssembly.instantiateStreaming||ae()||te.startsWith("file://")||_||"function"!=typeof fetch?n(t):fetch(te,{credentials:"same-origin"}).then((function(e){return WebAssembly.instantiateStreaming(e,r).then(t,(function(e){return P("wasm streaming compile failed: "+e),P("falling back to ArrayBuffer 
instantiation"),n(t)}))}))).catch(c)}(),u.___wasm_call_ctors=function(){return(u.___wasm_call_ctors=u.asm.Va).apply(null,arguments)},u._OrtInit=function(){return(u._OrtInit=u.asm.Wa).apply(null,arguments)},u._OrtCreateSessionOptions=function(){return(u._OrtCreateSessionOptions=u.asm.Xa).apply(null,arguments)},u._OrtAppendExecutionProvider=function(){return(u._OrtAppendExecutionProvider=u.asm.Ya).apply(null,arguments)},u._OrtAddSessionConfigEntry=function(){return(u._OrtAddSessionConfigEntry=u.asm.Za).apply(null,arguments)},u._OrtReleaseSessionOptions=function(){return(u._OrtReleaseSessionOptions=u.asm._a).apply(null,arguments)},u._OrtCreateSession=function(){return(u._OrtCreateSession=u.asm.$a).apply(null,arguments)},u._OrtReleaseSession=function(){return(u._OrtReleaseSession=u.asm.ab).apply(null,arguments)},u._OrtGetInputCount=function(){return(u._OrtGetInputCount=u.asm.bb).apply(null,arguments)},u._OrtGetOutputCount=function(){return(u._OrtGetOutputCount=u.asm.cb).apply(null,arguments)},u._OrtGetInputName=function(){return(u._OrtGetInputName=u.asm.db).apply(null,arguments)},u._OrtGetOutputName=function(){return(u._OrtGetOutputName=u.asm.eb).apply(null,arguments)},u._OrtFree=function(){return(u._OrtFree=u.asm.fb).apply(null,arguments)},u._OrtCreateTensor=function(){return(u._OrtCreateTensor=u.asm.gb).apply(null,arguments)},u._OrtGetTensorData=function(){return(u._OrtGetTensorData=u.asm.hb).apply(null,arguments)},u._OrtReleaseTensor=function(){return(u._OrtReleaseTensor=u.asm.ib).apply(null,arguments)},u._OrtCreateRunOptions=function(){return(u._OrtCreateRunOptions=u.asm.jb).apply(null,arguments)},u._OrtAddRunConfigEntry=function(){return(u._OrtAddRunConfigEntry=u.asm.kb).apply(null,arguments)},u._OrtReleaseRunOptions=function(){return(u._OrtReleaseRunOptions=u.asm.lb).apply(null,arguments)},u._OrtRun=function(){return(u._OrtRun=u.asm.mb).apply(null,arguments)},u._OrtEndProfiling=function(){return(u._OrtEndProfiling=u.asm.nb).apply(null,arguments)};var pt=u._pthread_self=function(){return(pt=u._pthread_self=u.asm.ob).apply(null,arguments)},ht=u._malloc=function(){return(ht=u._malloc=u.asm.pb).apply(null,arguments)},gt=u._free=function(){return(gt=u._free=u.asm.qb).apply(null,arguments)},mt=u._fflush=function(){return(mt=u._fflush=u.asm.rb).apply(null,arguments)};u.__emscripten_tls_init=function(){return(u.__emscripten_tls_init=u.asm.sb).apply(null,arguments)};var bt=u.___funcs_on_exit=function(){return(bt=u.___funcs_on_exit=u.asm.tb).apply(null,arguments)},yt=u.__emscripten_thread_init=function(){return(yt=u.__emscripten_thread_init=u.asm.vb).apply(null,arguments)};u.__emscripten_thread_crashed=function(){return(u.__emscripten_thread_crashed=u.asm.wb).apply(null,arguments)};var 
At,vt=u._emscripten_run_in_main_runtime_thread_js=function(){return(vt=u._emscripten_run_in_main_runtime_thread_js=u.asm.xb).apply(null,arguments)},wt=u.__emscripten_proxy_execute_task_queue=function(){return(wt=u.__emscripten_proxy_execute_task_queue=u.asm.yb).apply(null,arguments)},_t=u.__emscripten_thread_free_data=function(){return(_t=u.__emscripten_thread_free_data=u.asm.zb).apply(null,arguments)},xt=u.__emscripten_thread_exit=function(){return(xt=u.__emscripten_thread_exit=u.asm.Ab).apply(null,arguments)},Tt=u._setThrew=function(){return(Tt=u._setThrew=u.asm.Bb).apply(null,arguments)},Et=u._emscripten_stack_set_limits=function(){return(Et=u._emscripten_stack_set_limits=u.asm.Cb).apply(null,arguments)},St=u.stackSave=function(){return(St=u.stackSave=u.asm.Db).apply(null,arguments)},Ot=u.stackRestore=function(){return(Ot=u.stackRestore=u.asm.Eb).apply(null,arguments)},kt=u.stackAlloc=function(){return(kt=u.stackAlloc=u.asm.Fb).apply(null,arguments)},It=u.___cxa_can_catch=function(){return(It=u.___cxa_can_catch=u.asm.Gb).apply(null,arguments)},Pt=u.___cxa_is_pointer_type=function(){return(Pt=u.___cxa_is_pointer_type=u.asm.Hb).apply(null,arguments)},Ct=u.dynCall_j=function(){return(Ct=u.dynCall_j=u.asm.Ib).apply(null,arguments)},Dt=u.dynCall_iiiiij=function(){return(Dt=u.dynCall_iiiiij=u.asm.Jb).apply(null,arguments)},Nt=u.dynCall_jii=function(){return(Nt=u.dynCall_jii=u.asm.Kb).apply(null,arguments)},Bt=u.dynCall_viiiiij=function(){return(Bt=u.dynCall_viiiiij=u.asm.Lb).apply(null,arguments)},$t=u.dynCall_vjji=function(){return($t=u.dynCall_vjji=u.asm.Mb).apply(null,arguments)},Rt=u.dynCall_viiijjjii=function(){return(Rt=u.dynCall_viiijjjii=u.asm.Nb).apply(null,arguments)},Lt=u.dynCall_iij=function(){return(Lt=u.dynCall_iij=u.asm.Ob).apply(null,arguments)},Ft=u.dynCall_ji=function(){return(Ft=u.dynCall_ji=u.asm.Pb).apply(null,arguments)},Mt=u.dynCall_iiiiiij=function(){return(Mt=u.dynCall_iiiiiij=u.asm.Qb).apply(null,arguments)},jt=u.dynCall_iiij=function(){return(jt=u.dynCall_iiij=u.asm.Rb).apply(null,arguments)};function Ut(){function e(){if(!At&&(At=!0,u.calledRun=!0,!j)&&(x||ge(K),l(u),u.onRuntimeInitialized&&u.onRuntimeInitialized(),!x)){if(u.postRun)for("function"==typeof u.postRun&&(u.postRun=[u.postRun]);u.postRun.length;){var e=u.postRun.shift();Z.unshift(e)}ge(Z)}}if(!(0{var r,o=(r=(r="undefined"!=typeof document&&document.currentScript?document.currentScript.src:void 0)||"/index.js",function(e){var t,o,i;e=e||{},t||(t=void 0!==e?e:{}),t.ready=new Promise((function(e,t){o=e,i=t}));var a,s,u,l,c,f,d=Object.assign({},t),p="./this.program",h=(e,t)=>{throw t},g="object"==typeof window,m="function"==typeof importScripts,b="object"==typeof process&&"object"==typeof process.versions&&"string"==typeof process.versions.node,y="";b?(y=m?n(908).dirname(y)+"/":"//",f=()=>{c||(l=n(1384),c=n(908))},a=function(e,t){return f(),e=c.normalize(e),l.readFileSync(e,t?void 0:"utf8")},u=e=>((e=a(e,!0)).buffer||(e=new Uint8Array(e)),e),s=(e,t,n)=>{f(),e=c.normalize(e),l.readFile(e,(function(e,r){e?n(e):t(r.buffer)}))},1{if(_||0{var t=new XMLHttpRequest;return t.open("GET",e,!1),t.send(null),t.responseText},m&&(u=e=>{var t=new XMLHttpRequest;return t.open("GET",e,!1),t.responseType="arraybuffer",t.send(null),new Uint8Array(t.response)}),s=(e,t,n)=>{var r=new XMLHttpRequest;r.open("GET",e,!0),r.responseType="arraybuffer",r.onload=()=>{200==r.status||0==r.status&&r.response?t(r.response):n()},r.onerror=n,r.send(null)});var 
A,v=t.print||console.log.bind(console),w=t.printErr||console.warn.bind(console);Object.assign(t,d),d=null,t.thisProgram&&(p=t.thisProgram),t.quit&&(h=t.quit),t.wasmBinary&&(A=t.wasmBinary);var _=t.noExitRuntime||!1;"object"!=typeof WebAssembly&&q("no native wasm support detected");var x,T,E,S,O,k,I=!1,P="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0;function C(e,t,n){var r=(t>>>=0)+n;for(n=t;e[n]&&!(n>=r);)++n;if(16(o=224==(240&o)?(15&o)<<12|i<<6|a:(7&o)<<18|i<<12|a<<6|63&e[t++])?r+=String.fromCharCode(o):(o-=65536,r+=String.fromCharCode(55296|o>>10,56320|1023&o))}}else r+=String.fromCharCode(o)}return r}function D(e,t){return(e>>>=0)?C(S,e,t):""}function N(e,t,n,r){if(!(0>>=0;r=n+r-1;for(var i=0;i=a&&(a=65536+((1023&a)<<10)|1023&e.charCodeAt(++i)),127>=a){if(n>=r)break;t[n++>>>0]=a}else{if(2047>=a){if(n+1>=r)break;t[n++>>>0]=192|a>>6}else{if(65535>=a){if(n+2>=r)break;t[n++>>>0]=224|a>>12}else{if(n+3>=r)break;t[n++>>>0]=240|a>>18,t[n++>>>0]=128|a>>12&63}t[n++>>>0]=128|a>>6&63}t[n++>>>0]=128|63&a}}return t[n>>>0]=0,n-o}function B(e){for(var t=0,n=0;n=r?t++:2047>=r?t+=2:55296<=r&&57343>=r?(t+=4,++n):t+=3}return t}function $(){var e=x.buffer;T=e,t.HEAP8=E=new Int8Array(e),t.HEAP16=new Int16Array(e),t.HEAP32=O=new Int32Array(e),t.HEAPU8=S=new Uint8Array(e),t.HEAPU16=new Uint16Array(e),t.HEAPU32=k=new Uint32Array(e),t.HEAPF32=new Float32Array(e),t.HEAPF64=new Float64Array(e)}var R,L=[],F=[],M=[],j=[],U=0;function z(){var e=t.preRun.shift();L.unshift(e)}var V,G=0,H=null,W=null;function q(e){throw t.onAbort&&t.onAbort(e),w(e="Aborted("+e+")"),I=!0,e=new WebAssembly.RuntimeError(e+". Build with -sASSERTIONS for more info."),i(e),e}function X(){return V.startsWith("data:application/octet-stream;base64,")}if(V="ort-wasm.wasm",!X()){var Y=V;V=t.locateFile?t.locateFile(Y,y):y+Y}function K(){var e=V;try{if(e==V&&A)return new Uint8Array(A);if(u)return u(e);throw"both async and sync fetching of the wasm failed"}catch(e){q(e)}}function Q(e){this.name="ExitStatus",this.message="Program terminated with exit("+e+")",this.status=e}function Z(e){for(;0>2>>>0]=e},this.Eb=function(){return k[this.zb+4>>2>>>0]},this.Sb=function(e){k[this.zb+8>>2>>>0]=e},this.Wb=function(){return k[this.zb+8>>2>>>0]},this.Tb=function(){O[this.zb>>2>>>0]=0},this.Ib=function(e){E[(this.zb+12|0)>>>0]=e?1:0},this.Pb=function(){return 0!=E[(this.zb+12|0)>>>0]},this.Jb=function(e){E[(this.zb+13|0)>>>0]=e?1:0},this.Lb=function(){return 0!=E[(this.zb+13|0)>>>0]},this.Rb=function(e,t){this.Fb(0),this.Ub(e),this.Sb(t),this.Tb(),this.Ib(!1),this.Jb(!1)},this.Nb=function(){O[this.zb>>2>>>0]+=1},this.Xb=function(){var e=O[this.zb>>2>>>0];return O[this.zb>>2>>>0]=e-1,1===e},this.Fb=function(e){k[this.zb+16>>2>>>0]=e},this.Ob=function(){return k[this.zb+16>>2>>>0]},this.Qb=function(){if(ke(this.Eb()))return k[this.Db>>2>>>0];var e=this.Ob();return 0!==e?e:this.Db}}function re(e){return ve(new ne(e).zb)}var oe=[];function ie(e){var t=oe[e];return t||(e>=oe.length&&(oe.length=e+1),oe[e]=t=R.get(e)),t}function ae(e){var t=B(e)+1,n=Ae(t);return n&&N(e,E,n,t),n}var se={};function ue(){if(!le){var e,t={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:("object"==typeof navigator&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:p||"./this.program"};for(e in se)void 0===se[e]?delete t[e]:t[e]=se[e];var n=[];for(e in t)n.push(e+"="+t[e]);le=n}return le}var le,ce=[null,[],[]];function fe(e,t){var n=ce[e];0===t||10===t?((1===e?v:w)(C(n,0)),n.length=0):n.push(t)}var de=0;function 
pe(e){return 0==e%4&&(0!=e%100||0==e%400)}var he=[31,29,31,30,31,30,31,31,30,31,30,31],ge=[31,28,31,30,31,30,31,31,30,31,30,31];function me(e,t,n,r){function o(e,t,n){for(e="number"==typeof e?e.toString():e||"";e.lengthe?-1:0r-e.getDate())){e.setDate(e.getDate()+t);break}t-=r-e.getDate()+1,e.setDate(1),11>n?e.setMonth(n+1):(e.setMonth(0),e.setFullYear(e.getFullYear()+1))}return n=new Date(e.getFullYear()+1,0,4),t=s(new Date(e.getFullYear(),0,4)),n=s(n),0>=a(t,e)?0>=a(n,e)?e.getFullYear()+1:e.getFullYear():e.getFullYear()-1}var l=O[r+40>>2>>>0];for(var c in r={$b:O[r>>2>>>0],Zb:O[r+4>>2>>>0],Gb:O[r+8>>2>>>0],Kb:O[r+12>>2>>>0],Hb:O[r+16>>2>>>0],Cb:O[r+20>>2>>>0],Ab:O[r+24>>2>>>0],Bb:O[r+28>>2>>>0],bc:O[r+32>>2>>>0],Yb:O[r+36>>2>>>0],ac:l?D(l):""},n=D(n),l={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"})n=n.replace(new RegExp(c,"g"),l[c]);var f="Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "),d="January February March April May June July August September October November December".split(" ");for(c in l={"%a":function(e){return f[e.Ab].substring(0,3)},"%A":function(e){return f[e.Ab]},"%b":function(e){return d[e.Hb].substring(0,3)},"%B":function(e){return d[e.Hb]},"%C":function(e){return i((e.Cb+1900)/100|0,2)},"%d":function(e){return i(e.Kb,2)},"%e":function(e){return o(e.Kb,2," ")},"%g":function(e){return u(e).toString().substring(2)},"%G":function(e){return u(e)},"%H":function(e){return i(e.Gb,2)},"%I":function(e){return 0==(e=e.Gb)?e=12:12e.Gb?"AM":"PM"},"%S":function(e){return i(e.$b,2)},"%t":function(){return"\t"},"%u":function(e){return e.Ab||7},"%U":function(e){return i(Math.floor((e.Bb+7-e.Ab)/7),2)},"%V":function(e){var t=Math.floor((e.Bb+7-(e.Ab+6)%7)/7);if(2>=(e.Ab+371-e.Bb-2)%7&&t++,t)53==t&&(4==(n=(e.Ab+371-e.Bb)%7)||3==n&&pe(e.Cb)||(t=1));else{t=52;var n=(e.Ab+7-e.Bb-1)%7;(4==n||5==n&&pe(e.Cb%400-1))&&t++}return i(t,2)},"%w":function(e){return e.Ab},"%W":function(e){return i(Math.floor((e.Bb+7-(e.Ab+6)%7)/7),2)},"%y":function(e){return(e.Cb+1900).toString().substring(2)},"%Y":function(e){return e.Cb+1900},"%z":function(e){var t=0<=(e=e.Yb);return e=Math.abs(e)/60,(t?"+":"-")+String("0000"+(e/60*100+e%60)).slice(-4)},"%Z":function(e){return e.ac},"%%":function(){return"%"}},n=n.replace(/%%/g,"\0\0"),l)n.includes(c)&&(n=n.replace(new RegExp(c,"g"),l[c](r)));return c=function(e){var t=Array(B(e)+1);return N(e,t,0,t.length),t}(n=n.replace(/\0\0/g,"%")),c.length>t?0:(E.set(c,e>>>0),c.length-1)}var be={a:function(e){return Ae(e+24)+24},m:function(e){return(e=new ne(e)).Pb()||(e.Ib(!0),ee--),e.Jb(!1),J.push(e),e.Nb(),e.Qb()},ia:function(e){throw w("Unexpected exception thrown, this is not properly supported - aborting"),I=!0,e},w:function(){xe(0);var e=J.pop();if(e.Xb()&&!e.Lb()){var t=e.Wb();t&&ie(t)(e.Db),re(e.Db)}te=0},d:function(){var e=te;if(!e)return de=0;var t=new ne(e);t.Fb(e);var n=t.Eb();if(!n)return de=0,e;for(var 
r=Array.prototype.slice.call(arguments),o=0;o>>2]+4294967296*O[e+4>>>2])),O[t>>2>>>0]=e.getUTCSeconds(),O[t+4>>2>>>0]=e.getUTCMinutes(),O[t+8>>2>>>0]=e.getUTCHours(),O[t+12>>2>>>0]=e.getUTCDate(),O[t+16>>2>>>0]=e.getUTCMonth(),O[t+20>>2>>>0]=e.getUTCFullYear()-1900,O[t+24>>2>>>0]=e.getUTCDay(),O[t+28>>2>>>0]=(e.getTime()-Date.UTC(e.getUTCFullYear(),0,1,0,0,0,0))/864e5|0},Ea:function(e,t){e=new Date(1e3*(k[e>>>2]+4294967296*O[e+4>>>2])),O[t>>2>>>0]=e.getSeconds(),O[t+4>>2>>>0]=e.getMinutes(),O[t+8>>2>>>0]=e.getHours(),O[t+12>>2>>>0]=e.getDate(),O[t+16>>2>>>0]=e.getMonth(),O[t+20>>2>>>0]=e.getFullYear()-1900,O[t+24>>2>>>0]=e.getDay();var n=new Date(e.getFullYear(),0,1);O[t+28>>2>>>0]=(e.getTime()-n.getTime())/864e5|0,O[t+36>>2>>>0]=-60*e.getTimezoneOffset();var r=new Date(e.getFullYear(),6,1).getTimezoneOffset();n=n.getTimezoneOffset(),O[t+32>>2>>>0]=0|(r!=n&&e.getTimezoneOffset()==Math.min(n,r))},Fa:function(e){var t=new Date(O[e+20>>2>>>0]+1900,O[e+16>>2>>>0],O[e+12>>2>>>0],O[e+8>>2>>>0],O[e+4>>2>>>0],O[e>>2>>>0],0),n=O[e+32>>2>>>0],r=t.getTimezoneOffset(),o=new Date(t.getFullYear(),0,1),i=new Date(t.getFullYear(),6,1).getTimezoneOffset(),a=o.getTimezoneOffset(),s=Math.min(a,i);return 0>n?O[e+32>>2>>>0]=Number(i!=a&&s==r):0>2>>>0]=t.getDay(),O[e+28>>2>>>0]=(t.getTime()-o.getTime())/864e5|0,O[e>>2>>>0]=t.getSeconds(),O[e+4>>2>>>0]=t.getMinutes(),O[e+8>>2>>>0]=t.getHours(),O[e+12>>2>>>0]=t.getDate(),O[e+16>>2>>>0]=t.getMonth(),t.getTime()/1e3|0},sa:function(){return-52},ta:function(){},Ga:function e(t,n,r){e.Vb||(e.Vb=!0,function(e,t,n){function r(e){return(e=e.toTimeString().match(/\(([A-Za-z ]+)\)$/))?e[1]:"GMT"}var o=(new Date).getFullYear(),i=new Date(o,0,1),a=new Date(o,6,1);o=i.getTimezoneOffset();var s=a.getTimezoneOffset();O[e>>2>>>0]=60*Math.max(o,s),O[t>>2>>>0]=Number(o!=s),e=r(i),t=r(a),e=ae(e),t=ae(t),s>2>>>0]=e,k[n+4>>2>>>0]=t):(k[n>>2>>>0]=t,k[n+4>>2>>>0]=e)}(t,n,r))},B:function(){q("")},ma:function(){return 4294901760},I:b?()=>{var e=process.hrtime();return 1e3*e[0]+e[1]/1e6}:()=>performance.now(),xa:function(e,t,n){S.copyWithin(e>>>0,t>>>0,t+n>>>0)},G:function(e){var t=S.length;if(4294901760<(e>>>=0))return!1;for(var n=1;4>=n;n*=2){var r=t*(1+.2/n);r=Math.min(r,e+100663296);var o=Math;r=Math.max(e,r),o=o.min.call(o,4294901760,r+(65536-r%65536)%65536);e:{try{x.grow(o-T.byteLength+65535>>>16),$();var i=1;break e}catch(e){}i=void 0}if(i)return!0}return!1},va:function(e,t){var n=0;return ue().forEach((function(r,o){var i=t+n;for(o=k[e+4*o>>2>>>0]=i,i=0;i>>0]=r.charCodeAt(i);E[(0|o)>>>0]=0,n+=r.length+1})),0},wa:function(e,t){var n=ue();k[e>>2>>>0]=n.length;var r=0;return n.forEach((function(e){r+=e.length+1})),k[t>>2>>>0]=r,0},ba:function(e){_||0>2>>>0],s=k[t+4>>2>>>0];t+=8;for(var u=0;u>>0]);o+=s}return k[r>>2>>>0]=o,0},c:function(){return de},ja:function e(t,r){e.Mb||(e.Mb=function(){if("object"==typeof crypto&&"function"==typeof crypto.getRandomValues){var e=new Uint8Array(1);return()=>(crypto.getRandomValues(e),e[0])}if(b)try{var t=n(Object(function(){var e=new Error("Cannot find module 'crypto'");throw e.code="MODULE_NOT_FOUND",e}()));return()=>t.randomBytes(1)[0]}catch(e){}return()=>q("randomDevice")}());for(var o=0;o>>0]=e.Mb();return 0},ea:function(e,t,n){var r=Te();try{return ie(e)(t,n)}catch(e){if(Ee(r),e!==e+0)throw e;xe(1,0)}},fa:function(e,t,n){var r=Te();try{return ie(e)(t,n)}catch(e){if(Ee(r),e!==e+0)throw e;xe(1,0)}},J:function(e){var t=Te();try{return ie(e)()}catch(e){if(Ee(t),e!==e+0)throw e;xe(1,0)}},e:function(e,t){var n=Te();try{return 
ie(e)(t)}catch(e){if(Ee(n),e!==e+0)throw e;xe(1,0)}},N:function(e,t,n){var r=Te();try{return ie(e)(t,n)}catch(e){if(Ee(r),e!==e+0)throw e;xe(1,0)}},O:function(e,t,n){var r=Te();try{return ie(e)(t,n)}catch(e){if(Ee(r),e!==e+0)throw e;xe(1,0)}},j:function(e,t,n){var r=Te();try{return ie(e)(t,n)}catch(e){if(Ee(r),e!==e+0)throw e;xe(1,0)}},o:function(e,t,n,r){var o=Te();try{return ie(e)(t,n,r)}catch(e){if(Ee(o),e!==e+0)throw e;xe(1,0)}},p:function(e,t,n,r,o){var i=Te();try{return ie(e)(t,n,r,o)}catch(e){if(Ee(i),e!==e+0)throw e;xe(1,0)}},M:function(e,t,n,r,o,i){var a=Te();try{return ie(e)(t,n,r,o,i)}catch(e){if(Ee(a),e!==e+0)throw e;xe(1,0)}},r:function(e,t,n,r,o,i){var a=Te();try{return ie(e)(t,n,r,o,i)}catch(e){if(Ee(a),e!==e+0)throw e;xe(1,0)}},v:function(e,t,n,r,o,i,a){var s=Te();try{return ie(e)(t,n,r,o,i,a)}catch(e){if(Ee(s),e!==e+0)throw e;xe(1,0)}},K:function(e,t,n,r,o,i,a,s){var u=Te();try{return ie(e)(t,n,r,o,i,a,s)}catch(e){if(Ee(u),e!==e+0)throw e;xe(1,0)}},D:function(e,t,n,r,o,i,a,s,u,l,c,f){var d=Te();try{return ie(e)(t,n,r,o,i,a,s,u,l,c,f)}catch(e){if(Ee(d),e!==e+0)throw e;xe(1,0)}},X:function(e,t,n,r,o,i,a,s){var u=Te();try{return Le(e,t,n,r,o,i,a,s)}catch(e){if(Ee(u),e!==e+0)throw e;xe(1,0)}},V:function(e,t,n,r,o,i,a){var s=Te();try{return Pe(e,t,n,r,o,i,a)}catch(e){if(Ee(s),e!==e+0)throw e;xe(1,0)}},U:function(e,t,n,r,o){var i=Te();try{return Fe(e,t,n,r,o)}catch(e){if(Ee(i),e!==e+0)throw e;xe(1,0)}},Z:function(e,t,n,r){var o=Te();try{return $e(e,t,n,r)}catch(e){if(Ee(o),e!==e+0)throw e;xe(1,0)}},W:function(e){var t=Te();try{return Ie(e)}catch(e){if(Ee(t),e!==e+0)throw e;xe(1,0)}},Y:function(e,t){var n=Te();try{return Re(e,t)}catch(e){if(Ee(n),e!==e+0)throw e;xe(1,0)}},T:function(e,t,n){var r=Te();try{return Ce(e,t,n)}catch(e){if(Ee(r),e!==e+0)throw e;xe(1,0)}},f:function(e){var t=Te();try{ie(e)()}catch(e){if(Ee(t),e!==e+0)throw e;xe(1,0)}},q:function(e,t){var n=Te();try{ie(e)(t)}catch(e){if(Ee(n),e!==e+0)throw e;xe(1,0)}},h:function(e,t,n){var r=Te();try{ie(e)(t,n)}catch(e){if(Ee(r),e!==e+0)throw e;xe(1,0)}},da:function(e,t,n,r){var o=Te();try{ie(e)(t,n,r)}catch(e){if(Ee(o),e!==e+0)throw e;xe(1,0)}},l:function(e,t,n,r){var o=Te();try{ie(e)(t,n,r)}catch(e){if(Ee(o),e!==e+0)throw e;xe(1,0)}},t:function(e,t,n,r,o){var i=Te();try{ie(e)(t,n,r,o)}catch(e){if(Ee(i),e!==e+0)throw e;xe(1,0)}},u:function(e,t,n,r,o,i){var a=Te();try{ie(e)(t,n,r,o,i)}catch(e){if(Ee(a),e!==e+0)throw e;xe(1,0)}},x:function(e,t,n,r,o,i,a){var s=Te();try{ie(e)(t,n,r,o,i,a)}catch(e){if(Ee(s),e!==e+0)throw e;xe(1,0)}},z:function(e,t,n,r,o,i,a,s){var u=Te();try{ie(e)(t,n,r,o,i,a,s)}catch(e){if(Ee(u),e!==e+0)throw e;xe(1,0)}},ga:function(e,t,n,r,o,i,a,s,u){var l=Te();try{ie(e)(t,n,r,o,i,a,s,u)}catch(e){if(Ee(l),e!==e+0)throw e;xe(1,0)}},A:function(e,t,n,r,o,i,a,s,u,l,c){var f=Te();try{ie(e)(t,n,r,o,i,a,s,u,l,c)}catch(e){if(Ee(f),e!==e+0)throw e;xe(1,0)}},C:function(e,t,n,r,o,i,a,s,u,l,c,f,d,p,h,g){var m=Te();try{ie(e)(t,n,r,o,i,a,s,u,l,c,f,d,p,h,g)}catch(e){if(Ee(m),e!==e+0)throw e;xe(1,0)}},aa:function(e,t,n,r,o,i,a,s){var u=Te();try{De(e,t,n,r,o,i,a,s)}catch(e){if(Ee(u),e!==e+0)throw e;xe(1,0)}},_:function(e,t,n,r,o,i,a,s,u,l,c,f){var d=Te();try{Be(e,t,n,r,o,i,a,s,u,l,c,f)}catch(e){if(Ee(d),e!==e+0)throw e;xe(1,0)}},$:function(e,t,n,r,o,i){var a=Te();try{Ne(e,t,n,r,o,i)}catch(e){if(Ee(a),e!==e+0)throw e;xe(1,0)}},n:function(e){return e},F:function(e){de=e},ha:me,y:function(e,t,n,r){return me(e,t,n,r)}};!function(){function 
e(e){t.asm=e.exports,x=t.asm.Ka,$(),R=t.asm.ib,F.unshift(t.asm.La),G--,t.monitorRunDependencies&&t.monitorRunDependencies(G),0==G&&(null!==H&&(clearInterval(H),H=null),W&&(e=W,W=null,e()))}function n(t){e(t.instance)}function r(e){return function(){if(!A&&(g||m)){if("function"==typeof fetch&&!V.startsWith("file://"))return fetch(V,{credentials:"same-origin"}).then((function(e){if(!e.ok)throw"failed to load wasm binary file at '"+V+"'";return e.arrayBuffer()})).catch((function(){return K()}));if(s)return new Promise((function(e,t){s(V,(function(t){e(new Uint8Array(t))}),t)}))}return Promise.resolve().then((function(){return K()}))}().then((function(e){return WebAssembly.instantiate(e,o)})).then((function(e){return e})).then(e,(function(e){w("failed to asynchronously prepare wasm: "+e),q(e)}))}var o={a:be};if(G++,t.monitorRunDependencies&&t.monitorRunDependencies(G),t.instantiateWasm)try{return t.instantiateWasm(o,e)}catch(e){return w("Module.instantiateWasm callback failed with error: "+e),!1}(A||"function"!=typeof WebAssembly.instantiateStreaming||X()||V.startsWith("file://")||b||"function"!=typeof fetch?r(n):fetch(V,{credentials:"same-origin"}).then((function(e){return WebAssembly.instantiateStreaming(e,o).then(n,(function(e){return w("wasm streaming compile failed: "+e),w("falling back to ArrayBuffer instantiation"),r(n)}))}))).catch(i)}(),t.___wasm_call_ctors=function(){return(t.___wasm_call_ctors=t.asm.La).apply(null,arguments)},t._OrtInit=function(){return(t._OrtInit=t.asm.Ma).apply(null,arguments)},t._OrtCreateSessionOptions=function(){return(t._OrtCreateSessionOptions=t.asm.Na).apply(null,arguments)},t._OrtAppendExecutionProvider=function(){return(t._OrtAppendExecutionProvider=t.asm.Oa).apply(null,arguments)},t._OrtAddSessionConfigEntry=function(){return(t._OrtAddSessionConfigEntry=t.asm.Pa).apply(null,arguments)},t._OrtReleaseSessionOptions=function(){return(t._OrtReleaseSessionOptions=t.asm.Qa).apply(null,arguments)},t._OrtCreateSession=function(){return(t._OrtCreateSession=t.asm.Ra).apply(null,arguments)},t._OrtReleaseSession=function(){return(t._OrtReleaseSession=t.asm.Sa).apply(null,arguments)},t._OrtGetInputCount=function(){return(t._OrtGetInputCount=t.asm.Ta).apply(null,arguments)},t._OrtGetOutputCount=function(){return(t._OrtGetOutputCount=t.asm.Ua).apply(null,arguments)},t._OrtGetInputName=function(){return(t._OrtGetInputName=t.asm.Va).apply(null,arguments)},t._OrtGetOutputName=function(){return(t._OrtGetOutputName=t.asm.Wa).apply(null,arguments)},t._OrtFree=function(){return(t._OrtFree=t.asm.Xa).apply(null,arguments)},t._OrtCreateTensor=function(){return(t._OrtCreateTensor=t.asm.Ya).apply(null,arguments)},t._OrtGetTensorData=function(){return(t._OrtGetTensorData=t.asm.Za).apply(null,arguments)},t._OrtReleaseTensor=function(){return(t._OrtReleaseTensor=t.asm._a).apply(null,arguments)},t._OrtCreateRunOptions=function(){return(t._OrtCreateRunOptions=t.asm.$a).apply(null,arguments)},t._OrtAddRunConfigEntry=function(){return(t._OrtAddRunConfigEntry=t.asm.ab).apply(null,arguments)},t._OrtReleaseRunOptions=function(){return(t._OrtReleaseRunOptions=t.asm.bb).apply(null,arguments)},t._OrtRun=function(){return(t._OrtRun=t.asm.cb).apply(null,arguments)},t._OrtEndProfiling=function(){return(t._OrtEndProfiling=t.asm.db).apply(null,arguments)};var 
ye,Ae=t._malloc=function(){return(Ae=t._malloc=t.asm.eb).apply(null,arguments)},ve=t._free=function(){return(ve=t._free=t.asm.fb).apply(null,arguments)},we=t._fflush=function(){return(we=t._fflush=t.asm.gb).apply(null,arguments)},_e=t.___funcs_on_exit=function(){return(_e=t.___funcs_on_exit=t.asm.hb).apply(null,arguments)},xe=t._setThrew=function(){return(xe=t._setThrew=t.asm.jb).apply(null,arguments)},Te=t.stackSave=function(){return(Te=t.stackSave=t.asm.kb).apply(null,arguments)},Ee=t.stackRestore=function(){return(Ee=t.stackRestore=t.asm.lb).apply(null,arguments)},Se=t.stackAlloc=function(){return(Se=t.stackAlloc=t.asm.mb).apply(null,arguments)},Oe=t.___cxa_can_catch=function(){return(Oe=t.___cxa_can_catch=t.asm.nb).apply(null,arguments)},ke=t.___cxa_is_pointer_type=function(){return(ke=t.___cxa_is_pointer_type=t.asm.ob).apply(null,arguments)},Ie=t.dynCall_j=function(){return(Ie=t.dynCall_j=t.asm.pb).apply(null,arguments)},Pe=t.dynCall_iiiiij=function(){return(Pe=t.dynCall_iiiiij=t.asm.qb).apply(null,arguments)},Ce=t.dynCall_jii=function(){return(Ce=t.dynCall_jii=t.asm.rb).apply(null,arguments)},De=t.dynCall_viiiiij=function(){return(De=t.dynCall_viiiiij=t.asm.sb).apply(null,arguments)},Ne=t.dynCall_vjji=function(){return(Ne=t.dynCall_vjji=t.asm.tb).apply(null,arguments)},Be=t.dynCall_viiijjjii=function(){return(Be=t.dynCall_viiijjjii=t.asm.ub).apply(null,arguments)},$e=t.dynCall_iij=function(){return($e=t.dynCall_iij=t.asm.vb).apply(null,arguments)},Re=t.dynCall_ji=function(){return(Re=t.dynCall_ji=t.asm.wb).apply(null,arguments)},Le=t.dynCall_iiiiiij=function(){return(Le=t.dynCall_iiiiiij=t.asm.xb).apply(null,arguments)},Fe=t.dynCall_iiij=function(){return(Fe=t.dynCall_iiij=t.asm.yb).apply(null,arguments)};function Me(){function e(){if(!ye&&(ye=!0,t.calledRun=!0,!I)){if(Z(F),o(t),t.onRuntimeInitialized&&t.onRuntimeInitialized(),t.postRun)for("function"==typeof t.postRun&&(t.postRun=[t.postRun]);t.postRun.length;){var e=t.postRun.shift();j.unshift(e)}Z(j)}}if(!(0{"use strict";e.exports=function(e,t){for(var n=new Array(arguments.length-1),r=0,o=2,i=!0;o{"use strict";var n=t;n.length=function(e){var t=e.length;if(!t)return 0;for(var n=0;--t%4>1&&"="===e.charAt(t);)++n;return Math.ceil(3*e.length)/4-n};for(var r=new Array(64),o=new Array(123),i=0;i<64;)o[r[i]=i<26?i+65:i<52?i+71:i<62?i-4:i-59|43]=i++;n.encode=function(e,t,n){for(var o,i=null,a=[],s=0,u=0;t>2],o=(3&l)<<4,u=1;break;case 1:a[s++]=r[o|l>>4],o=(15&l)<<2,u=2;break;case 2:a[s++]=r[o|l>>6],a[s++]=r[63&l],u=0}s>8191&&((i||(i=[])).push(String.fromCharCode.apply(String,a)),s=0)}return u&&(a[s++]=r[o],a[s++]=61,1===u&&(a[s++]=61)),i?(s&&i.push(String.fromCharCode.apply(String,a.slice(0,s))),i.join("")):String.fromCharCode.apply(String,a.slice(0,s))};var a="invalid encoding";n.decode=function(e,t,n){for(var r,i=n,s=0,u=0;u1)break;if(void 0===(l=o[l]))throw Error(a);switch(s){case 0:r=l,s=1;break;case 1:t[n++]=r<<2|(48&l)>>4,r=l,s=2;break;case 2:t[n++]=(15&r)<<4|(60&l)>>2,r=l,s=3;break;case 3:t[n++]=(3&r)<<6|l,s=0}}if(1===s)throw Error(a);return n-i},n.test=function(e){return/^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/.test(e)}},9211:e=>{"use strict";function t(){this._listeners={}}e.exports=t,t.prototype.on=function(e,t,n){return(this._listeners[e]||(this._listeners[e]=[])).push({fn:t,ctx:n||this}),this},t.prototype.off=function(e,t){if(void 0===e)this._listeners={};else if(void 0===t)this._listeners[e]=[];else for(var n=this._listeners[e],r=0;r{"use strict";function t(e){return"undefined"!=typeof 
Float32Array?function(){var t=new Float32Array([-0]),n=new Uint8Array(t.buffer),r=128===n[3];function o(e,r,o){t[0]=e,r[o]=n[0],r[o+1]=n[1],r[o+2]=n[2],r[o+3]=n[3]}function i(e,r,o){t[0]=e,r[o]=n[3],r[o+1]=n[2],r[o+2]=n[1],r[o+3]=n[0]}function a(e,r){return n[0]=e[r],n[1]=e[r+1],n[2]=e[r+2],n[3]=e[r+3],t[0]}function s(e,r){return n[3]=e[r],n[2]=e[r+1],n[1]=e[r+2],n[0]=e[r+3],t[0]}e.writeFloatLE=r?o:i,e.writeFloatBE=r?i:o,e.readFloatLE=r?a:s,e.readFloatBE=r?s:a}():function(){function t(e,t,n,r){var o=t<0?1:0;if(o&&(t=-t),0===t)e(1/t>0?0:2147483648,n,r);else if(isNaN(t))e(2143289344,n,r);else if(t>34028234663852886e22)e((o<<31|2139095040)>>>0,n,r);else if(t<11754943508222875e-54)e((o<<31|Math.round(t/1401298464324817e-60))>>>0,n,r);else{var i=Math.floor(Math.log(t)/Math.LN2);e((o<<31|i+127<<23|8388607&Math.round(t*Math.pow(2,-i)*8388608))>>>0,n,r)}}function a(e,t,n){var r=e(t,n),o=2*(r>>31)+1,i=r>>>23&255,a=8388607&r;return 255===i?a?NaN:o*(1/0):0===i?1401298464324817e-60*o*a:o*Math.pow(2,i-150)*(a+8388608)}e.writeFloatLE=t.bind(null,n),e.writeFloatBE=t.bind(null,r),e.readFloatLE=a.bind(null,o),e.readFloatBE=a.bind(null,i)}(),"undefined"!=typeof Float64Array?function(){var t=new Float64Array([-0]),n=new Uint8Array(t.buffer),r=128===n[7];function o(e,r,o){t[0]=e,r[o]=n[0],r[o+1]=n[1],r[o+2]=n[2],r[o+3]=n[3],r[o+4]=n[4],r[o+5]=n[5],r[o+6]=n[6],r[o+7]=n[7]}function i(e,r,o){t[0]=e,r[o]=n[7],r[o+1]=n[6],r[o+2]=n[5],r[o+3]=n[4],r[o+4]=n[3],r[o+5]=n[2],r[o+6]=n[1],r[o+7]=n[0]}function a(e,r){return n[0]=e[r],n[1]=e[r+1],n[2]=e[r+2],n[3]=e[r+3],n[4]=e[r+4],n[5]=e[r+5],n[6]=e[r+6],n[7]=e[r+7],t[0]}function s(e,r){return n[7]=e[r],n[6]=e[r+1],n[5]=e[r+2],n[4]=e[r+3],n[3]=e[r+4],n[2]=e[r+5],n[1]=e[r+6],n[0]=e[r+7],t[0]}e.writeDoubleLE=r?o:i,e.writeDoubleBE=r?i:o,e.readDoubleLE=r?a:s,e.readDoubleBE=r?s:a}():function(){function t(e,t,n,r,o,i){var a=r<0?1:0;if(a&&(r=-r),0===r)e(0,o,i+t),e(1/r>0?0:2147483648,o,i+n);else if(isNaN(r))e(0,o,i+t),e(2146959360,o,i+n);else if(r>17976931348623157e292)e(0,o,i+t),e((a<<31|2146435072)>>>0,o,i+n);else{var s;if(r<22250738585072014e-324)e((s=r/5e-324)>>>0,o,i+t),e((a<<31|s/4294967296)>>>0,o,i+n);else{var u=Math.floor(Math.log(r)/Math.LN2);1024===u&&(u=1023),e(4503599627370496*(s=r*Math.pow(2,-u))>>>0,o,i+t),e((a<<31|u+1023<<20|1048576*s&1048575)>>>0,o,i+n)}}}function a(e,t,n,r,o){var i=e(r,o+t),a=e(r,o+n),s=2*(a>>31)+1,u=a>>>20&2047,l=4294967296*(1048575&a)+i;return 2047===u?l?NaN:s*(1/0):0===u?5e-324*s*l:s*Math.pow(2,u-1075)*(l+4503599627370496)}e.writeDoubleLE=t.bind(null,n,0,4),e.writeDoubleBE=t.bind(null,r,4,0),e.readDoubleLE=a.bind(null,o,0,4),e.readDoubleBE=a.bind(null,i,4,0)}(),e}function n(e,t,n){t[n]=255&e,t[n+1]=e>>>8&255,t[n+2]=e>>>16&255,t[n+3]=e>>>24}function r(e,t,n){t[n]=e>>>24,t[n+1]=e>>>16&255,t[n+2]=e>>>8&255,t[n+3]=255&e}function o(e,t){return(e[t]|e[t+1]<<8|e[t+2]<<16|e[t+3]<<24)>>>0}function i(e,t){return(e[t]<<24|e[t+1]<<16|e[t+2]<<8|e[t+3])>>>0}e.exports=t(t)},7199:module=>{"use strict";function inquire(moduleName){try{var mod=eval("quire".replace(/^/,"re"))(moduleName);if(mod&&(mod.length||Object.keys(mod).length))return mod}catch(e){}return null}module.exports=inquire},6662:e=>{"use strict";e.exports=function(e,t,n){var r=n||8192,o=r>>>1,i=null,a=r;return function(n){if(n<1||n>o)return e(n);a+n>r&&(i=e(r),a=0);var s=t.call(i,a,a+=n);return 7&a&&(a=1+(7|a)),s}}},4997:(e,t)=>{"use strict";var n=t;n.length=function(e){for(var 
t=0,n=0,r=0;r191&&r<224?i[a++]=(31&r)<<6|63&e[t++]:r>239&&r<365?(r=((7&r)<<18|(63&e[t++])<<12|(63&e[t++])<<6|63&e[t++])-65536,i[a++]=55296+(r>>10),i[a++]=56320+(1023&r)):i[a++]=(15&r)<<12|(63&e[t++])<<6|63&e[t++],a>8191&&((o||(o=[])).push(String.fromCharCode.apply(String,i)),a=0);return o?(a&&o.push(String.fromCharCode.apply(String,i.slice(0,a))),o.join("")):String.fromCharCode.apply(String,i.slice(0,a))},n.write=function(e,t,n){for(var r,o,i=n,a=0;a>6|192,t[n++]=63&r|128):55296==(64512&r)&&56320==(64512&(o=e.charCodeAt(a+1)))?(r=65536+((1023&r)<<10)+(1023&o),++a,t[n++]=r>>18|240,t[n++]=r>>12&63|128,t[n++]=r>>6&63|128,t[n++]=63&r|128):(t[n++]=r>>12|224,t[n++]=r>>6&63|128,t[n++]=63&r|128);return n-i}},3442:(e,t)=>{"use strict";t.__esModule=!0;var n=function(){function e(t){if(!t)throw new TypeError("Invalid argument; `value` has no value.");this.value=e.EMPTY,t&&e.isGuid(t)&&(this.value=t)}return e.isGuid=function(t){var n=t.toString();return t&&(t instanceof e||e.validator.test(n))},e.create=function(){return new e([e.gen(2),e.gen(1),e.gen(1),e.gen(1),e.gen(3)].join("-"))},e.createEmpty=function(){return new e("emptyguid")},e.parse=function(t){return new e(t)},e.raw=function(){return[e.gen(2),e.gen(1),e.gen(1),e.gen(1),e.gen(3)].join("-")},e.gen=function(e){for(var t="",n=0;n{e.exports=n;var t=null;try{t=new WebAssembly.Instance(new WebAssembly.Module(new Uint8Array([0,97,115,109,1,0,0,0,1,13,2,96,0,1,127,96,4,127,127,127,127,1,127,3,7,6,0,1,1,1,1,1,6,6,1,127,1,65,0,11,7,50,6,3,109,117,108,0,1,5,100,105,118,95,115,0,2,5,100,105,118,95,117,0,3,5,114,101,109,95,115,0,4,5,114,101,109,95,117,0,5,8,103,101,116,95,104,105,103,104,0,0,10,191,1,6,4,0,35,0,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,126,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,127,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,128,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,129,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,130,34,4,66,32,135,167,36,0,32,4,167,11])),{}).exports}catch(e){}function n(e,t,n){this.low=0|e,this.high=0|t,this.unsigned=!!n}function r(e){return!0===(e&&e.__isLong__)}n.prototype.__isLong__,Object.defineProperty(n.prototype,"__isLong__",{value:!0}),n.isLong=r;var o={},i={};function a(e,t){var n,r,a;return t?(a=0<=(e>>>=0)&&e<256)&&(r=i[e])?r:(n=u(e,(0|e)<0?-1:0,!0),a&&(i[e]=n),n):(a=-128<=(e|=0)&&e<128)&&(r=o[e])?r:(n=u(e,e<0?-1:0,!1),a&&(o[e]=n),n)}function s(e,t){if(isNaN(e))return t?b:m;if(t){if(e<0)return b;if(e>=p)return _}else{if(e<=-h)return x;if(e+1>=h)return w}return e<0?s(-e,t).neg():u(e%d|0,e/d|0,t)}function u(e,t,r){return new n(e,t,r)}n.fromInt=a,n.fromNumber=s,n.fromBits=u;var l=Math.pow;function c(e,t,n){if(0===e.length)throw Error("empty string");if("NaN"===e||"Infinity"===e||"+Infinity"===e||"-Infinity"===e)return m;if("number"==typeof t?(n=t,t=!1):t=!!t,(n=n||10)<2||360)throw Error("interior hyphen");if(0===r)return c(e.substring(1),t,n).neg();for(var o=s(l(n,8)),i=m,a=0;a>>0:this.low},T.toNumber=function(){return this.unsigned?(this.high>>>0)*d+(this.low>>>0):this.high*d+(this.low>>>0)},T.toString=function(e){if((e=e||10)<2||36>>0).toString(e);if((i=u).isZero())return c+a;for(;c.length<6;)c="0"+c;a=""+c+a}},T.getHighBits=function(){return this.high},T.getHighBitsUnsigned=function(){return 
this.high>>>0},T.getLowBits=function(){return this.low},T.getLowBitsUnsigned=function(){return this.low>>>0},T.getNumBitsAbs=function(){if(this.isNegative())return this.eq(x)?64:this.neg().getNumBitsAbs();for(var e=0!=this.high?this.high:this.low,t=31;t>0&&!(e&1<=0},T.isOdd=function(){return!(1&~this.low)},T.isEven=function(){return!(1&this.low)},T.equals=function(e){return r(e)||(e=f(e)),(this.unsigned===e.unsigned||this.high>>>31!=1||e.high>>>31!=1)&&this.high===e.high&&this.low===e.low},T.eq=T.equals,T.notEquals=function(e){return!this.eq(e)},T.neq=T.notEquals,T.ne=T.notEquals,T.lessThan=function(e){return this.comp(e)<0},T.lt=T.lessThan,T.lessThanOrEqual=function(e){return this.comp(e)<=0},T.lte=T.lessThanOrEqual,T.le=T.lessThanOrEqual,T.greaterThan=function(e){return this.comp(e)>0},T.gt=T.greaterThan,T.greaterThanOrEqual=function(e){return this.comp(e)>=0},T.gte=T.greaterThanOrEqual,T.ge=T.greaterThanOrEqual,T.compare=function(e){if(r(e)||(e=f(e)),this.eq(e))return 0;var t=this.isNegative(),n=e.isNegative();return t&&!n?-1:!t&&n?1:this.unsigned?e.high>>>0>this.high>>>0||e.high===this.high&&e.low>>>0>this.low>>>0?-1:1:this.sub(e).isNegative()?-1:1},T.comp=T.compare,T.negate=function(){return!this.unsigned&&this.eq(x)?x:this.not().add(y)},T.neg=T.negate,T.add=function(e){r(e)||(e=f(e));var t=this.high>>>16,n=65535&this.high,o=this.low>>>16,i=65535&this.low,a=e.high>>>16,s=65535&e.high,l=e.low>>>16,c=0,d=0,p=0,h=0;return p+=(h+=i+(65535&e.low))>>>16,d+=(p+=o+l)>>>16,c+=(d+=n+s)>>>16,c+=t+a,u((p&=65535)<<16|(h&=65535),(c&=65535)<<16|(d&=65535),this.unsigned)},T.subtract=function(e){return r(e)||(e=f(e)),this.add(e.neg())},T.sub=T.subtract,T.multiply=function(e){if(this.isZero())return m;if(r(e)||(e=f(e)),t)return u(t.mul(this.low,this.high,e.low,e.high),t.get_high(),this.unsigned);if(e.isZero())return m;if(this.eq(x))return e.isOdd()?x:m;if(e.eq(x))return this.isOdd()?x:m;if(this.isNegative())return e.isNegative()?this.neg().mul(e.neg()):this.neg().mul(e).neg();if(e.isNegative())return this.mul(e.neg()).neg();if(this.lt(g)&&e.lt(g))return s(this.toNumber()*e.toNumber(),this.unsigned);var n=this.high>>>16,o=65535&this.high,i=this.low>>>16,a=65535&this.low,l=e.high>>>16,c=65535&e.high,d=e.low>>>16,p=65535&e.low,h=0,b=0,y=0,A=0;return y+=(A+=a*p)>>>16,b+=(y+=i*p)>>>16,y&=65535,b+=(y+=a*d)>>>16,h+=(b+=o*p)>>>16,b&=65535,h+=(b+=i*d)>>>16,b&=65535,h+=(b+=a*c)>>>16,h+=n*p+o*d+i*c+a*l,u((y&=65535)<<16|(A&=65535),(h&=65535)<<16|(b&=65535),this.unsigned)},T.mul=T.multiply,T.divide=function(e){if(r(e)||(e=f(e)),e.isZero())throw Error("division by zero");var n,o,i;if(t)return this.unsigned||-2147483648!==this.high||-1!==e.low||-1!==e.high?u((this.unsigned?t.div_u:t.div_s)(this.low,this.high,e.low,e.high),t.get_high(),this.unsigned):this;if(this.isZero())return this.unsigned?b:m;if(this.unsigned){if(e.unsigned||(e=e.toUnsigned()),e.gt(this))return b;if(e.gt(this.shru(1)))return A;i=b}else{if(this.eq(x))return e.eq(y)||e.eq(v)?x:e.eq(x)?y:(n=this.shr(1).div(e).shl(1)).eq(m)?e.isNegative()?y:v:(o=this.sub(e.mul(n)),i=n.add(o.div(e)));if(e.eq(x))return this.unsigned?b:m;if(this.isNegative())return e.isNegative()?this.neg().div(e.neg()):this.neg().div(e).neg();if(e.isNegative())return this.div(e.neg()).neg();i=m}for(o=this;o.gte(e);){n=Math.max(1,Math.floor(o.toNumber()/e.toNumber()));for(var a=Math.ceil(Math.log(n)/Math.LN2),c=a<=48?1:l(2,a-48),d=s(n),p=d.mul(e);p.isNegative()||p.gt(o);)p=(d=s(n-=c,this.unsigned)).mul(e);d.isZero()&&(d=y),i=i.add(d),o=o.sub(p)}return 
i},T.div=T.divide,T.modulo=function(e){return r(e)||(e=f(e)),t?u((this.unsigned?t.rem_u:t.rem_s)(this.low,this.high,e.low,e.high),t.get_high(),this.unsigned):this.sub(this.div(e).mul(e))},T.mod=T.modulo,T.rem=T.modulo,T.not=function(){return u(~this.low,~this.high,this.unsigned)},T.and=function(e){return r(e)||(e=f(e)),u(this.low&e.low,this.high&e.high,this.unsigned)},T.or=function(e){return r(e)||(e=f(e)),u(this.low|e.low,this.high|e.high,this.unsigned)},T.xor=function(e){return r(e)||(e=f(e)),u(this.low^e.low,this.high^e.high,this.unsigned)},T.shiftLeft=function(e){return r(e)&&(e=e.toInt()),0==(e&=63)?this:e<32?u(this.low<>>32-e,this.unsigned):u(0,this.low<>>e|this.high<<32-e,this.high>>e,this.unsigned):u(this.high>>e-32,this.high>=0?0:-1,this.unsigned)},T.shr=T.shiftRight,T.shiftRightUnsigned=function(e){if(r(e)&&(e=e.toInt()),0==(e&=63))return this;var t=this.high;return e<32?u(this.low>>>e|t<<32-e,t>>>e,this.unsigned):u(32===e?t:t>>>e-32,0,this.unsigned)},T.shru=T.shiftRightUnsigned,T.shr_u=T.shiftRightUnsigned,T.toSigned=function(){return this.unsigned?u(this.low,this.high,!1):this},T.toUnsigned=function(){return this.unsigned?this:u(this.low,this.high,!0)},T.toBytes=function(e){return e?this.toBytesLE():this.toBytesBE()},T.toBytesLE=function(){var e=this.high,t=this.low;return[255&t,t>>>8&255,t>>>16&255,t>>>24,255&e,e>>>8&255,e>>>16&255,e>>>24]},T.toBytesBE=function(){var e=this.high,t=this.low;return[e>>>24,e>>>16&255,e>>>8&255,255&e,t>>>24,t>>>16&255,t>>>8&255,255&t]},n.fromBytes=function(e,t,r){return r?n.fromBytesLE(e,t):n.fromBytesBE(e,t)},n.fromBytesLE=function(e,t){return new n(e[0]|e[1]<<8|e[2]<<16|e[3]<<24,e[4]|e[5]<<8|e[6]<<16|e[7]<<24,t)},n.fromBytesBE=function(e,t){return new n(e[4]<<24|e[5]<<16|e[6]<<8|e[7],e[0]<<24|e[1]<<16|e[2]<<8|e[3],t)}},1446:(e,t,n)=>{"use strict";var r,o,i,a=n(2100),s=a.Reader,u=a.Writer,l=a.util,c=a.roots.default||(a.roots.default={});c.onnx=((i={}).Version=(r={},(o=Object.create(r))[r[0]="_START_VERSION"]=0,o[r[1]="IR_VERSION_2017_10_10"]=1,o[r[2]="IR_VERSION_2017_10_30"]=2,o[r[3]="IR_VERSION_2017_11_3"]=3,o[r[4]="IR_VERSION_2019_1_22"]=4,o[r[5]="IR_VERSION"]=5,o),i.AttributeProto=function(){function e(e){if(this.floats=[],this.ints=[],this.strings=[],this.tensors=[],this.graphs=[],e)for(var t=Object.keys(e),n=0;n>>3){case 1:r.name=e.string();break;case 21:r.refAttrName=e.string();break;case 13:r.docString=e.string();break;case 20:r.type=e.int32();break;case 2:r.f=e.float();break;case 3:r.i=e.int64();break;case 4:r.s=e.bytes();break;case 5:r.t=c.onnx.TensorProto.decode(e,e.uint32());break;case 6:r.g=c.onnx.GraphProto.decode(e,e.uint32());break;case 7:if(r.floats&&r.floats.length||(r.floats=[]),2==(7&o))for(var i=e.uint32()+e.pos;e.pos>>0,e.i.high>>>0).toNumber())),null!=e.s&&("string"==typeof e.s?l.base64.decode(e.s,t.s=l.newBuffer(l.base64.length(e.s)),0):e.s.length&&(t.s=e.s)),null!=e.t){if("object"!=typeof e.t)throw TypeError(".onnx.AttributeProto.t: object expected");t.t=c.onnx.TensorProto.fromObject(e.t)}if(null!=e.g){if("object"!=typeof e.g)throw TypeError(".onnx.AttributeProto.g: object expected");t.g=c.onnx.GraphProto.fromObject(e.g)}if(e.floats){if(!Array.isArray(e.floats))throw TypeError(".onnx.AttributeProto.floats: array expected");t.floats=[];for(var n=0;n>>0,e.ints[n].high>>>0).toNumber())}if(e.strings){if(!Array.isArray(e.strings))throw TypeError(".onnx.AttributeProto.strings: array 
expected");for(t.strings=[],n=0;n>>0,e.i.high>>>0).toNumber():e.i),null!=e.s&&e.hasOwnProperty("s")&&(n.s=t.bytes===String?l.base64.encode(e.s,0,e.s.length):t.bytes===Array?Array.prototype.slice.call(e.s):e.s),null!=e.t&&e.hasOwnProperty("t")&&(n.t=c.onnx.TensorProto.toObject(e.t,t)),null!=e.g&&e.hasOwnProperty("g")&&(n.g=c.onnx.GraphProto.toObject(e.g,t)),e.floats&&e.floats.length){n.floats=[];for(var o=0;o>>0,e.ints[o].high>>>0).toNumber():e.ints[o];if(e.strings&&e.strings.length)for(n.strings=[],o=0;o>>3){case 1:r.name=e.string();break;case 2:r.type=c.onnx.TypeProto.decode(e,e.uint32());break;case 3:r.docString=e.string();break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object expected";if(null!=e.name&&e.hasOwnProperty("name")&&!l.isString(e.name))return"name: string expected";if(null!=e.type&&e.hasOwnProperty("type")){var t=c.onnx.TypeProto.verify(e.type);if(t)return"type."+t}return null!=e.docString&&e.hasOwnProperty("docString")&&!l.isString(e.docString)?"docString: string expected":null},e.fromObject=function(e){if(e instanceof c.onnx.ValueInfoProto)return e;var t=new c.onnx.ValueInfoProto;if(null!=e.name&&(t.name=String(e.name)),null!=e.type){if("object"!=typeof e.type)throw TypeError(".onnx.ValueInfoProto.type: object expected");t.type=c.onnx.TypeProto.fromObject(e.type)}return null!=e.docString&&(t.docString=String(e.docString)),t},e.toObject=function(e,t){t||(t={});var n={};return t.defaults&&(n.name="",n.type=null,n.docString=""),null!=e.name&&e.hasOwnProperty("name")&&(n.name=e.name),null!=e.type&&e.hasOwnProperty("type")&&(n.type=c.onnx.TypeProto.toObject(e.type,t)),null!=e.docString&&e.hasOwnProperty("docString")&&(n.docString=e.docString),n},e.prototype.toJSON=function(){return this.constructor.toObject(this,a.util.toJSONOptions)},e}(),i.NodeProto=function(){function e(e){if(this.input=[],this.output=[],this.attribute=[],e)for(var t=Object.keys(e),n=0;n>>3){case 1:r.input&&r.input.length||(r.input=[]),r.input.push(e.string());break;case 2:r.output&&r.output.length||(r.output=[]),r.output.push(e.string());break;case 3:r.name=e.string();break;case 4:r.opType=e.string();break;case 7:r.domain=e.string();break;case 5:r.attribute&&r.attribute.length||(r.attribute=[]),r.attribute.push(c.onnx.AttributeProto.decode(e,e.uint32()));break;case 6:r.docString=e.string();break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object expected";if(null!=e.input&&e.hasOwnProperty("input")){if(!Array.isArray(e.input))return"input: array expected";for(var t=0;t>>3){case 1:r.irVersion=e.int64();break;case 8:r.opsetImport&&r.opsetImport.length||(r.opsetImport=[]),r.opsetImport.push(c.onnx.OperatorSetIdProto.decode(e,e.uint32()));break;case 2:r.producerName=e.string();break;case 3:r.producerVersion=e.string();break;case 4:r.domain=e.string();break;case 5:r.modelVersion=e.int64();break;case 6:r.docString=e.string();break;case 7:r.graph=c.onnx.GraphProto.decode(e,e.uint32());break;case 14:r.metadataProps&&r.metadataProps.length||(r.metadataProps=[]),r.metadataProps.push(c.onnx.StringStringEntryProto.decode(e,e.uint32()));break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object 
expected";if(null!=e.irVersion&&e.hasOwnProperty("irVersion")&&!(l.isInteger(e.irVersion)||e.irVersion&&l.isInteger(e.irVersion.low)&&l.isInteger(e.irVersion.high)))return"irVersion: integer|Long expected";if(null!=e.opsetImport&&e.hasOwnProperty("opsetImport")){if(!Array.isArray(e.opsetImport))return"opsetImport: array expected";for(var t=0;t>>0,e.irVersion.high>>>0).toNumber())),e.opsetImport){if(!Array.isArray(e.opsetImport))throw TypeError(".onnx.ModelProto.opsetImport: array expected");t.opsetImport=[];for(var n=0;n>>0,e.modelVersion.high>>>0).toNumber())),null!=e.docString&&(t.docString=String(e.docString)),null!=e.graph){if("object"!=typeof e.graph)throw TypeError(".onnx.ModelProto.graph: object expected");t.graph=c.onnx.GraphProto.fromObject(e.graph)}if(e.metadataProps){if(!Array.isArray(e.metadataProps))throw TypeError(".onnx.ModelProto.metadataProps: array expected");for(t.metadataProps=[],n=0;n>>0,e.irVersion.high>>>0).toNumber():e.irVersion),null!=e.producerName&&e.hasOwnProperty("producerName")&&(n.producerName=e.producerName),null!=e.producerVersion&&e.hasOwnProperty("producerVersion")&&(n.producerVersion=e.producerVersion),null!=e.domain&&e.hasOwnProperty("domain")&&(n.domain=e.domain),null!=e.modelVersion&&e.hasOwnProperty("modelVersion")&&("number"==typeof e.modelVersion?n.modelVersion=t.longs===String?String(e.modelVersion):e.modelVersion:n.modelVersion=t.longs===String?l.Long.prototype.toString.call(e.modelVersion):t.longs===Number?new l.LongBits(e.modelVersion.low>>>0,e.modelVersion.high>>>0).toNumber():e.modelVersion),null!=e.docString&&e.hasOwnProperty("docString")&&(n.docString=e.docString),null!=e.graph&&e.hasOwnProperty("graph")&&(n.graph=c.onnx.GraphProto.toObject(e.graph,t)),e.opsetImport&&e.opsetImport.length){n.opsetImport=[];for(var o=0;o>>3){case 1:r.key=e.string();break;case 2:r.value=e.string();break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){return"object"!=typeof e||null===e?"object expected":null!=e.key&&e.hasOwnProperty("key")&&!l.isString(e.key)?"key: string expected":null!=e.value&&e.hasOwnProperty("value")&&!l.isString(e.value)?"value: string expected":null},e.fromObject=function(e){if(e instanceof c.onnx.StringStringEntryProto)return e;var t=new c.onnx.StringStringEntryProto;return null!=e.key&&(t.key=String(e.key)),null!=e.value&&(t.value=String(e.value)),t},e.toObject=function(e,t){t||(t={});var n={};return t.defaults&&(n.key="",n.value=""),null!=e.key&&e.hasOwnProperty("key")&&(n.key=e.key),null!=e.value&&e.hasOwnProperty("value")&&(n.value=e.value),n},e.prototype.toJSON=function(){return this.constructor.toObject(this,a.util.toJSONOptions)},e}(),i.TensorAnnotation=function(){function e(e){if(this.quantParameterTensorNames=[],e)for(var t=Object.keys(e),n=0;n>>3){case 1:r.tensorName=e.string();break;case 2:r.quantParameterTensorNames&&r.quantParameterTensorNames.length||(r.quantParameterTensorNames=[]),r.quantParameterTensorNames.push(c.onnx.StringStringEntryProto.decode(e,e.uint32()));break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object expected";if(null!=e.tensorName&&e.hasOwnProperty("tensorName")&&!l.isString(e.tensorName))return"tensorName: string 
expected";if(null!=e.quantParameterTensorNames&&e.hasOwnProperty("quantParameterTensorNames")){if(!Array.isArray(e.quantParameterTensorNames))return"quantParameterTensorNames: array expected";for(var t=0;t>>3){case 1:r.node&&r.node.length||(r.node=[]),r.node.push(c.onnx.NodeProto.decode(e,e.uint32()));break;case 2:r.name=e.string();break;case 5:r.initializer&&r.initializer.length||(r.initializer=[]),r.initializer.push(c.onnx.TensorProto.decode(e,e.uint32()));break;case 10:r.docString=e.string();break;case 11:r.input&&r.input.length||(r.input=[]),r.input.push(c.onnx.ValueInfoProto.decode(e,e.uint32()));break;case 12:r.output&&r.output.length||(r.output=[]),r.output.push(c.onnx.ValueInfoProto.decode(e,e.uint32()));break;case 13:r.valueInfo&&r.valueInfo.length||(r.valueInfo=[]),r.valueInfo.push(c.onnx.ValueInfoProto.decode(e,e.uint32()));break;case 14:r.quantizationAnnotation&&r.quantizationAnnotation.length||(r.quantizationAnnotation=[]),r.quantizationAnnotation.push(c.onnx.TensorAnnotation.decode(e,e.uint32()));break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object expected";if(null!=e.node&&e.hasOwnProperty("node")){if(!Array.isArray(e.node))return"node: array expected";for(var t=0;t>>3){case 1:if(r.dims&&r.dims.length||(r.dims=[]),2==(7&o))for(var i=e.uint32()+e.pos;e.pos>>0,e.dims[n].high>>>0).toNumber())}if(null!=e.dataType&&(t.dataType=0|e.dataType),null!=e.segment){if("object"!=typeof e.segment)throw TypeError(".onnx.TensorProto.segment: object expected");t.segment=c.onnx.TensorProto.Segment.fromObject(e.segment)}if(e.floatData){if(!Array.isArray(e.floatData))throw TypeError(".onnx.TensorProto.floatData: array expected");for(t.floatData=[],n=0;n>>0,e.int64Data[n].high>>>0).toNumber())}if(null!=e.name&&(t.name=String(e.name)),null!=e.docString&&(t.docString=String(e.docString)),null!=e.rawData&&("string"==typeof e.rawData?l.base64.decode(e.rawData,t.rawData=l.newBuffer(l.base64.length(e.rawData)),0):e.rawData.length&&(t.rawData=e.rawData)),e.externalData){if(!Array.isArray(e.externalData))throw TypeError(".onnx.TensorProto.externalData: array expected");for(t.externalData=[],n=0;n>>0,e.uint64Data[n].high>>>0).toNumber(!0))}return t},e.toObject=function(e,t){t||(t={});var n={};if((t.arrays||t.defaults)&&(n.dims=[],n.floatData=[],n.int32Data=[],n.stringData=[],n.int64Data=[],n.doubleData=[],n.uint64Data=[],n.externalData=[]),t.defaults&&(n.dataType=0,n.segment=null,n.name="",t.bytes===String?n.rawData="":(n.rawData=[],t.bytes!==Array&&(n.rawData=l.newBuffer(n.rawData))),n.docString="",n.dataLocation=t.enums===String?"DEFAULT":0),e.dims&&e.dims.length){n.dims=[];for(var 
r=0;r>>0,e.dims[r].high>>>0).toNumber():e.dims[r]}if(null!=e.dataType&&e.hasOwnProperty("dataType")&&(n.dataType=e.dataType),null!=e.segment&&e.hasOwnProperty("segment")&&(n.segment=c.onnx.TensorProto.Segment.toObject(e.segment,t)),e.floatData&&e.floatData.length)for(n.floatData=[],r=0;r>>0,e.int64Data[r].high>>>0).toNumber():e.int64Data[r];if(null!=e.name&&e.hasOwnProperty("name")&&(n.name=e.name),null!=e.rawData&&e.hasOwnProperty("rawData")&&(n.rawData=t.bytes===String?l.base64.encode(e.rawData,0,e.rawData.length):t.bytes===Array?Array.prototype.slice.call(e.rawData):e.rawData),e.doubleData&&e.doubleData.length)for(n.doubleData=[],r=0;r>>0,e.uint64Data[r].high>>>0).toNumber(!0):e.uint64Data[r];if(null!=e.docString&&e.hasOwnProperty("docString")&&(n.docString=e.docString),e.externalData&&e.externalData.length)for(n.externalData=[],r=0;r>>3){case 1:r.begin=e.int64();break;case 2:r.end=e.int64();break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){return"object"!=typeof e||null===e?"object expected":null!=e.begin&&e.hasOwnProperty("begin")&&!(l.isInteger(e.begin)||e.begin&&l.isInteger(e.begin.low)&&l.isInteger(e.begin.high))?"begin: integer|Long expected":null!=e.end&&e.hasOwnProperty("end")&&!(l.isInteger(e.end)||e.end&&l.isInteger(e.end.low)&&l.isInteger(e.end.high))?"end: integer|Long expected":null},e.fromObject=function(e){if(e instanceof c.onnx.TensorProto.Segment)return e;var t=new c.onnx.TensorProto.Segment;return null!=e.begin&&(l.Long?(t.begin=l.Long.fromValue(e.begin)).unsigned=!1:"string"==typeof e.begin?t.begin=parseInt(e.begin,10):"number"==typeof e.begin?t.begin=e.begin:"object"==typeof e.begin&&(t.begin=new l.LongBits(e.begin.low>>>0,e.begin.high>>>0).toNumber())),null!=e.end&&(l.Long?(t.end=l.Long.fromValue(e.end)).unsigned=!1:"string"==typeof e.end?t.end=parseInt(e.end,10):"number"==typeof e.end?t.end=e.end:"object"==typeof e.end&&(t.end=new l.LongBits(e.end.low>>>0,e.end.high>>>0).toNumber())),t},e.toObject=function(e,t){t||(t={});var n={};if(t.defaults){if(l.Long){var r=new l.Long(0,0,!1);n.begin=t.longs===String?r.toString():t.longs===Number?r.toNumber():r}else n.begin=t.longs===String?"0":0;l.Long?(r=new l.Long(0,0,!1),n.end=t.longs===String?r.toString():t.longs===Number?r.toNumber():r):n.end=t.longs===String?"0":0}return null!=e.begin&&e.hasOwnProperty("begin")&&("number"==typeof e.begin?n.begin=t.longs===String?String(e.begin):e.begin:n.begin=t.longs===String?l.Long.prototype.toString.call(e.begin):t.longs===Number?new l.LongBits(e.begin.low>>>0,e.begin.high>>>0).toNumber():e.begin),null!=e.end&&e.hasOwnProperty("end")&&("number"==typeof e.end?n.end=t.longs===String?String(e.end):e.end:n.end=t.longs===String?l.Long.prototype.toString.call(e.end):t.longs===Number?new l.LongBits(e.end.low>>>0,e.end.high>>>0).toNumber():e.end),n},e.prototype.toJSON=function(){return this.constructor.toObject(this,a.util.toJSONOptions)},e}(),e.DataLocation=function(){var e={},t=Object.create(e);return t[e[0]="DEFAULT"]=0,t[e[1]="EXTERNAL"]=1,t}(),e}(),i.TensorShapeProto=function(){function e(e){if(this.dim=[],e)for(var t=Object.keys(e),n=0;n>>3==1?(r.dim&&r.dim.length||(r.dim=[]),r.dim.push(c.onnx.TensorShapeProto.Dimension.decode(e,e.uint32()))):e.skipType(7&o)}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object 
expected";if(null!=e.dim&&e.hasOwnProperty("dim")){if(!Array.isArray(e.dim))return"dim: array expected";for(var t=0;t>>3){case 1:r.dimValue=e.int64();break;case 2:r.dimParam=e.string();break;case 3:r.denotation=e.string();break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object expected";var t={};if(null!=e.dimValue&&e.hasOwnProperty("dimValue")&&(t.value=1,!(l.isInteger(e.dimValue)||e.dimValue&&l.isInteger(e.dimValue.low)&&l.isInteger(e.dimValue.high))))return"dimValue: integer|Long expected";if(null!=e.dimParam&&e.hasOwnProperty("dimParam")){if(1===t.value)return"value: multiple values";if(t.value=1,!l.isString(e.dimParam))return"dimParam: string expected"}return null!=e.denotation&&e.hasOwnProperty("denotation")&&!l.isString(e.denotation)?"denotation: string expected":null},e.fromObject=function(e){if(e instanceof c.onnx.TensorShapeProto.Dimension)return e;var t=new c.onnx.TensorShapeProto.Dimension;return null!=e.dimValue&&(l.Long?(t.dimValue=l.Long.fromValue(e.dimValue)).unsigned=!1:"string"==typeof e.dimValue?t.dimValue=parseInt(e.dimValue,10):"number"==typeof e.dimValue?t.dimValue=e.dimValue:"object"==typeof e.dimValue&&(t.dimValue=new l.LongBits(e.dimValue.low>>>0,e.dimValue.high>>>0).toNumber())),null!=e.dimParam&&(t.dimParam=String(e.dimParam)),null!=e.denotation&&(t.denotation=String(e.denotation)),t},e.toObject=function(e,t){t||(t={});var n={};return t.defaults&&(n.denotation=""),null!=e.dimValue&&e.hasOwnProperty("dimValue")&&("number"==typeof e.dimValue?n.dimValue=t.longs===String?String(e.dimValue):e.dimValue:n.dimValue=t.longs===String?l.Long.prototype.toString.call(e.dimValue):t.longs===Number?new l.LongBits(e.dimValue.low>>>0,e.dimValue.high>>>0).toNumber():e.dimValue,t.oneofs&&(n.value="dimValue")),null!=e.dimParam&&e.hasOwnProperty("dimParam")&&(n.dimParam=e.dimParam,t.oneofs&&(n.value="dimParam")),null!=e.denotation&&e.hasOwnProperty("denotation")&&(n.denotation=e.denotation),n},e.prototype.toJSON=function(){return this.constructor.toObject(this,a.util.toJSONOptions)},e}(),e}(),i.TypeProto=function(){function e(e){if(e)for(var t=Object.keys(e),n=0;n>>3){case 1:r.tensorType=c.onnx.TypeProto.Tensor.decode(e,e.uint32());break;case 6:r.denotation=e.string();break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object expected";if(null!=e.tensorType&&e.hasOwnProperty("tensorType")){var t=c.onnx.TypeProto.Tensor.verify(e.tensorType);if(t)return"tensorType."+t}return null!=e.denotation&&e.hasOwnProperty("denotation")&&!l.isString(e.denotation)?"denotation: string expected":null},e.fromObject=function(e){if(e instanceof c.onnx.TypeProto)return e;var t=new c.onnx.TypeProto;if(null!=e.tensorType){if("object"!=typeof e.tensorType)throw TypeError(".onnx.TypeProto.tensorType: object expected");t.tensorType=c.onnx.TypeProto.Tensor.fromObject(e.tensorType)}return null!=e.denotation&&(t.denotation=String(e.denotation)),t},e.toObject=function(e,t){t||(t={});var n={};return t.defaults&&(n.denotation=""),null!=e.tensorType&&e.hasOwnProperty("tensorType")&&(n.tensorType=c.onnx.TypeProto.Tensor.toObject(e.tensorType,t),t.oneofs&&(n.value="tensorType")),null!=e.denotation&&e.hasOwnProperty("denotation")&&(n.denotation=e.denotation),n},e.prototype.toJSON=function(){return 
this.constructor.toObject(this,a.util.toJSONOptions)},e.Tensor=function(){function e(e){if(e)for(var t=Object.keys(e),n=0;n>>3){case 1:r.elemType=e.int32();break;case 2:r.shape=c.onnx.TensorShapeProto.decode(e,e.uint32());break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){if("object"!=typeof e||null===e)return"object expected";if(null!=e.elemType&&e.hasOwnProperty("elemType")&&!l.isInteger(e.elemType))return"elemType: integer expected";if(null!=e.shape&&e.hasOwnProperty("shape")){var t=c.onnx.TensorShapeProto.verify(e.shape);if(t)return"shape."+t}return null},e.fromObject=function(e){if(e instanceof c.onnx.TypeProto.Tensor)return e;var t=new c.onnx.TypeProto.Tensor;if(null!=e.elemType&&(t.elemType=0|e.elemType),null!=e.shape){if("object"!=typeof e.shape)throw TypeError(".onnx.TypeProto.Tensor.shape: object expected");t.shape=c.onnx.TensorShapeProto.fromObject(e.shape)}return t},e.toObject=function(e,t){t||(t={});var n={};return t.defaults&&(n.elemType=0,n.shape=null),null!=e.elemType&&e.hasOwnProperty("elemType")&&(n.elemType=e.elemType),null!=e.shape&&e.hasOwnProperty("shape")&&(n.shape=c.onnx.TensorShapeProto.toObject(e.shape,t)),n},e.prototype.toJSON=function(){return this.constructor.toObject(this,a.util.toJSONOptions)},e}(),e}(),i.OperatorSetIdProto=function(){function e(e){if(e)for(var t=Object.keys(e),n=0;n>>3){case 1:r.domain=e.string();break;case 2:r.version=e.int64();break;default:e.skipType(7&o)}}return r},e.decodeDelimited=function(e){return e instanceof s||(e=new s(e)),this.decode(e,e.uint32())},e.verify=function(e){return"object"!=typeof e||null===e?"object expected":null!=e.domain&&e.hasOwnProperty("domain")&&!l.isString(e.domain)?"domain: string expected":null!=e.version&&e.hasOwnProperty("version")&&!(l.isInteger(e.version)||e.version&&l.isInteger(e.version.low)&&l.isInteger(e.version.high))?"version: integer|Long expected":null},e.fromObject=function(e){if(e instanceof c.onnx.OperatorSetIdProto)return e;var t=new c.onnx.OperatorSetIdProto;return null!=e.domain&&(t.domain=String(e.domain)),null!=e.version&&(l.Long?(t.version=l.Long.fromValue(e.version)).unsigned=!1:"string"==typeof e.version?t.version=parseInt(e.version,10):"number"==typeof e.version?t.version=e.version:"object"==typeof e.version&&(t.version=new l.LongBits(e.version.low>>>0,e.version.high>>>0).toNumber())),t},e.toObject=function(e,t){t||(t={});var n={};if(t.defaults)if(n.domain="",l.Long){var r=new l.Long(0,0,!1);n.version=t.longs===String?r.toString():t.longs===Number?r.toNumber():r}else n.version=t.longs===String?"0":0;return null!=e.domain&&e.hasOwnProperty("domain")&&(n.domain=e.domain),null!=e.version&&e.hasOwnProperty("version")&&("number"==typeof e.version?n.version=t.longs===String?String(e.version):e.version:n.version=t.longs===String?l.Long.prototype.toString.call(e.version):t.longs===Number?new l.LongBits(e.version.low>>>0,e.version.high>>>0).toNumber():e.version),n},e.prototype.toJSON=function(){return this.constructor.toObject(this,a.util.toJSONOptions)},e}(),i),e.exports=c},2100:(e,t,n)=>{"use strict";e.exports=n(9482)},9482:(e,t,n)=>{"use strict";var r=t;function o(){r.util._configure(),r.Writer._configure(r.BufferWriter),r.Reader._configure(r.BufferReader)}r.build="minimal",r.Writer=n(1173),r.BufferWriter=n(3155),r.Reader=n(1408),r.BufferReader=n(593),r.util=n(9693),r.rpc=n(5994),r.roots=n(5054),r.configure=o,o()},1408:(e,t,n)=>{"use strict";e.exports=u;var 
r,o=n(9693),i=o.LongBits,a=o.utf8;function s(e,t){return RangeError("index out of range: "+e.pos+" + "+(t||1)+" > "+e.len)}function u(e){this.buf=e,this.pos=0,this.len=e.length}var l,c="undefined"!=typeof Uint8Array?function(e){if(e instanceof Uint8Array||Array.isArray(e))return new u(e);throw Error("illegal buffer")}:function(e){if(Array.isArray(e))return new u(e);throw Error("illegal buffer")},f=function(){return o.Buffer?function(e){return(u.create=function(e){return o.Buffer.isBuffer(e)?new r(e):c(e)})(e)}:c};function d(){var e=new i(0,0),t=0;if(!(this.len-this.pos>4)){for(;t<3;++t){if(this.pos>=this.len)throw s(this);if(e.lo=(e.lo|(127&this.buf[this.pos])<<7*t)>>>0,this.buf[this.pos++]<128)return e}return e.lo=(e.lo|(127&this.buf[this.pos++])<<7*t)>>>0,e}for(;t<4;++t)if(e.lo=(e.lo|(127&this.buf[this.pos])<<7*t)>>>0,this.buf[this.pos++]<128)return e;if(e.lo=(e.lo|(127&this.buf[this.pos])<<28)>>>0,e.hi=(e.hi|(127&this.buf[this.pos])>>4)>>>0,this.buf[this.pos++]<128)return e;if(t=0,this.len-this.pos>4){for(;t<5;++t)if(e.hi=(e.hi|(127&this.buf[this.pos])<<7*t+3)>>>0,this.buf[this.pos++]<128)return e}else for(;t<5;++t){if(this.pos>=this.len)throw s(this);if(e.hi=(e.hi|(127&this.buf[this.pos])<<7*t+3)>>>0,this.buf[this.pos++]<128)return e}throw Error("invalid varint encoding")}function p(e,t){return(e[t-4]|e[t-3]<<8|e[t-2]<<16|e[t-1]<<24)>>>0}function h(){if(this.pos+8>this.len)throw s(this,8);return new i(p(this.buf,this.pos+=4),p(this.buf,this.pos+=4))}u.create=f(),u.prototype._slice=o.Array.prototype.subarray||o.Array.prototype.slice,u.prototype.uint32=(l=4294967295,function(){if(l=(127&this.buf[this.pos])>>>0,this.buf[this.pos++]<128)return l;if(l=(l|(127&this.buf[this.pos])<<7)>>>0,this.buf[this.pos++]<128)return l;if(l=(l|(127&this.buf[this.pos])<<14)>>>0,this.buf[this.pos++]<128)return l;if(l=(l|(127&this.buf[this.pos])<<21)>>>0,this.buf[this.pos++]<128)return l;if(l=(l|(15&this.buf[this.pos])<<28)>>>0,this.buf[this.pos++]<128)return l;if((this.pos+=5)>this.len)throw this.pos=this.len,s(this,10);return l}),u.prototype.int32=function(){return 0|this.uint32()},u.prototype.sint32=function(){var e=this.uint32();return e>>>1^-(1&e)},u.prototype.bool=function(){return 0!==this.uint32()},u.prototype.fixed32=function(){if(this.pos+4>this.len)throw s(this,4);return p(this.buf,this.pos+=4)},u.prototype.sfixed32=function(){if(this.pos+4>this.len)throw s(this,4);return 0|p(this.buf,this.pos+=4)},u.prototype.float=function(){if(this.pos+4>this.len)throw s(this,4);var e=o.float.readFloatLE(this.buf,this.pos);return this.pos+=4,e},u.prototype.double=function(){if(this.pos+8>this.len)throw s(this,4);var e=o.float.readDoubleLE(this.buf,this.pos);return this.pos+=8,e},u.prototype.bytes=function(){var e=this.uint32(),t=this.pos,n=this.pos+e;if(n>this.len)throw s(this,e);return this.pos+=e,Array.isArray(this.buf)?this.buf.slice(t,n):t===n?new this.buf.constructor(0):this._slice.call(this.buf,t,n)},u.prototype.string=function(){var e=this.bytes();return a.read(e,0,e.length)},u.prototype.skip=function(e){if("number"==typeof e){if(this.pos+e>this.len)throw s(this,e);this.pos+=e}else do{if(this.pos>=this.len)throw s(this)}while(128&this.buf[this.pos++]);return this},u.prototype.skipType=function(e){switch(e){case 0:this.skip();break;case 1:this.skip(8);break;case 2:this.skip(this.uint32());break;case 3:for(;4!=(e=7&this.uint32());)this.skipType(e);break;case 5:this.skip(4);break;default:throw Error("invalid wire type "+e+" at offset "+this.pos)}return 
this},u._configure=function(e){r=e,u.create=f(),r._configure();var t=o.Long?"toLong":"toNumber";o.merge(u.prototype,{int64:function(){return d.call(this)[t](!1)},uint64:function(){return d.call(this)[t](!0)},sint64:function(){return d.call(this).zzDecode()[t](!1)},fixed64:function(){return h.call(this)[t](!0)},sfixed64:function(){return h.call(this)[t](!1)}})}},593:(e,t,n)=>{"use strict";e.exports=i;var r=n(1408);(i.prototype=Object.create(r.prototype)).constructor=i;var o=n(9693);function i(e){r.call(this,e)}i._configure=function(){o.Buffer&&(i.prototype._slice=o.Buffer.prototype.slice)},i.prototype.string=function(){var e=this.uint32();return this.buf.utf8Slice?this.buf.utf8Slice(this.pos,this.pos=Math.min(this.pos+e,this.len)):this.buf.toString("utf-8",this.pos,this.pos=Math.min(this.pos+e,this.len))},i._configure()},5054:e=>{"use strict";e.exports={}},5994:(e,t,n)=>{"use strict";t.Service=n(7948)},7948:(e,t,n)=>{"use strict";e.exports=o;var r=n(9693);function o(e,t,n){if("function"!=typeof e)throw TypeError("rpcImpl must be a function");r.EventEmitter.call(this),this.rpcImpl=e,this.requestDelimited=Boolean(t),this.responseDelimited=Boolean(n)}(o.prototype=Object.create(r.EventEmitter.prototype)).constructor=o,o.prototype.rpcCall=function e(t,n,o,i,a){if(!i)throw TypeError("request must be specified");var s=this;if(!a)return r.asPromise(e,s,t,n,o,i);if(s.rpcImpl)try{return s.rpcImpl(t,n[s.requestDelimited?"encodeDelimited":"encode"](i).finish(),(function(e,n){if(e)return s.emit("error",e,t),a(e);if(null!==n){if(!(n instanceof o))try{n=o[s.responseDelimited?"decodeDelimited":"decode"](n)}catch(e){return s.emit("error",e,t),a(e)}return s.emit("data",n,t),a(null,n)}s.end(!0)}))}catch(e){return s.emit("error",e,t),void setTimeout((function(){a(e)}),0)}else setTimeout((function(){a(Error("already ended"))}),0)},o.prototype.end=function(e){return this.rpcImpl&&(e||this.rpcImpl(null,null,null),this.rpcImpl=null,this.emit("end").off()),this}},1945:(e,t,n)=>{"use strict";e.exports=o;var r=n(9693);function o(e,t){this.lo=e>>>0,this.hi=t>>>0}var i=o.zero=new o(0,0);i.toNumber=function(){return 0},i.zzEncode=i.zzDecode=function(){return this},i.length=function(){return 1};var a=o.zeroHash="\0\0\0\0\0\0\0\0";o.fromNumber=function(e){if(0===e)return i;var t=e<0;t&&(e=-e);var n=e>>>0,r=(e-n)/4294967296>>>0;return t&&(r=~r>>>0,n=~n>>>0,++n>4294967295&&(n=0,++r>4294967295&&(r=0))),new o(n,r)},o.from=function(e){if("number"==typeof e)return o.fromNumber(e);if(r.isString(e)){if(!r.Long)return o.fromNumber(parseInt(e,10));e=r.Long.fromString(e)}return e.low||e.high?new o(e.low>>>0,e.high>>>0):i},o.prototype.toNumber=function(e){if(!e&&this.hi>>>31){var t=1+~this.lo>>>0,n=~this.hi>>>0;return t||(n=n+1>>>0),-(t+4294967296*n)}return this.lo+4294967296*this.hi},o.prototype.toLong=function(e){return r.Long?new r.Long(0|this.lo,0|this.hi,Boolean(e)):{low:0|this.lo,high:0|this.hi,unsigned:Boolean(e)}};var s=String.prototype.charCodeAt;o.fromHash=function(e){return e===a?i:new o((s.call(e,0)|s.call(e,1)<<8|s.call(e,2)<<16|s.call(e,3)<<24)>>>0,(s.call(e,4)|s.call(e,5)<<8|s.call(e,6)<<16|s.call(e,7)<<24)>>>0)},o.prototype.toHash=function(){return String.fromCharCode(255&this.lo,this.lo>>>8&255,this.lo>>>16&255,this.lo>>>24,255&this.hi,this.hi>>>8&255,this.hi>>>16&255,this.hi>>>24)},o.prototype.zzEncode=function(){var e=this.hi>>31;return this.hi=((this.hi<<1|this.lo>>>31)^e)>>>0,this.lo=(this.lo<<1^e)>>>0,this},o.prototype.zzDecode=function(){var e=-(1&this.lo);return 
this.lo=((this.lo>>>1|this.hi<<31)^e)>>>0,this.hi=(this.hi>>>1^e)>>>0,this},o.prototype.length=function(){var e=this.lo,t=(this.lo>>>28|this.hi<<4)>>>0,n=this.hi>>>24;return 0===n?0===t?e<16384?e<128?1:2:e<2097152?3:4:t<16384?t<128?5:6:t<2097152?7:8:n<128?9:10}},9693:function(e,t,n){"use strict";var r=t;function o(e,t,n){for(var r=Object.keys(t),o=0;o0)},r.Buffer=function(){try{var e=r.inquire("buffer").Buffer;return e.prototype.utf8Write?e:null}catch(e){return null}}(),r._Buffer_from=null,r._Buffer_allocUnsafe=null,r.newBuffer=function(e){return"number"==typeof e?r.Buffer?r._Buffer_allocUnsafe(e):new r.Array(e):r.Buffer?r._Buffer_from(e):"undefined"==typeof Uint8Array?e:new Uint8Array(e)},r.Array="undefined"!=typeof Uint8Array?Uint8Array:Array,r.Long=r.global.dcodeIO&&r.global.dcodeIO.Long||r.global.Long||r.inquire("long"),r.key2Re=/^true|false|0|1$/,r.key32Re=/^-?(?:0|[1-9][0-9]*)$/,r.key64Re=/^(?:[\\x00-\\xff]{8}|-?(?:0|[1-9][0-9]*))$/,r.longToHash=function(e){return e?r.LongBits.from(e).toHash():r.LongBits.zeroHash},r.longFromHash=function(e,t){var n=r.LongBits.fromHash(e);return r.Long?r.Long.fromBits(n.lo,n.hi,t):n.toNumber(Boolean(t))},r.merge=o,r.lcFirst=function(e){return e.charAt(0).toLowerCase()+e.substring(1)},r.newError=i,r.ProtocolError=i("ProtocolError"),r.oneOfGetter=function(e){for(var t={},n=0;n-1;--n)if(1===t[e[n]]&&void 0!==this[e[n]]&&null!==this[e[n]])return e[n]}},r.oneOfSetter=function(e){return function(t){for(var n=0;n{"use strict";e.exports=f;var r,o=n(9693),i=o.LongBits,a=o.base64,s=o.utf8;function u(e,t,n){this.fn=e,this.len=t,this.next=void 0,this.val=n}function l(){}function c(e){this.head=e.head,this.tail=e.tail,this.len=e.len,this.next=e.states}function f(){this.len=0,this.head=new u(l,0,0),this.tail=this.head,this.states=null}var d=function(){return o.Buffer?function(){return(f.create=function(){return new r})()}:function(){return new f}};function p(e,t,n){t[n]=255&e}function h(e,t){this.len=e,this.next=void 0,this.val=t}function g(e,t,n){for(;e.hi;)t[n++]=127&e.lo|128,e.lo=(e.lo>>>7|e.hi<<25)>>>0,e.hi>>>=7;for(;e.lo>127;)t[n++]=127&e.lo|128,e.lo=e.lo>>>7;t[n++]=e.lo}function m(e,t,n){t[n]=255&e,t[n+1]=e>>>8&255,t[n+2]=e>>>16&255,t[n+3]=e>>>24}f.create=d(),f.alloc=function(e){return new o.Array(e)},o.Array!==Array&&(f.alloc=o.pool(f.alloc,o.Array.prototype.subarray)),f.prototype._push=function(e,t,n){return this.tail=this.tail.next=new u(e,t,n),this.len+=t,this},h.prototype=Object.create(u.prototype),h.prototype.fn=function(e,t,n){for(;e>127;)t[n++]=127&e|128,e>>>=7;t[n]=e},f.prototype.uint32=function(e){return this.len+=(this.tail=this.tail.next=new h((e>>>=0)<128?1:e<16384?2:e<2097152?3:e<268435456?4:5,e)).len,this},f.prototype.int32=function(e){return e<0?this._push(g,10,i.fromNumber(e)):this.uint32(e)},f.prototype.sint32=function(e){return this.uint32((e<<1^e>>31)>>>0)},f.prototype.uint64=function(e){var t=i.from(e);return this._push(g,t.length(),t)},f.prototype.int64=f.prototype.uint64,f.prototype.sint64=function(e){var t=i.from(e).zzEncode();return this._push(g,t.length(),t)},f.prototype.bool=function(e){return this._push(p,1,e?1:0)},f.prototype.fixed32=function(e){return this._push(m,4,e>>>0)},f.prototype.sfixed32=f.prototype.fixed32,f.prototype.fixed64=function(e){var t=i.from(e);return this._push(m,4,t.lo)._push(m,4,t.hi)},f.prototype.sfixed64=f.prototype.fixed64,f.prototype.float=function(e){return this._push(o.float.writeFloatLE,4,e)},f.prototype.double=function(e){return this._push(o.float.writeDoubleLE,8,e)};var 
b=o.Array.prototype.set?function(e,t,n){t.set(e,n)}:function(e,t,n){for(var r=0;r>>0;if(!t)return this._push(p,1,0);if(o.isString(e)){var n=f.alloc(t=a.length(e));a.decode(e,n,0),e=n}return this.uint32(t)._push(b,t,e)},f.prototype.string=function(e){var t=s.length(e);return t?this.uint32(t)._push(s.write,t,e):this._push(p,1,0)},f.prototype.fork=function(){return this.states=new c(this),this.head=this.tail=new u(l,0,0),this.len=0,this},f.prototype.reset=function(){return this.states?(this.head=this.states.head,this.tail=this.states.tail,this.len=this.states.len,this.states=this.states.next):(this.head=this.tail=new u(l,0,0),this.len=0),this},f.prototype.ldelim=function(){var e=this.head,t=this.tail,n=this.len;return this.reset().uint32(n),n&&(this.tail.next=e.next,this.tail=t,this.len+=n),this},f.prototype.finish=function(){for(var e=this.head.next,t=this.constructor.alloc(this.len),n=0;e;)e.fn(e.val,t,n),n+=e.len,e=e.next;return t},f._configure=function(e){r=e,f.create=d(),r._configure()}},3155:(e,t,n)=>{"use strict";e.exports=i;var r=n(1173);(i.prototype=Object.create(r.prototype)).constructor=i;var o=n(9693);function i(){r.call(this)}function a(e,t,n){e.length<40?o.utf8.write(e,t,n):t.utf8Write?t.utf8Write(e,n):t.write(e,n)}i._configure=function(){i.alloc=o._Buffer_allocUnsafe,i.writeBytesBuffer=o.Buffer&&o.Buffer.prototype instanceof Uint8Array&&"set"===o.Buffer.prototype.set.name?function(e,t,n){t.set(e,n)}:function(e,t,n){if(e.copy)e.copy(t,n,0,e.length);else for(var r=0;r>>0;return this.uint32(t),t&&this._push(i.writeBytesBuffer,t,e),this},i.prototype.string=function(e){var t=o.Buffer.byteLength(e);return this.uint32(t),t&&this._push(a,t,e),this},i._configure()},7714:(e,t,n)=>{"use strict";t.R=void 0;const r=n(6919),o=n(7448);t.R=new class{async init(){}async createSessionHandler(e,t){const n=new r.Session(t);return await n.loadModel(e),new o.OnnxjsSessionHandler(n)}}},4200:(e,t,n)=>{"use strict";t.c8=t.rX=void 0;const r=n(1670),o=n(5381),i=n(2157),a=n(2306);t.rX=()=>{if(("number"!=typeof r.env.wasm.initTimeout||r.env.wasm.initTimeout<0)&&(r.env.wasm.initTimeout=0),"boolean"!=typeof r.env.wasm.simd&&(r.env.wasm.simd=!0),"boolean"!=typeof r.env.wasm.proxy&&(r.env.wasm.proxy=!1),"number"!=typeof r.env.wasm.numThreads||!Number.isInteger(r.env.wasm.numThreads)||r.env.wasm.numThreads<=0){const e="undefined"==typeof navigator?(0,o.cpus)().length:navigator.hardwareConcurrency;r.env.wasm.numThreads=Math.min(4,Math.ceil((e||1)/2))}},t.c8=new class{async init(){(0,t.rX)(),await(0,i.initWasm)()}async createSessionHandler(e,t){const n=new a.OnnxruntimeWebAssemblySessionHandler;return await n.loadModel(e,t),Promise.resolve(n)}}},6018:function(e,t,n){"use strict";var r=this&&this.__createBinding||(Object.create?function(e,t,n,r){void 0===r&&(r=n);var o=Object.getOwnPropertyDescriptor(t,n);o&&!("get"in o?!t.__esModule:o.writable||o.configurable)||(o={enumerable:!0,get:function(){return t[n]}}),Object.defineProperty(e,r,o)}:function(e,t,n,r){void 0===r&&(r=n),e[r]=t[n]}),o=this&&this.__exportStar||function(e,t){for(var n in e)"default"===n||Object.prototype.hasOwnProperty.call(t,n)||r(t,e,n)};Object.defineProperty(t,"__esModule",{value:!0}),o(n(1670),t);const i=n(1670);{const e=n(7714).R;(0,i.registerBackend)("webgl",e,-10)}{const e=n(4200).c8;(0,i.registerBackend)("cpu",e,10),(0,i.registerBackend)("wasm",e,10),(0,i.registerBackend)("xnnpack",e,9)}},246:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.createAttributeWithCacheKey=void 0;class 
n{constructor(e){Object.assign(this,e)}get cacheKey(){return this._cacheKey||(this._cacheKey=Object.getOwnPropertyNames(this).sort().map((e=>`${this[e]}`)).join(";")),this._cacheKey}}t.createAttributeWithCacheKey=e=>new n(e)},7778:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.Attribute=void 0;const r=n(1446),o=n(9395),i=n(9162),a=n(2517);var s=o.onnxruntime.experimental.fbs;class u{constructor(e){if(this._attributes=new Map,null!=e){for(const t of e)t instanceof r.onnx.AttributeProto?this._attributes.set(t.name,[u.getValue(t),u.getType(t)]):t instanceof s.Attribute&&this._attributes.set(t.name(),[u.getValue(t),u.getType(t)]);if(this._attributes.sizei.Tensor.fromProto(e)));if(e instanceof s.Attribute)return n.map((e=>i.Tensor.fromOrtTensor(e)))}if(t===r.onnx.AttributeProto.AttributeType.STRING&&e instanceof r.onnx.AttributeProto){const e=n;return(0,a.decodeUtf8String)(e)}return t===r.onnx.AttributeProto.AttributeType.STRINGS&&e instanceof r.onnx.AttributeProto?n.map(a.decodeUtf8String):n}static getValueNoCheck(e){return e instanceof r.onnx.AttributeProto?this.getValueNoCheckFromOnnxFormat(e):this.getValueNoCheckFromOrtFormat(e)}static getValueNoCheckFromOnnxFormat(e){switch(e.type){case r.onnx.AttributeProto.AttributeType.FLOAT:return e.f;case r.onnx.AttributeProto.AttributeType.INT:return e.i;case r.onnx.AttributeProto.AttributeType.STRING:return e.s;case r.onnx.AttributeProto.AttributeType.TENSOR:return e.t;case r.onnx.AttributeProto.AttributeType.GRAPH:return e.g;case r.onnx.AttributeProto.AttributeType.FLOATS:return e.floats;case r.onnx.AttributeProto.AttributeType.INTS:return e.ints;case r.onnx.AttributeProto.AttributeType.STRINGS:return e.strings;case r.onnx.AttributeProto.AttributeType.TENSORS:return e.tensors;case r.onnx.AttributeProto.AttributeType.GRAPHS:return e.graphs;default:throw new Error(`unsupported attribute type: ${r.onnx.AttributeProto.AttributeType[e.type]}`)}}static getValueNoCheckFromOrtFormat(e){switch(e.type()){case s.AttributeType.FLOAT:return e.f();case s.AttributeType.INT:return e.i();case s.AttributeType.STRING:return e.s();case s.AttributeType.TENSOR:return e.t();case s.AttributeType.GRAPH:return e.g();case s.AttributeType.FLOATS:return e.floatsArray();case s.AttributeType.INTS:{const t=[];for(let n=0;n{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.resolveBackend=t.backend=void 0;const r=n(5038),o=new Map;async function i(e){const n=t.backend;if(void 0!==n[e]&&function(e){const t=e;return"initialize"in t&&"function"==typeof t.initialize&&"createSessionHandler"in t&&"function"==typeof t.createSessionHandler&&"dispose"in t&&"function"==typeof t.dispose}(n[e])){const t=n[e];let r=t.initialize();if("object"==typeof r&&"then"in r&&(r=await r),r)return o.set(e,t),t}}t.backend={webgl:new r.WebGLBackend},t.resolveBackend=async function e(t){if(!t)return e(["webgl"]);{const e="string"==typeof t?[t]:t;for(const t of e){const e=o.get(t);if(e)return e;const n=await i(t);if(n)return n}}throw new Error("no available backend to use")}},5038:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.WebGLBackend=void 0;const r=n(1670),o=n(6231),i=n(6416),a=n(7305);t.WebGLBackend=class{get contextId(){return r.env.webgl.contextId}set contextId(e){r.env.webgl.contextId=e}get matmulMaxBatchSize(){return r.env.webgl.matmulMaxBatchSize}set matmulMaxBatchSize(e){r.env.webgl.matmulMaxBatchSize=e}get textureCacheMode(){return r.env.webgl.textureCacheMode}set textureCacheMode(e){r.env.webgl.textureCacheMode=e}get 
pack(){return r.env.webgl.pack}set pack(e){r.env.webgl.pack=e}get async(){return r.env.webgl.async}set async(e){r.env.webgl.async=e}initialize(){try{return this.glContext=(0,a.createWebGLContext)(this.contextId),"number"!=typeof this.matmulMaxBatchSize&&(this.matmulMaxBatchSize=16),"string"!=typeof this.textureCacheMode&&(this.textureCacheMode="full"),"boolean"!=typeof this.pack&&(this.pack=!1),"boolean"!=typeof this.async&&(this.async=!1),o.Logger.setWithEnv(r.env),o.Logger.verbose("WebGLBackend",`Created WebGLContext: ${typeof this.glContext} with matmulMaxBatchSize: ${this.matmulMaxBatchSize}; textureCacheMode: ${this.textureCacheMode}; pack: ${this.pack}; async: ${this.async}.`),!0}catch(e){return o.Logger.warning("WebGLBackend",`Unable to initialize WebGLBackend. ${e}`),!1}}createSessionHandler(e){return new i.WebGLSessionHandler(this,e)}dispose(){this.glContext.dispose()}}},5107:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.CoordsGlslLib=void 0;const r=n(2517),o=n(8520),i=n(5060),a=n(7859),s=n(9390);class u extends o.GlslLib{constructor(e){super(e)}getFunctions(){return Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign({},this.offsetToCoords()),this.coordsToOffset()),this.toVec()),this.valueFrom()),this.getCommonUtilFuncs()),this.getInputsSamplingSnippets()),this.getOutputSamplingSnippet())}getCustomTypes(){return{}}offsetToCoords(){return{offsetToCoords:new o.GlslLibRoutine("\n vec2 offsetToCoords(int offset, int width, int height) {\n int t = offset / width;\n int s = offset - t*width;\n vec2 coords = (vec2(s,t) + vec2(0.5,0.5)) / vec2(width, height);\n return coords;\n }\n ")}}coordsToOffset(){return{coordsToOffset:new o.GlslLibRoutine("\n int coordsToOffset(vec2 coords, int width, int height) {\n float s = coords.s * float(width);\n float t = coords.t * float(height);\n int offset = int(t) * width + int(s);\n return offset;\n }\n ")}}getOutputSamplingSnippet(){const e=this.context.outputTextureLayout;return e.isPacked?this.getPackedOutputSamplingSnippet(e):this.getUnpackedOutputSamplingSnippet(e)}getPackedOutputSamplingSnippet(e){const t=e.unpackedShape,n=[e.width,e.height],r={},a="getOutputCoords";switch(t.length){case 0:r[a]=this.getOutputScalarCoords();break;case 1:r[a]=this.getOutputPacked1DCoords(t,n);break;case 2:r[a]=this.getOutputPacked2DCoords(t,n);break;case 3:r[a]=this.getOutputPacked3DCoords(t,n);break;default:r[a]=this.getOutputPackedNDCoords(t,n)}const s=`\n void setOutput(vec4 val) {\n ${(0,i.getGlsl)(this.context.glContext.version).output} = val;\n }\n `;return r.floatTextureSetRGBA=new o.GlslLibRoutine(s),r}getUnpackedOutputSamplingSnippet(e){const t=e.unpackedShape,n=[e.width,e.height],r={},a="getOutputCoords";switch(t.length){case 0:r[a]=this.getOutputScalarCoords();break;case 1:r[a]=this.getOutputUnpacked1DCoords(t,n);break;case 2:r[a]=this.getOutputUnpacked2DCoords(t,n);break;case 3:r[a]=this.getOutputUnpacked3DCoords(t,n);break;case 4:r[a]=this.getOutputUnpacked4DCoords(t,n);break;case 5:r[a]=this.getOutputUnpacked5DCoords(t,n);break;case 6:r[a]=this.getOutputUnpacked6DCoords(t,n);break;default:throw new Error(`Unsupported output dimensionality: ${t.length}`)}const s=`\n void setOutput(float val) {\n ${(0,i.getGlsl)(this.context.glContext.version).output} = vec4(val, 0, 0, 0);\n }\n `;return r.floatTextureSetR=new o.GlslLibRoutine(s),r}getOutputScalarCoords(){return new o.GlslLibRoutine("\n int getOutputCoords() {\n return 0;\n }\n ")}getOutputPacked1DCoords(e,t){const n=t;let 
r="";return 1===n[0]?(r=`\n int getOutputCoords() {\n return 2 * int(TexCoords.y * ${n[1]}.0);\n }\n `,new o.GlslLibRoutine(r)):1===n[1]?(r=`\n int getOutputCoords() {\n return 2 * int(TexCoords.x * ${n[0]}.0);\n }\n `,new o.GlslLibRoutine(r)):(r=`\n int getOutputCoords() {\n ivec2 resTexRC = ivec2(TexCoords.xy *\n vec2(${n[0]}, ${n[1]}));\n return 2 * (resTexRC.y * ${n[0]} + resTexRC.x);\n }\n `,new o.GlslLibRoutine(r))}getOutputPacked2DCoords(e,t){let n="";if(r.ArrayUtil.arraysEqual(e,t))return n=`\n ivec2 getOutputCoords() {\n return 2 * ivec2(TexCoords.xy * vec2(${t[0]}, ${t[1]}));\n }\n `,new o.GlslLibRoutine(n);const i=t,a=Math.ceil(e[1]/2);return n=`\n ivec2 getOutputCoords() {\n ivec2 resTexRC = ivec2(TexCoords.xy *\n vec2(${i[0]}, ${i[1]}));\n\n int index = resTexRC.y * ${i[0]} + resTexRC.x;\n\n // reverse r and c order for packed texture\n int r = imod(index, ${a}) * 2;\n int c = 2 * (index / ${a});\n\n return ivec2(r, c);\n }\n `,new o.GlslLibRoutine(n)}getOutputPacked3DCoords(e,t){const n=[t[0],t[1]],r=Math.ceil(e[2]/2),i=r*Math.ceil(e[1]/2),a=`\n ivec3 getOutputCoords() {\n ivec2 resTexRC = ivec2(TexCoords.xy *\n vec2(${n[0]}, ${n[1]}));\n int index = resTexRC.y * ${n[0]} + resTexRC.x;\n\n int b = index / ${i};\n index -= b * ${i};\n\n // reverse r and c order for packed texture\n int r = imod(index, ${r}) * 2;\n int c = 2 * (index / ${r});\n\n return ivec3(b, r, c);\n }\n `;return new o.GlslLibRoutine(a)}getOutputPackedNDCoords(e,t){const n=[t[0],t[1]],r=Math.ceil(e[e.length-1]/2),i=r*Math.ceil(e[e.length-2]/2);let a=i,s="",u="b, r, c";for(let t=2;t=0;--t)i[t]=i[t+1]*e[t+1];const a=["r","c","d"],s=i.map(((e,t)=>`int ${a[t]} = index / ${e}; ${t===i.length-1?`int ${a[t+1]} = index - ${a[t]} * ${e}`:`index -= ${a[t]} * ${e}`};`)).join("");return n=`\n ivec3 getOutputCoords() {\n ivec2 resTexRC = ivec2(TexCoords.xy *\n vec2(${t[0]}, ${t[1]}));\n int index = resTexRC.y * ${t[0]} + resTexRC.x;\n ${s}\n return ivec3(r, c, d);\n }\n `,new o.GlslLibRoutine(n)}getOutputUnpacked4DCoords(e,t){let n="";const r=e.length;let i=null;r<2&&(i=[]),i=new Array(r-1),i[r-2]=e[r-1];for(let t=r-3;t>=0;--t)i[t]=i[t+1]*e[t+1];const a=["r","c","d","d2"],s=i.map(((e,t)=>`int ${a[t]} = index / ${e}; ${t===i.length-1?`int ${a[t+1]} = index - ${a[t]} * ${e}`:`index -= ${a[t]} * ${e}`};`)).join("");return n=`\n ivec4 getOutputCoords() {\n ivec2 resTexRC = ivec2(TexCoords.xy *\n vec2(${t[0]}, ${t[1]}));\n int index = resTexRC.y * ${t[0]} + resTexRC.x;\n ${s}\n return ivec4(r, c, d, d2);\n }\n `,new o.GlslLibRoutine(n)}getOutputUnpacked5DCoords(e,t){let n="";const r=e.length;let i=null;r<2&&(i=[]),i=new Array(r-1),i[r-2]=e[r-1];for(let t=r-3;t>=0;--t)i[t]=i[t+1]*e[t+1];const a=["r","c","d","d2","d3"],s=i.map(((e,t)=>`int ${a[t]} = index / ${e}; ${t===i.length-1?`int ${a[t+1]} = index - ${a[t]} * ${e}`:`index -= ${a[t]} * ${e}`};`)).join("");return n=`\n ivec5 getOutputCoords() {\n ivec2 resTexRC = ivec2(TexCoords.xy *\n vec2(${t[0]}, ${t[1]}));\n int index = resTexRC.y * ${t[0]} + resTexRC.x;\n ${s}\n return ivec5(r, c, d, d2, d3);\n }\n `,new o.GlslLibRoutine(n)}getOutputUnpacked6DCoords(e,t){let n="";const r=e.length;let i=null;r<2&&(i=[]),i=new Array(r-1),i[r-2]=e[r-1];for(let t=r-3;t>=0;--t)i[t]=i[t+1]*e[t+1];const a=["r","c","d","d2","d3","d4"],s=i.map(((e,t)=>`int ${a[t]} = index / ${e}; ${t===i.length-1?`int ${a[t+1]} = index - ${a[t]} * ${e}`:`index -= ${a[t]} * ${e}`};`)).join("");return n=`\n ivec6 getOutputCoords() {\n ivec2 resTexRC = ivec2(TexCoords.xy *\n vec2(${t[0]}, ${t[1]}));\n int index = 
resTexRC.y * ${t[0]} + resTexRC.x;\n ${s}\n return ivec6(r, c, d, d2, d3, d4);\n }\n `,new o.GlslLibRoutine(n)}getCommonUtilFuncs(){const e={};let t="uvFromFlat";e[t]=new o.GlslLibRoutine("\n vec2 uvFromFlat(int texNumR, int texNumC, int index) {\n int texC = index / texNumR;\n int texR = index - texC * texNumR;\n // TODO: swap texR, texC order in following function so row is corresponding to u and column is corresponding to\n // v.\n return (vec2(texR, texC) + halfCR) / vec2(texNumR, texNumC);\n }\n "),t="packedUVfrom1D",e[t]=new o.GlslLibRoutine("\n vec2 packedUVfrom1D(int texNumR, int texNumC, int index) {\n int texelIndex = index / 2;\n int texR = texelIndex / texNumC;\n int texC = texelIndex - texR * texNumC;\n return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);\n }\n "),t="packedUVfrom2D",e[t]=new o.GlslLibRoutine("\n vec2 packedUVfrom2D(int texNumR, int texNumC, int texelsInLogicalRow, int row, int col) {\n int texelIndex = (row / 2) * texelsInLogicalRow + (col / 2);\n int texR = texelIndex / texNumC;\n int texC = texelIndex - texR * texNumC;\n return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);\n }\n "),t="packedUVfrom3D",e[t]=new o.GlslLibRoutine("\n vec2 packedUVfrom3D(int texNumR, int texNumC,\n int texelsInBatch, int texelsInLogicalRow, int b,\n int row, int col) {\n int index = b * texelsInBatch + (row / 2) * texelsInLogicalRow + (col / 2);\n int texR = index / texNumC;\n int texC = index - texR * texNumC;\n return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);\n }\n "),t="sampleTexture";const n=(0,i.getGlsl)(this.context.glContext.version);return e[t]=new o.GlslLibRoutine(`\n float sampleTexture(sampler2D textureSampler, vec2 uv) {\n return ${n.texture2D}(textureSampler, uv).r;\n }`),e}getInputsSamplingSnippets(){const e={},t=this.context.outputTextureLayout;return this.context.programInfo.inputNames.forEach(((n,r)=>{const o=this.context.inputTextureLayouts[r],i=(0,s.generateShaderFuncNameFromInputSamplerName)(n);o.isPacked?e[i]=this.getPackedSamplerFromInput(i,n,o):e[i]=this.getUnpackedSamplerFromInput(i,n,o);const a=(0,s.generateShaderFuncNameFromInputSamplerNameAtOutCoords)(n);o.unpackedShape.length<=t.unpackedShape.length&&(o.isPacked?e[a]=this.getPackedSamplerAtOutputCoords(a,o,t,n):e[a]=this.getUnpackedSamplerAtOutputCoords(a,o,t,n))})),e}getPackedSamplerAtOutputCoords(e,t,n,i){const a=t.unpackedShape,u=n.unpackedShape,l=i,c=(0,s.generateShaderFuncNameFromInputSamplerName)(l),f=a.length,d=u.length,p=r.BroadcastUtil.getBroadcastDims(a,u),h=(0,s.getCoordsDataType)(d),g=d-f;let m;const b=(0,s.getGlChannels)();m=0===f?"":d<2&&p.length>=1?"coords = 0;":p.map((e=>`coords.${b[e+g]} = 0;`)).join("\n");let y="";y=d<2&&f>0?"coords":a.map(((e,t)=>`coords.${b[t+g]}`)).join(", ");let A="return outputValue;";const v=1===r.ShapeUtil.size(a),w=1===r.ShapeUtil.size(u);if(1!==f||v||w){if(v&&!w)A=1===d?"\n return vec4(outputValue.x, outputValue.x, 0., 0.);\n ":"\n return vec4(outputValue.x);\n ";else if(p.length){const e=f-2,t=f-1;p.indexOf(e)>-1&&p.indexOf(t)>-1?A="return vec4(outputValue.x);":p.indexOf(e)>-1?A="return vec4(outputValue.x, outputValue.y, outputValue.x, outputValue.y);":p.indexOf(t)>-1&&(A="return vec4(outputValue.xx, outputValue.zz);")}}else A="\n return vec4(outputValue.xy, outputValue.xy);\n ";const _=`\n vec4 ${e}() {\n ${h} coords = getOutputCoords();\n \n int lastDim = coords.${b[d-1]};\n coords.${b[d-1]} = coords.${b[d-2]};\n coords.${b[d-2]} = lastDim;\n \n ${m}\n vec4 outputValue = ${c}(${y});\n ${A}\n }\n `;return new 
o.GlslLibRoutine(_,["coordinates.getOutputCoords"])}getUnpackedSamplerAtOutputCoords(e,t,n,i){const a=[n.width,n.height],u=[t.width,t.height],l=t.unpackedShape.length,c=n.unpackedShape.length,f=t.unpackedShape,d=n.unpackedShape,p=(0,s.generateShaderFuncNameFromInputSamplerName)(i);if(l===c&&r.ArrayUtil.arraysEqual(u,a)){const t=`\n float ${e}() {\n return sampleTexture(${i}, TexCoords);\n }\n `;return new o.GlslLibRoutine(t,["coordinates.sampleTexture"])}const h=(0,s.getCoordsDataType)(c),g=r.BroadcastUtil.getBroadcastDims(f,d),m=c-l;let b;const y=(0,s.getGlChannels)();b=0===l?"":c<2&&g.length>=1?"coords = 0;":g.map((e=>`coords.${y[e+m]} = 0;`)).join("\n");let A="";A=c<2&&l>0?"coords":t.unpackedShape.map(((e,t)=>`coords.${y[t+m]}`)).join(", ");const v=`\n float ${e}() {\n ${h} coords = getOutputCoords();\n ${b}\n return ${p}(${A});\n }\n `;return new o.GlslLibRoutine(v,["coordinates.getOutputCoords"])}getPackedSamplerFromInput(e,t,n){switch(n.unpackedShape.length){case 0:return this.getPackedSamplerScalar(e,t);case 1:return this.getPackedSampler1D(e,t,n);case 2:return this.getPackedSampler2D(e,t,n);case 3:return this.getPackedSampler3D(e,t,n);default:return this.getPackedSamplerND(e,t,n)}}getUnpackedSamplerFromInput(e,t,n){const r=n.unpackedShape;switch(r.length){case 0:return this.getUnpackedSamplerScalar(e,t,n);case 1:return this.getUnpackedSampler1D(e,t,n);case 2:return this.getUnpackedSampler2D(e,t,n);case 3:return this.getUnpackedSampler3D(e,t,n);case 4:return this.getUnpackedSampler4D(e,t,n);case 5:return this.getUnpackedSampler5D(e,t,n);case 6:return this.getUnpackedSampler6D(e,t,n);default:throw new Error(`Unsupported dimension ${r.length}-D`)}}getPackedSamplerScalar(e,t){const n=`\n vec4 ${e}() {\n return ${(0,i.getGlsl)(this.context.glContext.version).texture2D}(${t}, halfCR);\n }\n `;return new o.GlslLibRoutine(n)}getPackedSampler1D(e,t,n){const r=[n.width,n.height],a=[r[1],r[0]],s=(0,i.getGlsl)(this.context.glContext.version),u=`vec4 ${e}(int index) {\n vec2 uv = packedUVfrom1D(\n ${a[0]}, ${a[1]}, index);\n return ${s.texture2D}(${t}, uv);\n }`;return new o.GlslLibRoutine(u,["coordinates.packedUVfrom1D"])}getPackedSampler2D(e,t,n){const a=n.unpackedShape,s=[n.width,n.height],u=(0,i.getGlsl)(this.context.glContext.version),l=s[0],c=s[1];if(null!=s&&r.ArrayUtil.arraysEqual(a,s)){const n=`vec4 ${e}(int row, int col) {\n vec2 uv = (vec2(col, row) + halfCR) / vec2(${c}.0, ${l}.0);\n return ${u.texture2D}(${t}, uv);\n }`;return new o.GlslLibRoutine(n)}const f=s,d=Math.ceil(a[1]/2),p=`vec4 ${e}(int row, int col) {\n vec2 uv = packedUVfrom2D(${f[1]}, ${f[0]}, ${d}, row, col);\n return ${u.texture2D}(${t}, uv);\n }`;return new o.GlslLibRoutine(p,["coordinates.packedUVfrom2D"])}getPackedSampler3D(e,t,n){const r=n.unpackedShape,a=[n.width,n.height],u=[a[0],a[1]],l=(0,i.getGlsl)(this.context.glContext.version);if(1===r[0]){const i=r.slice(1),a=[1,2],u=(0,s.squeezeInputShape)(r,i),l=["b","row","col"],c=JSON.parse(JSON.stringify(n));c.unpackedShape=u;const f=this.getPackedSamplerFromInput(e,t,c),d=`${f.routineBody}\n vec4 ${e}(int b, int row, int col) {\n return ${e}(${(0,s.getSqueezedParams)(l,a)});\n } `;return new o.GlslLibRoutine(d,f.dependencies)}const c=u[0],f=u[1],d=Math.ceil(r[2]/2),p=`vec4 ${e}(int b, int row, int col) {\n vec2 uv = packedUVfrom3D(\n ${f}, ${c}, ${d*Math.ceil(r[1]/2)}, ${d}, b, row, col);\n return ${l.texture2D}(${t}, uv);}`;return new o.GlslLibRoutine(p,["coordinates.packedUVfrom3D"])}getPackedSamplerND(e,t,n){const 
r=n.unpackedShape,a=r.length,s=[n.width,n.height],u=(0,i.getGlsl)(this.context.glContext.version),l=[s[0],s[1]],c=l[1],f=l[0],d=Math.ceil(r[a-1]/2);let p=d*Math.ceil(r[a-2]/2),h="int b, int row, int col",g=`b * ${p} + (row / 2) * ${d} + (col / 2)`;for(let e=2;e{const r=this.context.inputTextureLayouts[n],i=(r.unpackedShape.length>0?r.unpackedShape:r.shape).length;let a=`_${t}`;e[a]=new o.GlslLibRoutine(this.getValueFromSingle(t,i,r.width,r.height,!1),[`shapeUtils.indicesToOffset${a}`,"coordinates.offsetToCoords","fragcolor.getColorAsFloat"]),a+="_T",e[a]=new o.GlslLibRoutine(this.getValueFromSingle(t,i,r.width,r.height,!0),[`shapeUtils.indicesToOffset${a}`,"coordinates.offsetToCoords","fragcolor.getColorAsFloat"])})),e}getValueFromSingle(e,t,n,r,o){let a=`_${e}`;return o&&(a+="_T"),`\n float ${a}(int m[${t}]) {\n int offset = indicesToOffset${a}(m);\n vec2 coords = offsetToCoords(offset, ${n}, ${r});\n float value = getColorAsFloat(${(0,i.getGlsl)(this.context.glContext.version).texture2D}(${e}, coords));\n return value;\n }\n `}getPackedValueFrom(e,t,n,r,o){let a=`_${e}_Pack`;return o&&(a+="_T"),`\n vec4 ${a}(int m[${t}]) {\n int offset = indicesToOffset_${e}(m);\n vec2 coords = offsetToCoords(offset, ${n}, ${r});\n return ${(0,i.getGlsl)(this.context.glContext.version).texture2D}(${e}, coords);\n }\n `}}t.CoordsGlslLib=u},8520:(e,t)=>{"use strict";var n;Object.defineProperty(t,"__esModule",{value:!0}),t.TopologicalSortGlslRoutines=t.GlslLibRoutineNode=t.GlslLibRoutine=t.GlslLib=t.GlslContext=t.FunctionType=void 0,(n=t.FunctionType||(t.FunctionType={}))[n.ValueBased=0]="ValueBased",n[n.Positional=1]="Positional",t.GlslContext=class{constructor(e,t,n,r){this.glContext=e,this.programInfo=t,this.inputTextureLayouts=n,this.outputTextureLayout=r}},t.GlslLib=class{constructor(e){this.context=e}},t.GlslLibRoutine=class{constructor(e,t){this.routineBody=e,this.dependencies=t}},t.GlslLibRoutineNode=class{constructor(e,t,n){this.name=e,this.dependencies=n||[],t&&(this.routineBody=t)}addDependency(e){e&&this.dependencies.push(e)}},t.TopologicalSortGlslRoutines=class{static returnOrderedNodes(e){if(!e||0===e.length)return[];if(1===e.length)return e;const t=new Set,n=new Set,r=new Array;return this.createOrderedNodes(e,t,n,r),r}static createOrderedNodes(e,t,n,r){for(let o=0;o0)for(let e=0;e{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.EncodingGlslLib=void 0;const r=n(8520);class o extends r.GlslLib{constructor(e){super(e)}getFunctions(){return Object.assign(Object.assign({},this.encodeFloat32()),this.decodeFloat32())}getCustomTypes(){return{}}encodeFloat32(){return{encode:new r.GlslLibRoutine("highp vec4 encode(highp float f) {\n return vec4(f, 0.0, 0.0, 0.0);\n }\n ")}}decodeFloat32(){return{decode:new r.GlslLibRoutine("highp float decode(highp vec4 rgba) {\n return rgba.r;\n }\n ")}}encodeUint8(){const e=o.isLittleEndian()?"rgba.rgba=rgba.abgr;":"";return{encode:new r.GlslLibRoutine(`\n highp vec4 encode(highp float f) {\n highp float F = abs(f);\n highp float Sign = step(0.0,-f);\n highp float Exponent = floor(log2(F));\n highp float Mantissa = (exp2(- Exponent) * F);\n Exponent = floor(log2(F) + 127.0) + floor(log2(Mantissa));\n highp vec4 rgba;\n rgba[0] = 128.0 * Sign + floor(Exponent*exp2(-1.0));\n rgba[1] = 128.0 * mod(Exponent,2.0) + mod(floor(Mantissa*128.0),128.0);\n rgba[2] = floor(mod(floor(Mantissa*exp2(23.0 -8.0)),exp2(8.0)));\n rgba[3] = floor(exp2(23.0)*mod(Mantissa,exp2(-15.0)));\n ${e}\n rgba = rgba / 255.0; // values need to be normalized to [0,1]\n return 
rgba;\n }\n `)}}decodeUint8(){const e=o.isLittleEndian()?"rgba.rgba=rgba.abgr;":"";return{decode:new r.GlslLibRoutine(`\n highp float decode(highp vec4 rgba) {\n rgba = rgba * 255.0; // values need to be de-normalized from [0,1] to [0,255]\n ${e}\n highp float Sign = 1.0 - step(128.0,rgba[0])*2.0;\n highp float Exponent = 2.0 * mod(rgba[0],128.0) + step(128.0,rgba[1]) - 127.0;\n highp float Mantissa = mod(rgba[1],128.0)*65536.0 + rgba[2]*256.0 +rgba[3] + float(0x800000);\n highp float Result = Sign * exp2(Exponent) * (Mantissa * exp2(-23.0 ));\n return Result;\n }\n `)}}static isLittleEndian(){const e=new ArrayBuffer(4),t=new Uint32Array(e),n=new Uint8Array(e);if(t[0]=3735928559,239===n[0])return!0;if(222===n[0])return!1;throw new Error("unknown endianness")}}t.EncodingGlslLib=o},9894:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.FragColorGlslLib=void 0;const r=n(8520),o=n(5060);class i extends r.GlslLib{constructor(e){super(e)}getFunctions(){return Object.assign(Object.assign({},this.setFragColor()),this.getColorAsFloat())}getCustomTypes(){return{}}setFragColor(){const e=(0,o.getGlsl)(this.context.glContext.version);return{setFragColor:new r.GlslLibRoutine(`\n void setFragColor(float value) {\n ${e.output} = encode(value);\n }\n `,["encoding.encode"])}}getColorAsFloat(){return{getColorAsFloat:new r.GlslLibRoutine("\n float getColorAsFloat(vec4 color) {\n return decode(color);\n }\n ",["encoding.decode"])}}}t.FragColorGlslLib=i},2848:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.replaceInlines=void 0;const n=/@inline[\s\n\r]+(\w+)[\s\n\r]+([0-9a-zA-Z_]+)\s*\(([^)]*)\)\s*{(([^}]|[\n\r])*)}/gm;t.replaceInlines=function(e){const t={};let r;for(;null!==(r=n.exec(e));){const e=r[3].split(",").map((e=>{const t=e.trim().split(" ");return t&&2===t.length?{type:t[0],name:t[1]}:null})).filter((e=>null!==e));t[r[2]]={params:e,body:r[4]}}for(const n in t){const o="(\\w+)?\\s+([_0-9a-zA-Z]+)\\s+=\\s+__FUNC__\\((.*)\\)\\s*;".replace("__FUNC__",n),i=new RegExp(o,"gm");for(;null!==(r=i.exec(e));){const o=r[1],i=r[2],a=r[3].split(","),s=o?`${o} ${i};`:"";let u=t[n].body,l="";t[n].params.forEach(((e,t)=>{e&&(l+=`${e.type} ${e.name} = ${a[t]};\n`)})),u=`${l}\n ${u}`,u=u.replace("return",`${i} = `);const c=`\n ${s}\n {\n ${u}\n }\n `;e=e.replace(r[0],c)}}return e.replace(n,"")}},8879:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.GlslPreprocessor=void 0;const r=n(8520),o=n(2848),i=n(5483),a=n(5060);t.GlslPreprocessor=class{constructor(e,t,n,o){this.libs={},this.glslLibRoutineDependencyGraph={},this.context=new r.GlslContext(e,t,n,o),Object.keys(i.glslRegistry).forEach((e=>{const t=new i.glslRegistry[e](this.context);this.libs[e]=t}));const a=this.glslLibRoutineDependencyGraph;for(const e in this.libs){const t=this.libs[e].getFunctions();for(const n in t){const o=e+"."+n;let i;a[o]?(i=a[o],i.routineBody=t[n].routineBody):(i=new r.GlslLibRoutineNode(o,t[n].routineBody),a[o]=i);const s=t[n].dependencies;if(s)for(let e=0;e{const r=n.split(".")[1];-1!==e.indexOf(r)&&t.push(this.glslLibRoutineDependencyGraph[n])})),r.TopologicalSortGlslRoutines.returnOrderedNodes(t)}getUniforms(e,t){const n=[];if(e)for(const t of e)n.push(`uniform sampler2D ${t};`);if(t)for(const e of t)n.push(`uniform ${e.type} ${e.name}${e.arrayLength?`[${e.arrayLength}]`:""};`);return n.join("\n")}}},5483:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.glslRegistry=void 0;const 
r=n(5107),o=n(7341),i=n(9894),a=n(2655),s=n(3891);t.glslRegistry={encoding:o.EncodingGlslLib,fragcolor:i.FragColorGlslLib,vec:s.VecGlslLib,shapeUtils:a.ShapeUtilsGlslLib,coordinates:r.CoordsGlslLib}},2655:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.ShapeUtilsGlslLib=void 0;const r=n(8520);class o extends r.GlslLib{constructor(e){super(e)}getFunctions(){return Object.assign(Object.assign(Object.assign(Object.assign(Object.assign({},this.bcastIndex()),this.bcastMatmulIndex()),this.offsetToIndices()),this.indicesToOffset()),this.incrementIndices())}getCustomTypes(){return{}}bcastIndex(){const e=this.context.outputTextureLayout.shape.length,t={};return this.context.programInfo.inputNames.forEach(((n,o)=>{const i=this.context.inputTextureLayouts[o].unpackedShape;if(i.length<=e){const o=i.length,a=e-o,s=`bcastIndices_${n}`;let u="";for(let e=0;e{const i=this.context.inputTextureLayouts[o].shape;if(!(i.length<2||i.length>e)){const o=i.length,a=e-o,s=`bcastMatmulIndices_${n}`;let u="";for(let e=0;e{const i=this.context.inputTextureLayouts[n].shape,a=this.context.inputTextureLayouts[n].strides,s=i.length;let u=`indicesToOffset_${t}`;e[u]=new r.GlslLibRoutine(o.indexToOffsetSingle(u,s,a)),u=`indicesToOffset_${t}_T`,e[u]=new r.GlslLibRoutine(o.indexToOffsetSingle(u,s,a.slice().reverse()))})),e}static indexToOffsetSingle(e,t,n){let r="";for(let e=t-1;e>=0;--e)r+=`\n offset += indices[${e}] * ${n[e]};\n `;return`\n int ${e}(int indices[${t}]) {\n int offset = 0;\n ${r}\n return offset;\n }\n `}offsetToIndices(){const e={};return this.context.programInfo.inputNames.forEach(((t,n)=>{const i=this.context.inputTextureLayouts[n].shape,a=this.context.inputTextureLayouts[n].strides,s=i.length;let u=`offsetToIndices_${t}`;e[u]=new r.GlslLibRoutine(o.offsetToIndicesSingle(u,s,a)),u=`offsetToIndices_${t}_T`,e[u]=new r.GlslLibRoutine(o.offsetToIndicesSingle(u,s,a.slice().reverse()))})),e}static offsetToIndicesSingle(e,t,n){const r=[];for(let e=0;e{const o=this.context.inputTextureLayouts[n].shape,i=o.length,a=`incrementIndices_${t}`;let s="";for(let e=0;e= 0; --i) {\n if(i > axis) continue;\n indices[i] += 1;\n if(indices[i] < shape[i]) {\n break;\n }\n indices[i] = 0;\n }\n }\n `;e[a]=new r.GlslLibRoutine(u)})),e}}t.ShapeUtilsGlslLib=o},5060:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.getDefaultFragShaderMain=t.getFragShaderPreamble=t.getVertexShaderSource=t.getGlsl=void 0;const n={version:"",attribute:"attribute",varyingVertex:"varying",varyingFrag:"varying",texture2D:"texture2D",output:"gl_FragColor",outputDeclaration:""},r={version:"#version 300 es",attribute:"in",varyingVertex:"out",varyingFrag:"in",texture2D:"texture",output:"outputColor",outputDeclaration:"out vec4 outputColor;"};function o(e){return 1===e?n:r}t.getGlsl=o,t.getVertexShaderSource=function(e){const t=o(e);return`${t.version}\n precision highp float;\n ${t.attribute} vec3 position;\n ${t.attribute} vec2 textureCoord;\n\n ${t.varyingVertex} vec2 TexCoords;\n\n void main()\n {\n gl_Position = vec4(position, 1.0);\n TexCoords = textureCoord;\n }`},t.getFragShaderPreamble=function(e){const t=o(e);return`${t.version}\n precision highp float;\n precision highp int;\n precision highp sampler2D;\n ${t.varyingFrag} vec2 TexCoords;\n ${t.outputDeclaration}\n const vec2 halfCR = vec2(0.5, 0.5);\n\n // Custom vector types to handle higher dimenalities.\n struct ivec5\n {\n int x;\n int y;\n int z;\n int w;\n int u;\n };\n\n struct ivec6\n {\n int x;\n int y;\n int z;\n int w;\n int u;\n int v;\n 
};\n\n int imod(int x, int y) {\n return x - y * (x / y);\n }\n\n `},t.getDefaultFragShaderMain=function(e,t){return`\n void main() {\n int indices[${t}];\n toVec(TexCoords, indices);\n vec4 result = vec4(process(indices));\n ${o(e).output} = result;\n }\n `}},3891:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.VecGlslLib=void 0;const r=n(8520);class o extends r.GlslLib{constructor(e){super(e)}getCustomTypes(){return{}}getFunctions(){return Object.assign(Object.assign(Object.assign(Object.assign({},this.binaryVecFunctions()),this.copyVec()),this.setVecItem()),this.getVecItem())}binaryVecFunctions(){const e=this.context.outputTextureLayout.shape.length,t={add:"+=",sub:"-=",mul:"*=",div:"/="},n={};for(const o in t){const i=`${o}Vec`;let a="";for(let n=0;n{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.WebGLInferenceHandler=void 0;const r=n(6231),o=n(9162),i=n(2517),a=n(2403),s=n(7019),u=n(8710),l=n(5611),c=n(4057),f=n(2039);t.WebGLInferenceHandler=class{constructor(e){this.session=e,this.packedTextureDataCache=new Map,this.unpackedTextureDataCache=new Map}calculateTextureWidthAndHeight(e,t){return(0,c.calculateTextureWidthAndHeight)(this.session.layoutStrategy,e,t)}executeProgram(e,t){if(t.length{const n=t.map((e=>`${e.unpackedShape.join(",")};${e.width}x${e.height}`)).join("_");let r=e.name;return e.cacheHint&&(r+="["+e.cacheHint+"]"),r+=":"+n,r})(e,n);let o=this.session.programManager.getArtifact(r);const i=o?o.programInfo:"function"==typeof e.get?e.get():e,a=(0,c.createTextureLayoutFromTextureType)(this.session.layoutStrategy,i.output.dims,i.output.textureType),s=this.createTextureData(a,i.output.type);return o||(o=this.session.programManager.build(i,n,s),this.session.programManager.setArtifact(r,o)),this.runProgram(o,n,s),s}run(e,t){return this.executeProgram(e,t).tensor}runProgram(e,t,n){for(let n=0;nthis.readTexture(a)),(async e=>this.readTextureAsync(a)),void 0,i),texture:n});return this.setTextureData(a.tensor.dataId,a,e.isPacked),a}getTextureData(e,t=!1){return this.session.isInitializer(e)?this.session.getTextureData(e,t):t?this.packedTextureDataCache.get(e):this.unpackedTextureDataCache.get(e)}setTextureData(e,t,n=!1){this.session.isInitializer(e)?this.session.setTextureData(e,t,n):(n?this.packedTextureDataCache:this.unpackedTextureDataCache).set(e,t)}isTextureLayoutCached(e,t=!1){return!!this.getTextureData(e.dataId,t)}dispose(){this.session.textureManager.clearActiveTextures(),this.packedTextureDataCache.forEach((e=>this.session.textureManager.releaseTexture(e))),this.packedTextureDataCache=new Map,this.unpackedTextureDataCache.forEach((e=>this.session.textureManager.releaseTexture(e))),this.unpackedTextureDataCache=new Map}readTexture(e){return e.isPacked?this.readTexture(this.unpack(e)):this.session.backend.glContext.isFloat32DownloadSupported?this.session.textureManager.readTexture(e,e.tensor.type,e.channels):this.session.textureManager.readUint8TextureAsFloat((0,u.encodeAsUint8)(this,e))}async readTextureAsync(e){return e.isPacked?this.readTextureAsync(this.unpack(e)):this.session.backend.glContext.isFloat32DownloadSupported?this.session.textureManager.readTextureAsync(e,e.tensor.type,e.channels):this.session.textureManager.readUint8TextureAsFloat((0,u.encodeAsUint8)(this,e))}pack(e){return this.executeProgram((0,a.createPackProgramInfoLoader)(this,e.tensor),[e.tensor])}unpack(e){return this.executeProgram((0,l.createUnpackProgramInfoLoader)(this,e.tensor),[e.tensor])}}},1640:function(e,t,n){"use strict";var 
r=this&&this.__createBinding||(Object.create?function(e,t,n,r){void 0===r&&(r=n);var o=Object.getOwnPropertyDescriptor(t,n);o&&!("get"in o?!t.__esModule:o.writable||o.configurable)||(o={enumerable:!0,get:function(){return t[n]}}),Object.defineProperty(e,r,o)}:function(e,t,n,r){void 0===r&&(r=n),e[r]=t[n]}),o=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)"default"!==n&&Object.prototype.hasOwnProperty.call(e,n)&&r(t,e,n);return o(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.WEBGL_OP_RESOLVE_RULES=void 0;const a=n(2898),s=i(n(7839)),u=n(4196),l=n(2069),c=n(8138),f=n(9663),d=n(5193),p=n(7992),h=n(1253),g=n(4776),m=n(6572),b=n(3346),y=n(5623),A=n(2870),v=n(2143),w=n(4939),_=n(718),x=n(2268),T=n(8117),E=n(2278),S=n(5524),O=n(5975),k=n(3933),I=n(6558),P=n(5723),C=n(3738),D=i(n(4909)),N=n(8428),B=n(9793);t.WEBGL_OP_RESOLVE_RULES=[["Abs","","6+",D.abs],["Acos","","7+",D.acos],["Add","","7+",s.add],["And","","7+",s.and],["Asin","","7+",D.asin],["Atan","","7+",D.atan],["AveragePool","","7+",v.averagePool,v.parseAveragePoolAttributes],["BatchNormalization","","7+",a.batchNormalization,a.parseBatchNormalizationAttributes],["Cast","","6+",u.cast,u.parseCastAttributes],["Ceil","","6+",D.ceil],["Clip","","6-10",D.clip,D.parseClipAttributes],["Clip","","11+",D.clipV11],["Concat","","4+",l.concat,l.parseConcatAttributes],["Conv","","1+",c.conv,c.parseConvAttributes],["ConvTranspose","","1+",f.convTranspose,f.parseConvTransposeAttributes],["Cos","","7+",D.cos],["Div","","7+",s.div],["Dropout","","7+",D.identity],["DepthToSpace","","1+",d.depthToSpace,d.parseDepthToSpaceAttributes],["Equal","","7+",s.equal],["Elu","","6+",D.elu,D.parseEluAttributes],["Exp","","6+",D.exp],["Flatten","","1+",p.flatten,p.parseFlattenAttributes],["Floor","","6+",D.floor],["FusedConv","com.microsoft","1+",c.conv,c.parseConvAttributes],["Gather","","1+",h.gather,h.parseGatherAttributes],["Gemm","","7-10",g.gemm,g.parseGemmAttributesV7],["Gemm","","11+",g.gemm,g.parseGemmAttributesV11],["GlobalAveragePool","","1+",v.globalAveragePool,v.parseGlobalAveragePoolAttributes],["GlobalMaxPool","","1+",v.globalMaxPool],["Greater","","7+",s.greater],["Identity","","1+",D.identity],["ImageScaler","","1+",m.imageScaler,m.parseImageScalerAttributes],["InstanceNormalization","","6+",b.instanceNormalization,b.parseInstanceNormalizationAttributes],["LeakyRelu","","6+",D.leakyRelu,D.parseLeakyReluAttributes],["Less","","7+",s.less],["Log","","6+",D.log],["MatMul","","1+",y.matMul,y.parseMatMulAttributes],["MaxPool","","1+",v.maxPool,v.parseMaxPoolAttributes],["Mul","","7+",s.mul],["Neg","","6+",D.neg],["Not","","1+",D.not],["Or","","7+",s.or],["Pad","","2-10",A.padV2,A.parsePadAttributesV2],["Pad","","11+",A.padV11,A.parsePadAttributesV11],["Pow","","7+",s.pow],["PRelu","","7+",s.pRelu],["ReduceLogSum","","1+",w.reduceLogSum,w.parseReduceAttributes],["ReduceMax","","1+",w.reduceMax,w.parseReduceAttributes],["ReduceMean","","1+",w.reduceMean,w.parseReduceAttributes],["ReduceMin","","1+",w.reduceMin,w.parseReduceAttributes],["ReduceProd","","1+",w.reduceProd,w.parseReduceAttributes],["ReduceSum","","1-12",w.reduceSum,w.parseReduceAttributes],["ReduceSumSquare","","1+",w.reduceLogSumSquare,w.parseReduceAttributes],["Relu","","6+",D.relu],["Reshape","","5+",_.reshape],["Resize","","10",x.resize,x.parseResizeAttributesV10],["Resize","","11+"
,x.resize,x.parseResizeAttributesV11],["Shape","","1+",T.shape],["Sigmoid","","6+",D.sigmoid],["Sin","","7+",D.sin],["Slice","","10+",E.sliceV10],["Slice","","1-9",E.slice,E.parseSliceAttributes],["Softmax","","1-12",S.softmax,S.parseSoftmaxAttributes],["Softmax","","13+",S.softmaxV13,S.parseSoftmaxAttributesV13],["Split","","2-12",O.split,O.parseSplitAttributes],["Sqrt","","6+",D.sqrt],["Squeeze","","1-12",k.squeeze,k.parseSqueezeAttributes],["Squeeze","","13+",k.squeezeV13],["Sub","","7+",s.sub],["Sum","","6+",I.sum],["Tan","","7+",D.tan],["Tanh","","6+",D.tanh],["Tile","","6+",P.tile],["Transpose","","1+",C.transpose,C.parseTransposeAttributes],["Upsample","","7-8",B.upsample,B.parseUpsampleAttributesV7],["Upsample","","9",B.upsample,B.parseUpsampleAttributesV9],["Unsqueeze","","1-12",N.unsqueeze,N.parseUnsqueezeAttributes],["Unsqueeze","","13+",N.unsqueezeV13],["Xor","","7+",s.xor]]},2898:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.parseBatchNormalizationAttributes=t.batchNormalization=void 0;const r=n(246),o=n(5060),i=n(2039),a={name:"BatchNormalization",inputNames:["A","Scale","B","Mean","Variance"],inputTypes:[i.TextureType.unpacked,i.TextureType.unpacked,i.TextureType.unpacked,i.TextureType.unpacked,i.TextureType.unpacked]};t.batchNormalization=(e,t,n)=>(u(t),[e.run(Object.assign(Object.assign({},a),{cacheHint:n.cacheKey,get:()=>s(e,t,n)}),t)]),t.parseBatchNormalizationAttributes=e=>{const t=e.attributes.getFloat("epsilon",1e-5),n=e.attributes.getFloat("momentum",.9),o=e.attributes.getInt("spatial",1);return(0,r.createAttributeWithCacheKey)({epsilon:t,momentum:n,spatial:o})};const s=(e,t,n)=>{const r=(0,o.getGlsl)(e.session.backend.glContext.version),s=t[0].dims.length,[u,l]=e.calculateTextureWidthAndHeight(t[1].dims,i.TextureType.unpacked),c=`\n float process(int[${s}] indices) {\n vec2 position = offsetToCoords(indices[1], ${u}, ${l});\n float scale = getColorAsFloat(${r.texture2D}(Scale, position));\n float mean = getColorAsFloat(${r.texture2D}(Mean, position));\n float variance = getColorAsFloat(${r.texture2D}(Variance, position));\n float b = getColorAsFloat(${r.texture2D}(B, position));\n\n return scale * ( (_A(indices) - mean) / sqrt(variance + float(${n.epsilon})) ) + b;\n }`;return Object.assign(Object.assign({},a),{output:{dims:t[0].dims,type:t[0].type,textureType:i.TextureType.unpacked},shaderSource:c})},u=e=>{if(!e||5!==e.length)throw new Error("BatchNormalization requires 5 inputs.");const t=e[0],n=e[1],r=e[2],o=e[3],i=e[4];if(t.dims.length<3||1!==n.dims.length||1!==r.dims.length||1!==o.dims.length||1!==i.dims.length)throw new Error("invalid input shape.");if(n.dims[0]!==t.dims[1]||r.dims[0]!==t.dims[1]||o.dims[0]!==t.dims[1]||i.dims[0]!==t.dims[1])throw new Error("invalid input shape.");if("float32"!==t.type&&"float64"!==t.type||"float32"!==n.type&&"float64"!==n.type||"float32"!==r.type&&"float64"!==r.type||"float32"!==o.type&&"float64"!==o.type||"float32"!==i.type&&"float64"!==i.type)throw new Error("invalid input tensor types.")}},7839:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.xor=t.sub=t.pRelu=t.pow=t.or=t.mul=t.less=t.greater=t.equal=t.div=t.and=t.add=t.glslPRelu=t.glslPow=t.glslXor=t.glslOr=t.glslAnd=t.glslLess=t.glslGreater=t.glslEqual=t.glslSub=t.glslMul=t.glslDiv=t.glslAdd=void 0;const r=n(2517),o=n(8520),i=n(5060),a=n(2039);function s(){const e="add_";return{body:`\n float ${e}(float a, float b) {\n return a + b;\n }\n vec4 ${e}(vec4 v1, vec4 v2) {\n return v1 + v2;\n }\n 
[dist/ vendor bundle: minified WebGL inference backend (onnxruntime-web / ONNX.js style). The webpack modules in this span define GLSL program-info loaders and attribute parsers for: element-wise binary ops (add, sub, mul, div, equal, greater, less, and, or, xor, pow, prelu), cast, concat (packed and unpacked), conv (grouped, packed, unpacked), convTranspose, depthToSpace, the conv dot-product kernel, flatten, internal activation snippets (Relu/Sigmoid/Clip), gather, gemm, im2col (packed and unpacked), imageScaler, instanceNormalization, matmul (packed and unpacked), pack/unpack channel helpers, pad (constant/reflect/edge), pooling (average, max, global), the reduce family (sum, mean, max, min, prod, logSum, logSumSquare), reshape (packed and unpacked), resize, shape, slice, softmax, and split.]
type.")}},3933:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.parseSqueezeAttributes=t.squeezeV13=t.squeeze=void 0;const r=n(2517);t.squeeze=(e,t,n)=>{o(t);const i=r.ShapeUtil.squeezeShape(t[0].dims,n);return[e.reshapeUnpacked(t[0],i)]},t.squeezeV13=(e,n)=>(i(n),(0,t.squeeze)(e,[n[0]],Array.from(n[1].integerData))),t.parseSqueezeAttributes=e=>e.attributes.getInts("axes");const o=e=>{if(!e||1!==e.length)throw new Error("Squeeze requires 1 input.");if("string"===e[0].type)throw new Error("invalid input tensor types.")},i=e=>{if(!e||2!==e.length)throw new Error("Squeeze requires 2 inputs.");if("int32"!==e[1].type)throw new Error("Invalid input type.")}},6558:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.sum=void 0;const r=n(5060),o=n(2039);t.sum=(e,t)=>{a(t);const n={name:"Sum",inputNames:t.map(((e,t)=>`X${t}`)),inputTypes:new Array(t.length).fill(o.TextureType.unpacked)};return[e.run(Object.assign(Object.assign({},n),{get:()=>i(e,t,n)}),t)]};const i=(e,t,n)=>{const i=(0,r.getGlsl)(e.session.backend.glContext.version),a=t[0].dims.slice(),s=`\n void main() {\n vec4 result = ${t.map(((e,t)=>`${i.texture2D}(X${t},TexCoords)`)).join(" + ")};\n ${i.output} = result;\n }\n `;return Object.assign(Object.assign({},n),{output:{dims:a,type:t[0].type,textureType:o.TextureType.unpacked},hasMain:!0,shaderSource:s})},a=e=>{if(!e||0===e.length)throw new Error("Sum requires inputs.");const t=e[0].dims.length;for(let n=1;n{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.tile=void 0;const r=n(782),o=n(2039);t.tile=(e,t)=>{a(t);const n={name:"Tile",inputNames:["A"],inputTypes:[o.TextureType.unpacked]};return[e.run(Object.assign(Object.assign({},n),{get:()=>i(e,t,n)}),t)]};const i=(e,t,n)=>{const r=t[0].dims.slice(),i=new Array(r.length),a=[];for(let e=0;e{if(!e||2!==e.length)throw new Error("Tile requires 2 input.");if(1!==e[1].dims.length)throw new Error("The second input shape must 1 dimension.");if(e[1].dims[0]!==e[0].dims.length)throw new Error("Invalid input shape.");if(-1===r.NUMBER_TYPES.indexOf(e[0].type))throw new Error("Invalid input type.");if("int32"!==e[1].type&&"int16"!==e[1].type)throw new Error("Invalid repeat type.")}},3738:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.parseTransposeAttributes=t.transpose=void 0;const r=n(246),o=n(2517),i=n(2039),a={name:"Transpose",inputNames:["A"],inputTypes:[i.TextureType.unpacked]};t.transpose=(e,t,n)=>(f(t),[e.run(Object.assign(Object.assign({},a),{cacheHint:n.cacheKey,get:()=>s(e,t[0],n.perm)}),t)]),t.parseTransposeAttributes=e=>(0,r.createAttributeWithCacheKey)({perm:e.attributes.getInts("perm",[])});const s=(e,t,n)=>{const r=t.dims;n=u(r,n);const o=l(r,n),s=r.length,f=`\n ${c("perm",n,s)}\n float process(int indices[${s}]) {\n int a[${s}];\n perm(a, indices);\n return _A(a);\n }`;return Object.assign(Object.assign({},a),{output:{dims:o,type:t.type,textureType:i.TextureType.unpacked},shaderSource:f})},u=(e,t)=>(t&&t.length!==e.length&&(t=[...e.keys()].reverse()),t),l=(e,t)=>(t=u(e,t),o.ShapeUtil.sortBasedOnPerm(e,t)),c=(e,t,n)=>{const r=[];r.push(`void ${e}(out int a[${n}], int src[${n}]) {`);for(let e=0;e{if(!e||1!==e.length)throw new Error("Transpose requires 1 input.");if("float32"!==e[0].type&&"float64"!==e[0].type)throw new Error("input should be float tensor")}},8710:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.encodeAsUint8=void 0;const r=n(5060),o=n(2039);t.encodeAsUint8=(e,t)=>{const 
n=t.shape,i=(0,r.getGlsl)(e.session.backend.glContext.version),a=`\n const float FLOAT_MAX = 1.70141184e38;\n const float FLOAT_MIN = 1.17549435e-38;\n\n bool isNaN(float val) {\n return (val < 1.0 || 0.0 < val || val == 0.0) ? false : true;\n }\n\n highp vec4 encodeAsUint8(highp float v) {\n if (isNaN(v)) {\n return vec4(255, 255, 255, 255);\n }\n\n highp float av = abs(v);\n\n if(av < FLOAT_MIN) {\n return vec4(0.0, 0.0, 0.0, 0.0);\n } else if(v > FLOAT_MAX) {\n return vec4(0.0, 0.0, 128.0, 127.0) / 255.0;\n } else if(v < -FLOAT_MAX) {\n return vec4(0.0, 0.0, 128.0, 255.0) / 255.0;\n }\n\n highp vec4 c = vec4(0,0,0,0);\n\n highp float e = floor(log2(av));\n highp float m = exp2(fract(log2(av))) - 1.0;\n\n c[2] = floor(128.0 * m);\n m -= c[2] / 128.0;\n c[1] = floor(32768.0 * m);\n m -= c[1] / 32768.0;\n c[0] = floor(8388608.0 * m);\n\n highp float ebias = e + 127.0;\n c[3] = floor(ebias / 2.0);\n ebias -= c[3] * 2.0;\n c[2] += floor(ebias) * 128.0;\n\n c[3] += 128.0 * step(0.0, -v);\n\n return c / 255.0;\n }\n\n void main() {\n float value = ${i.texture2D}(X,TexCoords).r;\n ${i.output} = encodeAsUint8(value);\n }`,s={name:"Uint8Encode",inputTypes:[o.TextureType.unpacked],inputNames:["X"],output:{dims:n,type:t.tensor.type,textureType:o.TextureType.downloadUint8AsFloat},shaderSource:a,hasMain:!0};return e.executeProgram(s,[t.tensor])}},4909:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.tanh=t.tan=t.sqrt=t.sin=t.sigmoid=t.relu=t.not=t.neg=t.log=t.parseLeakyReluAttributes=t.leakyRelu=t.identity=t.floor=t.exp=t.parseEluAttributes=t.elu=t.cos=t.ceil=t.clipV11=t.parseClipAttributes=t.clip=t.atan=t.asin=t.acos=t.abs=t.glslTanh=t.glslTan=t.glslSqrt=t.glslSigmoid=t.glslRelu=t.glslSin=t.glslNot=t.glslNeg=t.glslLog=t.glslLeakyRelu=t.glslIdentity=t.glslClip=t.glslFloor=t.glslExp=t.glslElu=t.glslCos=t.glslCeil=t.glslAtan=t.glslAsin=t.glslAcos=t.glslAbs=void 0;const r=n(246),o=n(2517),i=n(8520),a=n(5060),s=n(2039);function u(){return I("abs")}function l(){return I("acos")}function c(){return I("asin")}function f(){return I("atan")}function d(){return I("ceil")}function p(){return I("cos")}function h(e){const t="elu";return{body:`\n const float alpha = float(${e});\n\n float ${t}_(float a) {\n return a >= 0.0 ? a: (exp(a) - 1.0) * alpha;\n }\n vec4 ${t}_(vec4 v) {\n return vec4(${t}_(v.x), ${t}_(v.y), ${t}_(v.z), ${t}_(v.w));\n }\n `,name:t,type:i.FunctionType.ValueBased}}function g(){return I("exp")}function m(){return I("floor")}function b(e,t){const n="clip";return{body:`\n const float min = float(${e});\n const float max = float(${t});\n\n float ${n}_(float a) {\n return clamp(a, min, max);\n }\n vec4 ${n}_(vec4 v) {\n return clamp(v, min, max);\n }\n `,name:n,type:i.FunctionType.ValueBased}}function y(){const e="indentity";return{body:`\n float ${e}_(float a) {\n return a;\n }\n vec4 ${e}_(vec4 v) {\n return v;\n }\n `,name:e,type:i.FunctionType.ValueBased}}function A(e){const t="leakyRelu";return{body:`\n const float alpha = float(${e});\n\n float ${t}_(float a) {\n return a < 0.0 ? a * alpha : a;\n }\n vec4 ${t}_(vec4 v) {\n return vec4(${t}_(v.x), ${t}_(v.y), ${t}_(v.z), ${t}_(v.w));\n }\n `,name:t,type:i.FunctionType.ValueBased}}function v(){return I("log")}function w(){const e="neg";return{body:`\n float ${e}_(float a) {\n return -a;\n }\n vec4 ${e}_(vec4 v) {\n return -v;\n }\n `,name:e,type:i.FunctionType.ValueBased}}function _(){const e="not";return{body:`\n float ${e}_(float a) {\n return float( ! 
bool(a) );\n }\n bool ${e}_(bool a) {\n return !a;\n }\n vec4 ${e}_(vec4 v) {\n return vec4(!bool(v.x), !bool(v.y), !bool(v.z), !bool(v.w));\n }\n bvec4 ${e}_(bvec4 v) {\n return bvec4(!v.x, !v.y, !v.z, !v.w);\n }\n `,name:e,type:i.FunctionType.ValueBased}}function x(){return I("sin")}function T(){const e="relu";return{body:`\n float ${e}_(float a) {\n return max( a, 0.0 );\n }\n vec4 ${e}_(vec4 v) {\n return max( v, 0.0 );\n }\n `,name:e,type:i.FunctionType.ValueBased}}function E(){const e="sigmoid";return{body:`\n float ${e}_(float a) {\n return 1.0 / (1.0 + exp(-a));\n }\n vec4 ${e}_(vec4 v) {\n return 1.0 / (1.0 + exp(-v));\n }\n `,name:e,type:i.FunctionType.ValueBased}}function S(){return I("sqrt")}function O(){return I("tan")}function k(){const e="tanh";return{body:`\n float ${e}_(float a) {\n a = clamp(a, -10., 10.);\n a = exp(2.*a);\n return (a - 1.) / (a + 1.);\n }\n vec4 ${e}_(vec4 v) {\n v = clamp(v, -10., 10.);\n v = exp(2.*v);\n return (v - 1.) / (v + 1.);\n }\n `,name:e,type:i.FunctionType.ValueBased}}function I(e){return{body:`\n float ${e}_(float a) {\n return ${e}(a);\n }\n vec4 ${e}_(vec4 v) {\n return ${e}(v);\n }\n `,name:e,type:i.FunctionType.ValueBased}}t.glslAbs=u,t.glslAcos=l,t.glslAsin=c,t.glslAtan=f,t.glslCeil=d,t.glslCos=p,t.glslElu=h,t.glslExp=g,t.glslFloor=m,t.glslClip=b,t.glslIdentity=y,t.glslLeakyRelu=A,t.glslLog=v,t.glslNeg=w,t.glslNot=_,t.glslSin=x,t.glslRelu=T,t.glslSigmoid=E,t.glslSqrt=S,t.glslTan=O,t.glslTanh=k;const P=(e,t,n,r)=>{const o=e.session.pack?s.TextureType.packed:s.TextureType.unpacked,i={name:n.name,inputTypes:[o],inputNames:["A"],cacheHint:r};return Object.assign(Object.assign({},i),{get:()=>((e,t,n,r)=>{const o=e.session.pack?s.TextureType.packed:s.TextureType.unpacked,i=(0,a.getGlsl)(e.session.backend.glContext.version);return Object.assign(Object.assign({},t),{output:{dims:n.dims,type:n.type,textureType:o},shaderSource:`\n ${r.body}\n void main() {\n vec4 v = ${i.texture2D}(A, TexCoords);\n v = ${r.name}_(v);\n ${i.output} = v;\n }\n `,hasMain:!0})})(e,i,t,n)})};t.abs=(e,t)=>[e.run(P(e,t[0],u()),t)],t.acos=(e,t)=>[e.run(P(e,t[0],l()),t)],t.asin=(e,t)=>[e.run(P(e,t[0],c()),t)],t.atan=(e,t)=>[e.run(P(e,t[0],f()),t)],t.clip=(e,t,n)=>[e.run(P(e,t[0],b(n.min,n.max),n.cacheKey),t)],t.parseClipAttributes=e=>(0,r.createAttributeWithCacheKey)({min:e.attributes.getFloat("min",o.MIN_CLIP),max:e.attributes.getFloat("max",o.MAX_CLIP)}),t.clipV11=(e,n)=>{const r=C(e,n);return(0,t.clip)(e,[n[0]],r)};const C=(e,t)=>{if(t.length>=3&&(!e.session.isInitializer(t[1].dataId)||!e.session.isInitializer(t[2].dataId)))throw new Error("dynamic clip attributes are not allowed");const 
n=t.length>=3?t[1].numberData[0]:o.MIN_CLIP,i=t.length>=3?t[2].numberData[0]:o.MAX_CLIP;return(0,r.createAttributeWithCacheKey)({min:n,max:i})};t.ceil=(e,t)=>[e.run(P(e,t[0],d()),t)],t.cos=(e,t)=>[e.run(P(e,t[0],p()),t)],t.elu=(e,t,n)=>[e.run(P(e,t[0],h(n.alpha),n.cacheKey),t)],t.parseEluAttributes=e=>(0,r.createAttributeWithCacheKey)({alpha:e.attributes.getFloat("alpha",1)}),t.exp=(e,t)=>[e.run(P(e,t[0],g()),t)],t.floor=(e,t)=>[e.run(P(e,t[0],m()),t)],t.identity=(e,t)=>[e.run(P(e,t[0],y()),t)],t.leakyRelu=(e,t,n)=>[e.run(P(e,t[0],A(n.alpha),n.cacheKey),t)],t.parseLeakyReluAttributes=e=>(0,r.createAttributeWithCacheKey)({alpha:e.attributes.getFloat("alpha",.01)}),t.log=(e,t)=>[e.run(P(e,t[0],v()),t)],t.neg=(e,t)=>[e.run(P(e,t[0],w()),t)],t.not=(e,t)=>[e.run(P(e,t[0],_()),t)],t.relu=(e,t)=>[e.run(P(e,t[0],T()),t)],t.sigmoid=(e,t)=>[e.run(P(e,t[0],E()),t)],t.sin=(e,t)=>[e.run(P(e,t[0],x()),t)],t.sqrt=(e,t)=>[e.run(P(e,t[0],S()),t)],t.tan=(e,t)=>[e.run(P(e,t[0],O()),t)],t.tanh=(e,t)=>[e.run(P(e,t[0],k()),t)]},5611:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.createUnpackProgramInfoLoader=t.createUnpackProgramInfo=void 0;const r=n(5060),o=n(2039),i=n(9390),a=n(2827),s={name:"unpack",inputNames:["A"],inputTypes:[o.TextureType.packed]};t.createUnpackProgramInfo=(e,t)=>{const n=t.dims.length,u=(0,a.getChannels)("rc",n),l=u.slice(-2),c=(0,i.getCoordsDataType)(n),f=(0,a.unpackFromChannel)(),d=0===t.dims.length?"":function(e,t){if(1===e)return"rc";let n="";for(let r=0;rObject.assign(Object.assign({},s),{get:()=>(0,t.createUnpackProgramInfo)(e,n)})},8428:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.parseUnsqueezeAttributes=t.unsqueezeV13=t.unsqueeze=void 0;const r=n(2517);t.unsqueeze=(e,t,n)=>{o(t);const i=r.ShapeUtil.unsqueezeShape(t[0].dims,n);return[e.reshapeUnpacked(t[0],i)]},t.unsqueezeV13=(e,n)=>(i(n),(0,t.unsqueeze)(e,[n[0]],Array.from(n[1].integerData))),t.parseUnsqueezeAttributes=e=>e.attributes.getInts("axes");const o=e=>{if(!e||1!==e.length)throw new Error("Unsqueeze requires 1 input.");if("string"===e[0].type)throw new Error("invalid input tensor types.")},i=e=>{if(!e||2!==e.length)throw new Error("Unsqueeze requires 2 inputs.");if("int32"!==e[1].type)throw new Error("Invalid input type.")}},9793:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.scalesValidation=t.validateInputs=t.parseUpsampleAttributes=t.parseUpsampleAttributesV9=t.parseUpsampleAttributesV7=t.upsample=void 0;const r=n(246),o=n(5060),i=n(2039),a={name:"Upsample",inputNames:["X"],inputTypes:[i.TextureType.unpacked]};t.upsample=(e,n,r)=>((0,t.validateInputs)(n,r),[e.run(Object.assign(Object.assign({},a),{cacheHint:r.cacheKey,get:()=>s(e,n,r)}),n)]),t.parseUpsampleAttributesV7=e=>(0,t.parseUpsampleAttributes)(e,7),t.parseUpsampleAttributesV9=e=>(0,t.parseUpsampleAttributes)(e,9),t.parseUpsampleAttributes=(e,n)=>{const o=n>=10,i=e.attributes.getString("mode","nearest");if("nearest"!==i&&"linear"!==i&&(n<11||"cubic"!==i))throw new Error(`unrecognized mode: ${i}`);let a=[];n<9&&(a=e.attributes.getFloats("scales"),(0,t.scalesValidation)(a,i,o));const s=e.attributes.getFloat("extrapolation_value",0),u=n>10?e.attributes.getString("coordinate_transformation_mode","half_pixel"):"asymmetric";if(-1===["asymmetric","pytorch_half_pixel","tf_half_pixel_for_nn","align_corners","tf_crop_and_resize","half_pixel"].indexOf(u))throw new Error(`coordinate_transform_mode '${u}' is not supported`);const 
l="tf_crop_and_resize"===u,c=l,f="nearest"===i&&n>=11?e.attributes.getString("nearest_mode","round_prefer_floor"):"";if(-1===["round_prefer_floor","round_prefer_ceil","floor","ceil",""].indexOf(f))throw new Error(`nearest_mode '${f}' is not supported`);const d=e.attributes.getFloat("cubic_coeff_a",-.75),p=0!==e.attributes.getInt("exclude_outside",0);if(p&&"cubic"!==i)throw new Error("exclude_outside can be set to 1 only when mode is CUBIC.");const h=n<11||"nearest"===i&&"asymmetric"===u&&"floor"===f;let g=0,m=0,b=0;return n>10?e.inputs.length>2?(g=1,m=2,b=3):(m=1,b=2):9===n&&(m=1),(0,r.createAttributeWithCacheKey)({opset:n,isResize:o,mode:i,scales:a,extrapolationValue:s,coordinateTransformMode:u,useExtrapolation:c,needRoiInput:l,nearestMode:f,cubicCoefficientA:d,excludeOutside:p,useNearest2xOptimization:h,roiInputIdx:g,scalesInputIdx:m,sizesInputIdx:b})};const s=(e,t,n)=>{const r=(0,o.getGlsl)(e.session.backend.glContext.version),[s,u]=e.calculateTextureWidthAndHeight(t[0].dims,i.TextureType.unpacked),l=t[0].dims.map(((e,t)=>Math.floor(e*n.scales[t]))),[c,f]=e.calculateTextureWidthAndHeight(l,i.TextureType.unpacked),d=l.length,p=new Array(d),h=new Array(d);let g=`\n int output_pitches[${d}];\n int input_pitches[${d}];\n `;for(let e=d-1;e>=0;e--)p[e]=e===d-1?1:p[e+1]*l[e+1],h[e]=e===d-1?1:h[e+1]*t[0].dims[e+1],g+=`\n output_pitches[${e}] = ${p[e]};\n input_pitches[${e}] = ${h[e]};\n `;const m=`\n float getInputFloat(int index) {\n vec2 coords = offsetToCoords(index, ${s}, ${u});\n float value = getColorAsFloat(${r.texture2D}(X, coords));\n return value;\n }\n `,b="nearest"===n.mode?`\n ${m}\n float process(int indices[${d}]) {\n int input_index = 0;\n int output_index = coordsToOffset(TexCoords, ${c}, ${f});\n\n ${g}\n\n int d, m;\n for (int dim = 0; dim < ${d}; ++dim) {\n d = output_index / output_pitches[dim];\n m = output_index - d * output_pitches[dim];\n output_index = m;\n\n if (scales[dim] != 1 && d > 0) {\n int d2 = d / scales[dim];\n m = d - d2 * scales[dim];\n d = d2;\n }\n input_index += input_pitches[dim] * d;\n }\n\n return getInputFloat(input_index);\n }`:4===d?`\n ${m}\n float process(int indices[4]) {\n int input_index = 0;\n int output_index = coordsToOffset(TexCoords, ${c}, ${f});\n\n ${g}\n\n int m;\n int index_of_dim0, index_of_dim1, index_of_dim2, index_of_dim3;\n index_of_dim0 = output_index / output_pitches[0];\n m = output_index - index_of_dim0 * output_pitches[0];\n index_of_dim1 = m / output_pitches[1];\n m = m - index_of_dim1 * output_pitches[1];\n index_of_dim2 = m / output_pitches[2];\n m = m - index_of_dim2 * output_pitches[2];\n index_of_dim3 = m;\n\n int index_of_input_dim2, index_of_input_dim3, x_offset, y_offset;\n index_of_input_dim2 = index_of_dim2 / scales[2];\n y_offset = index_of_dim2 - index_of_input_dim2 * scales[2];\n index_of_input_dim3 = index_of_dim3 / scales[3];\n x_offset = index_of_dim3 - index_of_input_dim3 * scales[3];\n\n input_index = index_of_dim0 * input_pitches[0] +\n index_of_dim1 * input_pitches[1] +\n index_of_input_dim2 * input_pitches[2] +\n index_of_input_dim3;\n\n float x00 = getInputFloat(input_index);\n float x10, x01, x11;\n\n bool end_of_dim2 = false;\n if (index_of_input_dim2 == (${t[0].dims[2]} - 1)) {\n // It's the end in dimension 2\n x01 = x00;\n end_of_dim2 = true;\n } else {\n x01 = getInputFloat(input_index + input_pitches[2]);\n }\n\n if (index_of_input_dim3 == (input_pitches[2] - 1)) {\n // It's the end in dimension 3\n x10 = x00;\n x11 = x01;\n }\n else {\n x10 = getInputFloat(input_index + 1);\n x11 = end_of_dim2 
? x10 : getInputFloat(input_index + input_pitches[2] + 1);\n }\n\n float y0 = x00 + float(y_offset) * (x01 - x00) / float(scales[2]);\n float y1 = x10 + float(y_offset) * (x11 - x10) / float(scales[2]);\n return y0 + float(x_offset) * (y1 - y0) / float(scales[3]);\n }`:`\n ${m}\n float process(int indices[2]) {\n int input_index = 0;\n int output_index = coordsToOffset(TexCoords, ${c}, ${f});\n\n ${g}\n\n int m;\n int index_of_dim0, index_of_dim1;\n index_of_dim0 = output_index / output_pitches[0];\n m = output_index - index_of_dim0 * output_pitches[0];\n index_of_dim1 = m;\n\n int index_of_input_dim0, index_of_input_dim1, x_offset, y_offset;\n index_of_input_dim0 = index_of_dim0 / scales[0];\n y_offset = index_of_dim0 - index_of_input_dim0 * scales[0];\n index_of_input_dim1 = index_of_dim1 / scales[1];\n x_offset = index_of_dim1 - index_of_input_dim1 * scales[1];\n\n input_index = index_of_input_dim0 * input_pitches[0] + index_of_input_dim1;\n\n float x00 = getInputFloat(input_index);\n float x10, x01, x11;\n\n bool end_of_dim0 = false;\n if (index_of_input_dim0 == (${t[0].dims[0]} - 1)) {\n // It's the end in dimension 0\n x01 = x00;\n end_of_dim0 = true;\n } else {\n x01 = getInputFloat(input_index + input_pitches[0]);\n }\n\n if (index_of_input_dim1 == (input_pitches[0] - 1)) {\n // It's the end in dimension 1\n x10 = x00;\n x11 = x01;\n }\n else {\n x10 = getInputFloat(input_index + 1);\n x11 = end_of_dim0 ? x10 : getInputFloat(input_index + input_pitches[0] + 1);\n }\n\n float y0 = x00 + float(y_offset) * (x01 - x00) / float(scales[0]);\n float y1 = x10 + float(y_offset) * (x11 - x10) / float(scales[0]);\n return y0 + float(x_offset) * (y1 - y0) / float(scales[1]);\n }`;return Object.assign(Object.assign({},a),{output:{dims:l,type:t[0].type,textureType:i.TextureType.unpacked},shaderSource:b,variables:[{name:"scales",type:"int",arrayLength:n.scales.length,data:n.scales.map((e=>Math.ceil(e)))}]})};t.validateInputs=(e,t)=>{if(!e||t.opset<9&&1!==e.length||t.opset>=9&&t.opset<11&&2!==e.length||t.opset>=11&&e.length<2)throw new Error("invalid inputs.");if(t.scales.length>0&&e[0].dims.length!==t.scales.length)throw new Error("Invalid input shape.");if("string"===e[0].type)throw new Error("Invalid input tensor types.")},t.scalesValidation=(e,t,n)=>{if(n){for(const t of e)if(t<=0)throw new Error("Scale value should be greater than 0.")}else for(const t of e)if(t<1)throw new Error("Scale value should be greater than or equal to 1.");if(!("linear"!==t&&"cubic"!==t||2===e.length||4===e.length&&1===e[0]&&1===e[1]))throw new Error(`'Linear' mode and 'Cubic' mode only support 2-D inputs ('Bilinear', 'Bicubic') or 4-D inputs with the corresponding outermost 2 scale values being 1 in the ${n?"Resize":"Upsample"} opeartor.`)}},1958:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.ProgramManager=void 0;const r=n(1670),o=n(6231),i=n(8879),a=n(5060);t.ProgramManager=class{constructor(e,t,n){this.profiler=e,this.glContext=t,this.textureLayoutStrategy=n,this.repo=new Map,this.attributesBound=!1}getArtifact(e){return this.repo.get(e)}setArtifact(e,t){this.repo.set(e,t)}run(e,t,n){var r;this.profiler.event("op",`ProgramManager.run ${null!==(r=e.programInfo.name)&&void 0!==r?r:"unknown kernel"}`,(()=>{var r;const i=this.glContext.gl,a=e.program;i.useProgram(a);try{this.bindOutput(n),this.attributesBound||this.bindAttributes(e.attribLocations),this.bindUniforms(e.uniformLocations,null!==(r=e.programInfo.variables)&&void 0!==r?r:[],t)}catch(t){throw 
o.Logger.error("ProgramManager",e.programInfo.shaderSource),t}this.profiler.event("backend","GlContext.draw()",(()=>{this.glContext.draw()}))}),this.glContext)}dispose(){this.vertexShader&&this.glContext.deleteShader(this.vertexShader),this.repo.forEach((e=>this.glContext.deleteProgram(e.program)))}build(e,t,n){return this.profiler.event("backend","ProgramManager.build",(()=>{const r=new i.GlslPreprocessor(this.glContext,e,t,n),o=r.preprocess(),a=this.compile(o);return{programInfo:e,program:a,uniformLocations:this.getUniformLocations(a,r.context.programInfo.inputNames,r.context.programInfo.variables),attribLocations:this.getAttribLocations(a)}}))}compile(e){if(!this.vertexShader){o.Logger.verbose("ProrgramManager","Compiling and caching Vertex shader for the first time");const e=(0,a.getVertexShaderSource)(this.glContext.version);this.vertexShader=this.glContext.compileShader(e,this.glContext.gl.VERTEX_SHADER)}r.env.debug&&o.Logger.verbose("ProrgramManager",`FragShader:\n${e}\n`);const t=this.glContext.compileShader(e,this.glContext.gl.FRAGMENT_SHADER),n=this.glContext.createProgram(this.vertexShader,t);return this.glContext.deleteShader(t),n}bindOutput(e){const t=e.width,n=e.height;o.Logger.verbose("ProrgramManager",`Binding output texture to Framebuffer: w/h=${t}/${n}, shape=${e.shape}, type=${e.tensor.type}`),this.glContext.attachFramebuffer(e.texture,t,n)}bindAttributes(e){const t=e.position,n=e.textureCoord;this.glContext.setVertexAttributes(t,n),this.attributesBound=!0}bindUniforms(e,t,n){var r;const o=this.glContext.gl;let i=0;for(const{name:a,type:s,location:u,arrayLength:l}of e){const e=null===(r=t.find((e=>e.name===a)))||void 0===r?void 0:r.data;if("sampler2D"!==s&&!e)throw new Error(`variable '${a}' does not have data defined in program info`);switch(s){case"sampler2D":this.bindTexture(n[i],u,i),i++;break;case"float":l?o.uniform1fv(u,e):o.uniform1f(u,e);break;case"int":l?o.uniform1iv(u,e):o.uniform1i(u,e);break;default:throw new Error(`Uniform not implemented: ${s}`)}}}bindTexture(e,t,n){this.glContext.bindTextureToUniform(e.texture,n,t)}getAttribLocations(e){return{position:this.getAttribLocation(e,"position"),textureCoord:this.getAttribLocation(e,"textureCoord")}}getUniformLocations(e,t,n){const r=[];if(t)for(const n of t)r.push({name:n,type:"sampler2D",location:this.getUniformLocation(e,n)});if(n)for(const t of n)r.push(Object.assign(Object.assign({},t),{location:this.getUniformLocation(e,t.name)}));return r}getUniformLocation(e,t){const n=this.glContext.gl.getUniformLocation(e,t);if(null===n)throw new Error(`Uniform ${t} not found.`);return n}getAttribLocation(e,t){return this.glContext.gl.getAttribLocation(e,t)}}},6416:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.WebGLSessionHandler=void 0;const r=n(6231),o=n(1047),i=n(8316),a=n(1640),s=n(1958),u=n(7859),l=n(5702);t.WebGLSessionHandler=class{constructor(e,t){this.backend=e,this.context=t,this.layoutStrategy=new u.PreferLogicalStrategy(e.glContext.maxTextureSize),this.programManager=new s.ProgramManager(this.context.profiler,e.glContext,this.layoutStrategy),this.textureManager=new l.TextureManager(e.glContext,this.layoutStrategy,this.context.profiler,{reuseTextures:"full"===e.textureCacheMode}),this.packedTextureDataCache=new Map,this.unpackedTextureDataCache=new Map,this.pack=e.pack,this.pack2unpackMap=new Map,this.unpack2packMap=new Map}createInferenceHandler(){return new i.WebGLInferenceHandler(this)}onGraphInitialized(e){const 
t=e.getValues().filter((e=>-1===e.from&&e.tensor)).map((e=>e.tensor.dataId));this.initializers=new Set(t)}isInitializer(e){return!!this.initializers&&this.initializers.has(e)}addInitializer(e){this.initializers.add(e)}getTextureData(e,t){return t?this.packedTextureDataCache.get(e):this.unpackedTextureDataCache.get(e)}setTextureData(e,t,n=!1){r.Logger.verbose("WebGLSessionHandler","Storing Texture data in cache"),n?this.packedTextureDataCache.set(e,t):this.unpackedTextureDataCache.set(e,t)}dispose(){this.programManager.dispose(),this.textureManager.clearActiveTextures(),this.packedTextureDataCache.forEach((e=>this.textureManager.releaseTexture(e,!0))),this.packedTextureDataCache=new Map,this.unpackedTextureDataCache.forEach((e=>this.textureManager.releaseTexture(e,!0))),this.unpackedTextureDataCache=new Map}resolve(e,t,n){const r=(0,o.resolveOperator)(e,t,a.WEBGL_OP_RESOLVE_RULES);return{impl:r.opImpl,context:r.opInit?r.opInit(e,n):e}}}},7769:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.Uint8DataEncoder=t.RGBAFloatDataEncoder=t.RedFloat32DataEncoder=void 0;const r=n(6231);t.RedFloat32DataEncoder=class{constructor(e,t=1){if(1===t)this.internalFormat=e.R32F,this.format=e.RED,this.textureType=e.FLOAT,this.channelSize=t;else{if(4!==t)throw new Error(`Invalid number of channels: ${t}`);this.internalFormat=e.RGBA32F,this.format=e.RGBA,this.textureType=e.FLOAT,this.channelSize=t}}encode(e,t){let n,o;return e.constructor!==Float32Array&&(r.Logger.warning("Encoder","data was not of type Float32; creating new Float32Array"),o=new Float32Array(e)),t*this.channelSize>e.length?(r.Logger.warning("Encoder","Source data too small. Allocating larger array"),o=e,n=this.allocate(t*this.channelSize),o.forEach(((e,t)=>n[t]=e))):(o=e,n=o),n}allocate(e){return new Float32Array(4*e)}decode(e,t){return 1===this.channelSize?e.filter(((e,t)=>t%4==0)).subarray(0,t):e.subarray(0,t)}},t.RGBAFloatDataEncoder=class{constructor(e,t=1,n){if(1!==t&&4!==t)throw new Error(`Invalid number of channels: ${t}`);this.internalFormat=e.RGBA,this.format=e.RGBA,this.channelSize=t,this.textureType=n||e.FLOAT}encode(e,t){let n=e;return 1===this.channelSize&&(r.Logger.verbose("Encoder","Exploding into a larger array"),n=this.allocate(t),e.forEach(((e,t)=>n[4*t]=e))),n}allocate(e){return new Float32Array(4*e)}decode(e,t){return 1===this.channelSize?e.filter(((e,t)=>t%4==0)).subarray(0,t):e.subarray(0,t)}},t.Uint8DataEncoder=class{constructor(e,t=1){if(this.channelSize=4,1===t)this.internalFormat=e.ALPHA,this.format=e.ALPHA,this.textureType=e.UNSIGNED_BYTE,this.channelSize=t;else{if(4!==t)throw new Error(`Invalid number of channels: ${t}`);this.internalFormat=e.RGBA,this.format=e.RGBA,this.textureType=e.UNSIGNED_BYTE,this.channelSize=t}}encode(e,t){return new Uint8Array(e.buffer,e.byteOffset,e.byteLength)}allocate(e){return new Uint8Array(e*this.channelSize)}decode(e,t){if(e instanceof Uint8Array)return e.subarray(0,t);throw new Error(`Invalid array type: ${e.constructor}`)}}},7859:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.getBatchDim=t.sizeToSquarishShape=t.getRowsCols=t.sizeFromShape=t.isInt=t.parseAxisParam=t.squeezeShape=t.PreferLogicalStrategy=t.AlwaysKeepOriginalSizeStrategy=void 0;const r=n(6231),o=n(2517);function i(e,t){const n=[],r=[],o=null!=t&&Array.isArray(t)&&0===t.length,i=null==t||o?null:a(t,e).sort();let s=0;for(let t=0;tt)&&1===e[t]&&(n.push(e[t]),r.push(t)),i[s]<=t&&s++}1!==e[t]&&(n.push(e[t]),r.push(t))}return{newShape:n,keptDims:r}}function a(e,t){const 
n=t.length;return e=null==e?t.map(((e,t)=>t)):[].concat(e),(0,o.assert)(e.every((e=>e>=-n&&e`All values in axis param must be in range [-${n}, ${n}) but got axis ${e}`)),(0,o.assert)(e.every(s),(()=>`All values in axis param must be integers but got axis ${e}`)),e.map((e=>e<0?n+e:e))}function s(e){return e%1==0}function u(e){if(0===e.length)return 1;let t=e[0];for(let n=1;n=e.length?1:e.slice(t.breakAxis).reduce(((e,t)=>e*t)),i=t.breakAxis<=0?1:e.slice(0,t.breakAxis).reduce(((e,t)=>e*t));if(!(o>n||i>n))return[o,i];r.Logger.verbose("TextureLayout",`Given width/height preferences were unattainable: shape:${e}, breakAxis:${t.breakAxis}`)}const o=e.reduce(((e,t)=>e*t));let i=Math.floor(Math.sqrt(o));for(;i=n||o%i!=0)throw new Error(`The given dimensions are outside this GPU's boundaries: ${e}`);return[i,o/i]}},t.PreferLogicalStrategy=class{constructor(e){this.maxTextureSize=e}computeTextureWH(e,t){const n=this.computeTexture(e,t);return t&&t.isPacked&&(n[0]/=2,n[1]/=2),t&&t.reverseWH?[n[1],n[0]]:n}computeTexture(e,t){const n=t&&t.isPacked;if(0===e.length)return n?[2,2]:[1,1];let o=this.maxTextureSize;if(t&&void 0!==t.breakAxis){const n=t.breakAxis>=e.length?1:e.slice(t.breakAxis).reduce(((e,t)=>e*t)),i=t.breakAxis<=0?1:e.slice(0,t.breakAxis).reduce(((e,t)=>e*t));if(!(n>o||i>o))return[n,i];r.Logger.verbose("TextureLayout",`Given width/height preferences were unattainable: shape:${e}, breakAxis:${t.breakAxis}`)}let a=e.slice(0);if(n&&(o*=2,a=a.map(((e,t)=>t>=a.length-2?a[t]%2==0?a[t]:a[t]+1:a[t])),1===a.length&&(a=[2,a[0]])),2!==a.length){const e=i(a);a=e.newShape}const s=u(a);return a.length<=1&&s<=o?[1,s]:2===a.length&&a[0]<=o&&a[1]<=o?a:3===a.length&&a[0]*a[1]<=o&&a[2]<=o?[a[0]*a[1],a[2]]:3===a.length&&a[0]<=o&&a[1]*a[2]<=o?[a[0],a[1]*a[2]]:4===a.length&&a[0]*a[1]*a[2]<=o&&a[3]<=o?[a[0]*a[1]*a[2],a[3]]:4===a.length&&a[0]<=o&&a[1]*a[2]*a[3]<=o?[a[0],a[1]*a[2]*a[3]]:n?l(s/4).map((e=>2*e)):l(s)}},t.squeezeShape=i,t.parseAxisParam=a,t.isInt=s,t.sizeFromShape=u,t.getRowsCols=function(e){if(0===e.length)throw Error("Cannot get rows and columns of an empty shape array.");return[e.length>1?e[e.length-2]:1,e[e.length-1]]},t.sizeToSquarishShape=l,t.getBatchDim=function(e,t=2){return u(e.slice(0,e.length-t))}},4057:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.createTextureLayoutFromShape=t.calculateTextureWidthAndHeight=t.createTextureLayoutFromTextureType=void 0;const r=n(2517),o=n(2039);t.createTextureLayoutFromTextureType=(e,n,r)=>{const i=r===o.TextureType.unpacked||r===o.TextureType.unpackedReversed?1:4,a=r===o.TextureType.packed,s=r===o.TextureType.unpackedReversed||r===o.TextureType.packed,u=r===o.TextureType.packedLastDimension?n.length-1:void 0,l=r===o.TextureType.packedLastDimension?n.map(((e,t)=>t===n.length-1?4*e:e)):void 0;return(0,t.createTextureLayoutFromShape)(e,n,i,l,{isPacked:a,reverseWH:s,breakAxis:u})},t.calculateTextureWidthAndHeight=(e,n,r)=>{const o=(0,t.createTextureLayoutFromTextureType)(e,n,r);return[o.width,o.height]},t.createTextureLayoutFromShape=(e,t,n=1,o,i)=>{const a=!(!i||!i.isPacked),[s,u]=e.computeTextureWH(a&&o||t,i),l=t.length;let c=t.slice(0);if(0===l&&(c=[1]),1===n)o=t;else if(a){if(4!==n)throw new Error("a packed texture must be 4-channel");o=t,l>0&&(c[l-1]=Math.ceil(c[l-1]/2)),l>1&&(c[l-2]=Math.ceil(c[l-2]/2))}else if(!o)throw new Error("Unpacked shape is needed when using channels > 
1");return{width:s,height:u,channels:n,isPacked:a,shape:c,strides:r.ShapeUtil.computeStrides(c),unpackedShape:o,reversedWH:i&&i.reverseWH}}},5702:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.TextureManager=void 0;const r=n(6231);t.TextureManager=class{constructor(e,t,n,r){this.glContext=e,this.layoutStrategy=t,this.profiler=n,this.config=r,this.pendingRead=new Map,r.reuseTextures&&(this.inUseTextures=new Map,this.idleTextures=new Map,this.textureLookup=new Map)}createTextureFromLayout(e,t,n,o){const i=this.toEncoderType(e),a=this.glContext.getEncoder(i,t.channels||1,o);if(t.isPacked&&1===o)throw new Error("not implemented");const s=t.width,u=t.height;let l,c;if(this.config.reuseTextures){l=`${s}x${u}_${a.format}_${a.internalFormat}_${a.textureType}`,c=this.inUseTextures.get(l),c||(c=[],this.inUseTextures.set(l,c));const t=this.idleTextures.get(l);if(t&&t.length>0){const r=t.pop();return c.push(r),1===o&&this.glContext.updateTexture(r,s,u,a,this.toTextureData(e,n)),r}}r.Logger.verbose("TextureManager",`Creating new texture of size ${t.width}x${t.height}`);const f=this.glContext.allocateTexture(s,u,a,this.toTextureData(e,n));return this.config.reuseTextures&&(c.push(f),this.textureLookup.set(f,l)),f}readTexture(e,t,n){return n||(n=1),this.profiler.event("backend","TextureManager.readTexture",(()=>{const r=e.shape.reduce(((e,t)=>e*t))*n,o=this.glContext.readTexture(e.texture,e.width,e.height,r,this.toEncoderType(t),n);return this.toTensorData(t,o)}))}async readTextureAsync(e,t,n){const r=e.tensor.dataId;if(n||(n=1),this.pendingRead.has(r)){const e=this.pendingRead.get(r);return new Promise((t=>null==e?void 0:e.push(t)))}return this.profiler.event("backend","TextureManager.readTextureAsync",(async()=>{this.pendingRead.set(r,[]);const o=e.shape.reduce(((e,t)=>e*t))*n;await this.glContext.createAndWaitForFence();const i=this.glContext.readTexture(e.texture,e.width,e.height,o,this.toEncoderType(t),n),a=this.toTensorData(t,i),s=this.pendingRead.get(r);return this.pendingRead.delete(r),null==s||s.forEach((e=>e(a))),a}))}readUint8TextureAsFloat(e){return this.profiler.event("backend","TextureManager.readUint8TextureAsFloat",(()=>{const t=e.shape.reduce(((e,t)=>e*t)),n=this.glContext.readTexture(e.texture,e.width,e.height,4*t,"byte",4);return new Float32Array(n.buffer,n.byteOffset,t)}))}releaseTexture(e,t){let n;if(this.config.reuseTextures&&(n=this.textureLookup.get(e.texture),n)){t&&this.textureLookup.delete(n);const r=this.inUseTextures.get(n);if(r){const t=r.indexOf(e.texture);if(-1!==t){r.splice(t,1);let o=this.idleTextures.get(n);o||(o=[],this.idleTextures.set(n,o)),o.push(e.texture)}}}n&&!t||(r.Logger.verbose("TextureManager",`Deleting texture of size ${e.width}x${e.height}`),this.glContext.deleteTexture(e.texture))}toTensorData(e,t){switch(e){case"int16":return t instanceof Int16Array?t:Int16Array.from(t);case"int32":return t instanceof Int32Array?t:Int32Array.from(t);case"int8":return t instanceof Int8Array?t:Int8Array.from(t);case"uint16":return t instanceof Uint16Array?t:Uint16Array.from(t);case"uint32":return t instanceof Uint32Array?t:Uint32Array.from(t);case"uint8":case"bool":return t instanceof Uint8Array?t:Uint8Array.from(t);case"float32":return t instanceof Float32Array?t:Float32Array.from(t);case"float64":return t instanceof Float64Array?t:Float64Array.from(t);default:throw new Error(`TensorData type ${e} is not supported`)}}toTextureData(e,t){if(t)return t instanceof Float32Array?t:new 
Float32Array(t)}toEncoderType(e){return"float"}clearActiveTextures(){this.glContext.clearActiveTextures()}}},2039:(e,t)=>{"use strict";var n;Object.defineProperty(t,"__esModule",{value:!0}),t.TextureType=void 0,(n=t.TextureType||(t.TextureType={}))[n.unpacked=0]="unpacked",n[n.unpackedReversed=1]="unpackedReversed",n[n.packed=2]="packed",n[n.downloadUint8AsFloat=3]="downloadUint8AsFloat",n[n.packedLastDimension=4]="packedLastDimension"},9390:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.getGlChannels=t.getCoordsDataType=t.getSqueezedParams=t.squeezeInputShape=t.generateShaderFuncNameFromInputSamplerNameAtOutCoords=t.generateShaderFuncNameFromInputSamplerName=t.repeatedTry=t.getPackedShape=void 0;const r=n(2517);t.getPackedShape=function(e){const t=e.length;return e.slice(0,t-1).concat(e[t-1]/4)},t.repeatedTry=async function(e,t=e=>0,n){return new Promise(((r,o)=>{let i=0;const a=()=>{if(e())return void r();i++;const s=t(i);null!=n&&i>=n?o():setTimeout(a,s)};a()}))},t.generateShaderFuncNameFromInputSamplerName=function(e){return(0,r.assert)(void 0!==e&&0!==e.length,(()=>"empty string found for sampler name")),"get"+e.charAt(0).toUpperCase()+e.slice(1)},t.generateShaderFuncNameFromInputSamplerNameAtOutCoords=function(e){return(0,r.assert)(void 0!==e&&0!==e.length,(()=>"empty string found for sampler name")),"get"+e.charAt(0).toUpperCase()+e.slice(1)+"AtOutCoords"},t.squeezeInputShape=function(e,t){let n=JSON.parse(JSON.stringify(e));return n=t,n},t.getSqueezedParams=function(e,t){return t.map((t=>e[t])).join(", ")},t.getCoordsDataType=function(e){if(e<=1)return"int";if(2===e)return"ivec2";if(3===e)return"ivec3";if(4===e)return"ivec4";if(5===e)return"ivec5";if(6===e)return"ivec6";throw Error(`GPU for rank ${e} is not yet supported`)},t.getGlChannels=function(e=6){return["x","y","z","w","u","v"].slice(0,e)}},7305:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.createNewWebGLContext=t.createWebGLContext=void 0;const r=n(6231),o=n(1713),i={};function a(e){const t=function(){if("undefined"==typeof document){if("undefined"==typeof OffscreenCanvas)throw new TypeError("failed to create canvas: OffscreenCanvas is not supported");return new OffscreenCanvas(1,1)}const e=document.createElement("canvas");return e.width=1,e.height=1,e}();let n;const i={alpha:!1,depth:!1,antialias:!1,stencil:!1,preserveDrawingBuffer:!1,premultipliedAlpha:!1,failIfMajorPerformanceCaveat:!1};if((!e||"webgl2"===e)&&(n=t.getContext("webgl2",i),n))try{return new o.WebGLContext(n,2)}catch(e){r.Logger.warning("GlContextFactory",`failed to create WebGLContext using contextId 'webgl2'. Error: ${e}`)}if((!e||"webgl"===e)&&(n=t.getContext("webgl",i)||t.getContext("experimental-webgl",i),n))try{return new o.WebGLContext(n,1)}catch(e){r.Logger.warning("GlContextFactory",`failed to create WebGLContext using contextId 'webgl' or 'experimental-webgl'. 
Error: ${e}`)}throw new Error("WebGL is not supported")}t.createWebGLContext=function e(t){let n;t&&"webgl2"!==t||!("webgl2"in i)?t&&"webgl"!==t||!("webgl"in i)||(n=i.webgl):n=i.webgl2,n=n||a(t),t=t||1===n.version?"webgl":"webgl2";const r=n.gl;return i[t]=n,r.isContextLost()?(delete i[t],e(t)):(r.disable(r.DEPTH_TEST),r.disable(r.STENCIL_TEST),r.disable(r.BLEND),r.disable(r.DITHER),r.disable(r.POLYGON_OFFSET_FILL),r.disable(r.SAMPLE_COVERAGE),r.enable(r.SCISSOR_TEST),r.enable(r.CULL_FACE),r.cullFace(r.BACK),n)},t.createNewWebGLContext=a},1713:function(e,t,n){"use strict";var r=this&&this.__createBinding||(Object.create?function(e,t,n,r){void 0===r&&(r=n);var o=Object.getOwnPropertyDescriptor(t,n);o&&!("get"in o?!t.__esModule:o.writable||o.configurable)||(o={enumerable:!0,get:function(){return t[n]}}),Object.defineProperty(e,r,o)}:function(e,t,n,r){void 0===r&&(r=n),e[r]=t[n]}),o=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)"default"!==n&&Object.prototype.hasOwnProperty.call(e,n)&&r(t,e,n);return o(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.WebGLContext=t.linearSearchLastTrue=void 0;const a=n(1670),s=i(n(7769)),u=n(9390);function l(e){let t=0;for(;tthis.isTimerResultAvailable(e))),this.getTimerResult(e)}async createAndWaitForFence(){const e=this.createFence(this.gl);return this.pollFence(e)}createFence(e){let t;const n=e,r=n.fenceSync(n.SYNC_GPU_COMMANDS_COMPLETE,0);return e.flush(),t=null===r?()=>!0:()=>{const e=n.clientWaitSync(r,0,0);return e===n.ALREADY_SIGNALED||e===n.CONDITION_SATISFIED},{query:r,isFencePassed:t}}async pollFence(e){return new Promise((t=>{this.addItemToPoll((()=>e.isFencePassed()),(()=>t()))}))}pollItems(){const e=l(this.itemsToPoll.map((e=>e.isDoneFn)));for(let t=0;t<=e;++t){const{resolveFn:e}=this.itemsToPoll[t];e()}this.itemsToPoll=this.itemsToPoll.slice(e+1)}async addItemToPoll(e,t){this.itemsToPoll.push({isDoneFn:e,resolveFn:t}),this.itemsToPoll.length>1||await(0,u.repeatedTry)((()=>(this.pollItems(),0===this.itemsToPoll.length)))}}},1036:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.ExecutionPlan=void 0;const r=n(6231);class o{constructor(e,t){this.op=e,this.node=t}}t.ExecutionPlan=class{constructor(e,t,n){this.graph=e,this.profiler=n,this.initialize(t)}initialize(e){this.profiler.event("session","ExecutionPlan.initialize",(()=>{const t=this.graph.getNodes();if(t.length!==e.length)throw new Error("The size of nodes and OPs do not match.");this._ops=e.map(((e,n)=>new o(e,t[n]))),this.reset(),this._starter=[],this._ops.forEach(((e,t)=>{let n=!0;for(const t of e.node.inputs)if(!this._values[t]&&-1===this.graph.getInputIndices().indexOf(t)){n=!1;break}n&&this._starter.push(t)}))}))}reset(){this._values=this.graph.getValues().map((e=>e.tensor))}async execute(e,t){return this.profiler.event("session","ExecutionPlan.execute",(async()=>{this.reset();const n=e.createInferenceHandler(),o=this.graph.getInputIndices();if(t.length!==o.length)throw new Error(`number of input tensors don't match the number of inputs to the model: actual: ${t.length} expected: ${o.length}`);t.forEach(((e,t)=>{const n=o[t];this._values[n]=e}));const i=this._starter.slice(0),a=this.graph.getValues(),s=this.graph.getNodes();let u=0;for(;uthis._values[e]));if(-1!==o.indexOf(void 0))throw new Error(`unresolved input detected: op: ${t.node}`);const 
l=o;r.Logger.verbose("ExecPlan",`Runing op:${t.node.name} (${l.map(((e,n)=>`'${t.node.inputs[n]}': ${e.type}[${e.dims.join(",")}]`)).join(", ")})`);const c=await this.profiler.event("node",t.node.name,(async()=>t.op.impl(n,l,t.op.context)));if(c.length!==t.node.outputs.length)throw new Error("the size of output does not match model definition.");c.forEach(((e,n)=>{const r=t.node.outputs[n];if(this._values[r])throw new Error(`output [${r}] already has value: op:${t.node.name}`);this._values[r]=e}));const f=new Set;c.forEach(((e,n)=>{const r=t.node.outputs[n];for(const e of a[r].to){const t=s[e];let n=!0;for(const e of t.inputs)if(!this._values[e]){n=!1;break}n&&f.add(e)}})),i.push(...f)}const l=[];for(let e=0;e{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.Graph=void 0;const r=n(1446),o=n(7778),i=n(9395),a=n(9162),s=n(2517);var u=i.onnxruntime.experimental.fbs;t.Graph={from:(e,t)=>new f(e,t)};class l{constructor(e){this._from=void 0,this._to=[],this.tensor=void 0,this.type=void 0,e&&(this.type=s.ProtoUtil.tensorValueTypeFromProto(e.type.tensorType))}get from(){return this._from}get to(){return this._to}}class c{constructor(e,t){e instanceof r.onnx.NodeProto?(this.name=e.name,this.opType=e.opType,this.attributes=new o.Attribute(e.attribute)):e instanceof u.Node&&(this.name=null!=t?t:e.name(),this.opType=e.opType(),this.attributes=new o.Attribute(s.ProtoUtil.tensorAttributesFromORTFormat(e))),this.inputs=[],this.outputs=[],this.executeNode=!0}}class f{constructor(e,t){if(!e)throw new TypeError("graph is empty");this.buildGraph(e),this.transformGraph(t),this.checkIsAcyclic()}getInputIndices(){return this._allInputIndices}getInputNames(){return this._allInputNames}getOutputIndices(){return this._allOutputIndices}getOutputNames(){return this._allOutputNames}getValues(){return this._allData}getNodes(){return this._nodes}buildGraph(e){if(e instanceof r.onnx.GraphProto)this.buildGraphFromOnnxFormat(e);else{if(!(e instanceof u.Graph))throw new TypeError("Graph type is not supported.");this.buildGraphFromOrtFormat(e)}}buildGraphFromOnnxFormat(e){const t=new Map;this._allData=[],this._allInputIndices=[],this._allInputNames=[],this._allOutputIndices=[],this._allOutputNames=[],this._nodes=[];const n=new Map;if(!e.input)throw new Error("missing information in graph: input");const r=[];for(const n of e.input){if(t.has(n.name))throw new Error(`duplicated input name: ${n.name}`);const e=this._allData.push(new l(n))-1;t.set(n.name,e),r.push(n.name)}if(!e.initializer)throw new Error("missing information in graph: initializer");for(const n of e.initializer){let e=t.get(n.name);if(void 0===e){const r=new l;r.type={shape:{dims:s.ProtoUtil.tensorDimsFromProto(n.dims)},tensorType:s.ProtoUtil.tensorDataTypeFromProto(n.dataType)},e=this._allData.push(r)-1,t.set(n.name,e)}this._allData[e]._from=-1,this._allData[e].tensor=a.Tensor.fromProto(n)}for(let e=0;e{this._allData[t]._to.forEach((t=>{e.add(t)}))}));const t=Array.from(e),n=new Array(this._nodes.length).fill("white");for(;t.length>0;){const e=t.pop();"gray"===n[e]?n[e]="black":(t.push(e),n[e]="gray",this._nodes[e].outputs.forEach((r=>{const o=this._allData[r];if(void 0!==o.tensor)throw new Error("node outputs should not be initialized");if(o._from!==e)throw new Error("from property of the Value object doesn't match index of Node being processed");o._to.forEach((e=>{if("gray"===n[e])throw new Error("model graph is 
cyclic");"white"===n[e]&&t.push(e)}))})))}}transformGraph(e){this.removeAllIdentityNodes(),this.removeAllDropoutNodes(),this.fuseConvActivationNodes(),e&&e.transformGraph(this),this.finalizeGraph()}finalizeGraph(){let e=0;for(let t=0;t0&&(this._nodes[t].inputs.forEach((n=>{const r=this._allData[n]._to.indexOf(t+e);-1!==r&&(this._allData[n]._to[r]=t)})),this._nodes[t].outputs.forEach((n=>{this._allData[n]._from&&this._allData[n]._from===t+e&&(this._allData[n]._from=t)}))):(e++,this._nodes[t].outputs.forEach((e=>{this._allData[e]._from=-2})),this._nodes.splice(t,1),t--);e=0;for(let t=0;t0){let n=-1;void 0!==this._allData[t].from&&-1!==this._allData[t].from?(n=this._nodes[this._allData[t].from].outputs.indexOf(t+e),-1!==n&&(this._nodes[this._allData[t].from].outputs[n]=t)):(n=this._allInputIndices.indexOf(t+e),-1!==n&&(this._allInputIndices[n]=t)),this._allData[t].to.forEach((r=>{n=this._nodes[r].inputs.indexOf(t+e),-1!==n&&(this._nodes[r].inputs[n]=t)})),0===this._allData[t].to.length&&(n=this._allOutputIndices.indexOf(t+e),-1!==n&&(this._allOutputIndices[n]=t))}}else e++,this._allData.splice(t,1),t--}deleteNode(e){const t=this._nodes[e];if(t.outputs.length>1)for(let e=1;e0)throw new Error("Node deletion with more than one output connected to other nodes is not supported. ");t.executeNode=!1;const n=t.inputs[0],r=t.outputs[0],o=this._allData[r].to,i=this._allData[n].to.indexOf(e);if(-1===i)throw new Error("The Value object doesn't have the current Node in it's 'to' property ");this._allData[n].to.splice(i,1),this._allData[r]._to=[];const a=this._allOutputIndices.indexOf(r);if(-1!==a&&(this._allOutputIndices[a]=n),o&&o.length>0)for(const e of o){const t=this._nodes[e].inputs.indexOf(r);if(-1===t)throw new Error("The Node object doesn't have the output Value in it's 'inputs' property ");this._nodes[e].inputs[t]=n,this._allData[n].to.push(e)}}removeAllDropoutNodes(){let e=0;for(const t of this._nodes){if("Dropout"===t.opType){if(1!==t.inputs.length)throw new Error("Dropout nodes should only contain one input. 
");if(1!==t.outputs.length&&2!==t.outputs.length)throw new Error("Dropout nodes should contain either 1 or 2 output(s)");if(2===t.outputs.length&&0!==this._allData[t.outputs[1]]._to.length)throw new Error("Dropout nodes's second output should not be referenced by other nodes");this.deleteNode(e)}e++}}removeAllIdentityNodes(){let e=0;for(const t of this._nodes)"Identity"===t.opType&&this.deleteNode(e),e++}isActivation(e){switch(e.opType){case"Relu":case"Sigmoid":case"Clip":return!0;default:return!1}}fuseConvActivationNodes(){for(const e of this._nodes)if("Conv"===e.opType){const t=this._allData[e.outputs[0]]._to;if(1===t.length&&this.isActivation(this._nodes[t[0]])){const n=this._nodes[t[0]];if("Clip"===n.opType)if(1===n.inputs.length)try{e.attributes.set("activation_params","floats",[n.attributes.getFloat("min"),n.attributes.getFloat("max")])}catch(t){e.attributes.set("activation_params","floats",[s.MIN_CLIP,s.MAX_CLIP])}else{if(!(n.inputs.length>=3&&void 0!==this._allData[n.inputs[1]].tensor&&void 0!==this._allData[n.inputs[2]].tensor))continue;e.attributes.set("activation_params","floats",[this._allData[n.inputs[1]].tensor.floatData[0],this._allData[n.inputs[2]].tensor.floatData[0]])}e.attributes.set("activation","string",n.opType),this.deleteNode(t[0])}}}}},6231:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.now=t.Profiler=t.Logger=void 0;const n={verbose:1e3,info:2e3,warning:4e3,error:5e3,fatal:6e3},r={none:new class{log(e,t,n){}},console:new class{log(e,t,n){console.log(`${this.color(e)} ${n?""+n+" ":""}${t}`)}color(e){switch(e){case"verbose":return"v";case"info":return"i";case"warning":return"w";case"error":return"e";case"fatal":return"f";default:throw new Error(`unsupported severity: ${e}`)}}}},o={provider:"console",minimalSeverity:"warning",logDateTime:!0,logSourceLocation:!1};let i={"":o};function a(e,t,n,r){if(void 0===t)return o=e,{verbose:a.verbose.bind(null,o),info:a.info.bind(null,o),warning:a.warning.bind(null,o),error:a.error.bind(null,o),fatal:a.fatal.bind(null,o)};if(void 0===n)s(e,t);else if("number"==typeof n&&void 0===r)s(e,t);else if("string"==typeof n&&void 0===r)s(e,n,0,t);else{if("string"!=typeof n||"number"!=typeof r)throw new TypeError("input is valid");s(e,n,0,t)}var o}function s(e,t,o,a){const s=i[a||""]||i[""];n[e]{a.then((async t=>{o&&await o.end(),e(t)}),(async e=>{o&&await o.end(),t(e)}))}));if(!i&&o){const e=o.end();if(e&&"function"==typeof e.then)return new Promise(((t,n)=>{e.then((()=>{t(a)}),(e=>{n(e)}))}))}return a}begin(e,n,r){if(!this._started)throw new Error("profiler is not started yet");if(void 0===r){const r=(0,t.now)();return this.flush(r),new u(e,n,r,(e=>this.endSync(e)))}{const t=r.beginTimer();return new u(e,n,0,(async e=>this.end(e)),t,r)}}async end(e){const t=await e.checkTimer();this._timingEvents.length=this._flushBatchSize||e-this._flushTime>=this._flushIntervalInMilliseconds){for(const e=this._flushPointer;this._flushPointerperformance.now():Date.now},2644:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.Model=void 0;const r=n(5686),o=n(1446),i=n(7070),a=n(9395),s=n(2517);var u=a.onnxruntime.experimental.fbs;t.Model=class{constructor(){}load(e,t,n){if(!n)try{return void this.loadFromOnnxFormat(e,t)}catch(e){if(void 0!==n)throw e}this.loadFromOrtFormat(e,t)}loadFromOnnxFormat(e,t){const n=o.onnx.ModelProto.decode(e);if(s.LongUtil.longToNumber(n.irVersion)<3)throw new Error("only support ONNX model with 
IR_VERSION>=3");this._opsets=n.opsetImport.map((e=>({domain:e.domain,version:s.LongUtil.longToNumber(e.version)}))),this._graph=i.Graph.from(n.graph,t)}loadFromOrtFormat(e,t){const n=new r.flatbuffers.ByteBuffer(e),o=u.InferenceSession.getRootAsInferenceSession(n).model();if(s.LongUtil.longToNumber(o.irVersion())<3)throw new Error("only support ONNX model with IR_VERSION>=3");this._opsets=[];for(let e=0;e{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.FLOAT_TYPES=t.INT_TYPES=t.NUMBER_TYPES=void 0,t.NUMBER_TYPES=["float32","float64","int32","int16","int8","uint16","uint32","uint8"],t.INT_TYPES=["int32","int16","int8","uint16","uint32","uint8"],t.FLOAT_TYPES=["float32","float64"]},1047:(e,t)=>{"use strict";function n(e,t){if(t.endsWith("+")){const n=Number.parseInt(t.substring(0,t.length-1),10);return!isNaN(n)&&n<=e}if(2===t.split("-").length){const n=t.split("-"),r=Number.parseInt(n[0],10),o=Number.parseInt(n[1],10);return!isNaN(r)&&!isNaN(o)&&r<=e&&e<=o}return Number.parseInt(t,10)===e}Object.defineProperty(t,"__esModule",{value:!0}),t.resolveOperator=void 0,t.resolveOperator=function(e,t,r){for(const o of r){const r=o[0],i=o[1],a=o[2],s=o[3],u=o[4];if(e.opType===r)for(const e of t)if((e.domain===i||"ai.onnx"===e.domain&&""===i)&&n(e.version,a))return{opImpl:s,opInit:u}}throw new TypeError(`cannot resolve operator '${e.opType}' with opsets: ${t.map((e=>`${e.domain||"ai.onnx"} v${e.version}`)).join(", ")}`)}},9395:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.onnxruntime=void 0;const r=n(5686);var o,i;(function(e){let t;!function(e){e[e.UNDEFINED=0]="UNDEFINED",e[e.FLOAT=1]="FLOAT",e[e.INT=2]="INT",e[e.STRING=3]="STRING",e[e.TENSOR=4]="TENSOR",e[e.GRAPH=5]="GRAPH",e[e.FLOATS=6]="FLOATS",e[e.INTS=7]="INTS",e[e.STRINGS=8]="STRINGS",e[e.TENSORS=9]="TENSORS",e[e.GRAPHS=10]="GRAPHS",e[e.SPARSE_TENSOR=11]="SPARSE_TENSOR",e[e.SPARSE_TENSORS=12]="SPARSE_TENSORS"}(t=e.AttributeType||(e.AttributeType={}))})((i=(o=t.onnxruntime||(t.onnxruntime={})).experimental||(o.experimental={})).fbs||(i.fbs={})),function(e){!function(e){!function(e){let t;!function(e){e[e.UNKNOWN=0]="UNKNOWN",e[e.VALUE=1]="VALUE",e[e.PARAM=2]="PARAM"}(t=e.DimensionValueType||(e.DimensionValueType={}))}(e.fbs||(e.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(e){!function(e){let t;!function(e){e[e.UNDEFINED=0]="UNDEFINED",e[e.FLOAT=1]="FLOAT",e[e.UINT8=2]="UINT8",e[e.INT8=3]="INT8",e[e.UINT16=4]="UINT16",e[e.INT16=5]="INT16",e[e.INT32=6]="INT32",e[e.INT64=7]="INT64",e[e.STRING=8]="STRING",e[e.BOOL=9]="BOOL",e[e.FLOAT16=10]="FLOAT16",e[e.DOUBLE=11]="DOUBLE",e[e.UINT32=12]="UINT32",e[e.UINT64=13]="UINT64",e[e.COMPLEX64=14]="COMPLEX64",e[e.COMPLEX128=15]="COMPLEX128",e[e.BFLOAT16=16]="BFLOAT16"}(t=e.TensorDataType||(e.TensorDataType={}))}(e.fbs||(e.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(e){!function(e){let t;!function(e){e[e.Primitive=0]="Primitive",e[e.Fused=1]="Fused"}(t=e.NodeType||(e.NodeType={}))}(e.fbs||(e.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(e){!function(e){let t;!function(e){e[e.NONE=0]="NONE",e[e.tensor_type=1]="tensor_type",e[e.sequence_type=2]="sequence_type",e[e.map_type=3]="map_type"}(t=e.TypeInfoValue||(e.TypeInfoValue={}))}(e.fbs||(e.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class 
n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsShape(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsShape(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}dim(t,n){let r=this.bb.__offset(this.bb_pos,4);return r?(n||new e.experimental.fbs.Dimension).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}dimLength(){let e=this.bb.__offset(this.bb_pos,4);return e?this.bb.__vector_len(this.bb_pos+e):0}static startShape(e){e.startObject(1)}static addDim(e,t){e.addFieldOffset(0,t,0)}static createDimVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startDimVector(e,t){e.startVector(4,t,4)}static endShape(e){return e.endObject()}static createShape(e,t){return n.startShape(e),n.addDim(e,t),n.endShape(e)}}t.Shape=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsDimension(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsDimension(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}value(t){let n=this.bb.__offset(this.bb_pos,4);return n?(t||new e.experimental.fbs.DimensionValue).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}denotation(e){let t=this.bb.__offset(this.bb_pos,6);return t?this.bb.__string(this.bb_pos+t,e):null}static startDimension(e){e.startObject(2)}static addValue(e,t){e.addFieldOffset(0,t,0)}static addDenotation(e,t){e.addFieldOffset(1,t,0)}static endDimension(e){return e.endObject()}static createDimension(e,t,r){return n.startDimension(e),n.addValue(e,t),n.addDenotation(e,r),n.endDimension(e)}}t.Dimension=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsDimensionValue(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsDimensionValue(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}dimType(){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.readInt8(this.bb_pos+t):e.experimental.fbs.DimensionValueType.UNKNOWN}dimValue(){let e=this.bb.__offset(this.bb_pos,6);return e?this.bb.readInt64(this.bb_pos+e):this.bb.createLong(0,0)}dimParam(e){let t=this.bb.__offset(this.bb_pos,8);return t?this.bb.__string(this.bb_pos+t,e):null}static startDimensionValue(e){e.startObject(3)}static addDimType(t,n){t.addFieldInt8(0,n,e.experimental.fbs.DimensionValueType.UNKNOWN)}static addDimValue(e,t){e.addFieldInt64(1,t,e.createLong(0,0))}static addDimParam(e,t){e.addFieldOffset(2,t,0)}static endDimensionValue(e){return e.endObject()}static createDimensionValue(e,t,r,o){return n.startDimensionValue(e),n.addDimType(e,t),n.addDimValue(e,r),n.addDimParam(e,o),n.endDimensionValue(e)}}t.DimensionValue=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class 
n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsTensorTypeAndShape(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsTensorTypeAndShape(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}elemType(){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.readInt32(this.bb_pos+t):e.experimental.fbs.TensorDataType.UNDEFINED}shape(t){let n=this.bb.__offset(this.bb_pos,6);return n?(t||new e.experimental.fbs.Shape).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}static startTensorTypeAndShape(e){e.startObject(2)}static addElemType(t,n){t.addFieldInt32(0,n,e.experimental.fbs.TensorDataType.UNDEFINED)}static addShape(e,t){e.addFieldOffset(1,t,0)}static endTensorTypeAndShape(e){return e.endObject()}static createTensorTypeAndShape(e,t,r){return n.startTensorTypeAndShape(e),n.addElemType(e,t),n.addShape(e,r),n.endTensorTypeAndShape(e)}}t.TensorTypeAndShape=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsMapType(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsMapType(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}keyType(){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.readInt32(this.bb_pos+t):e.experimental.fbs.TensorDataType.UNDEFINED}valueType(t){let n=this.bb.__offset(this.bb_pos,6);return n?(t||new e.experimental.fbs.TypeInfo).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}static startMapType(e){e.startObject(2)}static addKeyType(t,n){t.addFieldInt32(0,n,e.experimental.fbs.TensorDataType.UNDEFINED)}static addValueType(e,t){e.addFieldOffset(1,t,0)}static endMapType(e){return e.endObject()}static createMapType(e,t,r){return n.startMapType(e),n.addKeyType(e,t),n.addValueType(e,r),n.endMapType(e)}}t.MapType=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsSequenceType(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsSequenceType(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}elemType(t){let n=this.bb.__offset(this.bb_pos,4);return n?(t||new e.experimental.fbs.TypeInfo).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}static startSequenceType(e){e.startObject(1)}static addElemType(e,t){e.addFieldOffset(0,t,0)}static endSequenceType(e){return e.endObject()}static createSequenceType(e,t){return n.startSequenceType(e),n.addElemType(e,t),n.endSequenceType(e)}}t.SequenceType=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(e){(e.fbs||(e.fbs={})).EdgeEnd=class{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}nodeIndex(){return this.bb.readUint32(this.bb_pos)}srcArgIndex(){return this.bb.readInt32(this.bb_pos+4)}dstArgIndex(){return this.bb.readInt32(this.bb_pos+8)}static createEdgeEnd(e,t,n,r){return 
e.prep(4,12),e.writeInt32(r),e.writeInt32(n),e.writeInt32(t),e.offset()}}}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsNodeEdge(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsNodeEdge(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}nodeIndex(){let e=this.bb.__offset(this.bb_pos,4);return e?this.bb.readUint32(this.bb_pos+e):0}inputEdges(t,n){let r=this.bb.__offset(this.bb_pos,6);return r?(n||new e.experimental.fbs.EdgeEnd).__init(this.bb.__vector(this.bb_pos+r)+12*t,this.bb):null}inputEdgesLength(){let e=this.bb.__offset(this.bb_pos,6);return e?this.bb.__vector_len(this.bb_pos+e):0}outputEdges(t,n){let r=this.bb.__offset(this.bb_pos,8);return r?(n||new e.experimental.fbs.EdgeEnd).__init(this.bb.__vector(this.bb_pos+r)+12*t,this.bb):null}outputEdgesLength(){let e=this.bb.__offset(this.bb_pos,8);return e?this.bb.__vector_len(this.bb_pos+e):0}static startNodeEdge(e){e.startObject(3)}static addNodeIndex(e,t){e.addFieldInt32(0,t,0)}static addInputEdges(e,t){e.addFieldOffset(1,t,0)}static startInputEdgesVector(e,t){e.startVector(12,t,4)}static addOutputEdges(e,t){e.addFieldOffset(2,t,0)}static startOutputEdgesVector(e,t){e.startVector(12,t,4)}static endNodeEdge(e){return e.endObject()}static createNodeEdge(e,t,r,o){return n.startNodeEdge(e),n.addNodeIndex(e,t),n.addInputEdges(e,r),n.addOutputEdges(e,o),n.endNodeEdge(e)}}t.NodeEdge=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsNode(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsNode(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}name(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.__string(this.bb_pos+t,e):null}docString(e){let t=this.bb.__offset(this.bb_pos,6);return t?this.bb.__string(this.bb_pos+t,e):null}domain(e){let t=this.bb.__offset(this.bb_pos,8);return t?this.bb.__string(this.bb_pos+t,e):null}sinceVersion(){let e=this.bb.__offset(this.bb_pos,10);return e?this.bb.readInt32(this.bb_pos+e):0}index(){let e=this.bb.__offset(this.bb_pos,12);return e?this.bb.readUint32(this.bb_pos+e):0}opType(e){let t=this.bb.__offset(this.bb_pos,14);return t?this.bb.__string(this.bb_pos+t,e):null}type(){let t=this.bb.__offset(this.bb_pos,16);return t?this.bb.readInt32(this.bb_pos+t):e.experimental.fbs.NodeType.Primitive}executionProviderType(e){let t=this.bb.__offset(this.bb_pos,18);return t?this.bb.__string(this.bb_pos+t,e):null}inputs(e,t){let n=this.bb.__offset(this.bb_pos,20);return n?this.bb.__string(this.bb.__vector(this.bb_pos+n)+4*e,t):null}inputsLength(){let e=this.bb.__offset(this.bb_pos,20);return e?this.bb.__vector_len(this.bb_pos+e):0}outputs(e,t){let n=this.bb.__offset(this.bb_pos,22);return n?this.bb.__string(this.bb.__vector(this.bb_pos+n)+4*e,t):null}outputsLength(){let e=this.bb.__offset(this.bb_pos,22);return e?this.bb.__vector_len(this.bb_pos+e):0}attributes(t,n){let r=this.bb.__offset(this.bb_pos,24);return r?(n||new 
e.experimental.fbs.Attribute).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}attributesLength(){let e=this.bb.__offset(this.bb_pos,24);return e?this.bb.__vector_len(this.bb_pos+e):0}inputArgCounts(e){let t=this.bb.__offset(this.bb_pos,26);return t?this.bb.readInt32(this.bb.__vector(this.bb_pos+t)+4*e):0}inputArgCountsLength(){let e=this.bb.__offset(this.bb_pos,26);return e?this.bb.__vector_len(this.bb_pos+e):0}inputArgCountsArray(){let e=this.bb.__offset(this.bb_pos,26);return e?new Int32Array(this.bb.bytes().buffer,this.bb.bytes().byteOffset+this.bb.__vector(this.bb_pos+e),this.bb.__vector_len(this.bb_pos+e)):null}implicitInputs(e,t){let n=this.bb.__offset(this.bb_pos,28);return n?this.bb.__string(this.bb.__vector(this.bb_pos+n)+4*e,t):null}implicitInputsLength(){let e=this.bb.__offset(this.bb_pos,28);return e?this.bb.__vector_len(this.bb_pos+e):0}static startNode(e){e.startObject(13)}static addName(e,t){e.addFieldOffset(0,t,0)}static addDocString(e,t){e.addFieldOffset(1,t,0)}static addDomain(e,t){e.addFieldOffset(2,t,0)}static addSinceVersion(e,t){e.addFieldInt32(3,t,0)}static addIndex(e,t){e.addFieldInt32(4,t,0)}static addOpType(e,t){e.addFieldOffset(5,t,0)}static addType(t,n){t.addFieldInt32(6,n,e.experimental.fbs.NodeType.Primitive)}static addExecutionProviderType(e,t){e.addFieldOffset(7,t,0)}static addInputs(e,t){e.addFieldOffset(8,t,0)}static createInputsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startInputsVector(e,t){e.startVector(4,t,4)}static addOutputs(e,t){e.addFieldOffset(9,t,0)}static createOutputsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startOutputsVector(e,t){e.startVector(4,t,4)}static addAttributes(e,t){e.addFieldOffset(10,t,0)}static createAttributesVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startAttributesVector(e,t){e.startVector(4,t,4)}static addInputArgCounts(e,t){e.addFieldOffset(11,t,0)}static createInputArgCountsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addInt32(t[n]);return e.endVector()}static startInputArgCountsVector(e,t){e.startVector(4,t,4)}static addImplicitInputs(e,t){e.addFieldOffset(12,t,0)}static createImplicitInputsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startImplicitInputsVector(e,t){e.startVector(4,t,4)}static endNode(e){return e.endObject()}static createNode(e,t,r,o,i,a,s,u,l,c,f,d,p,h){return n.startNode(e),n.addName(e,t),n.addDocString(e,r),n.addDomain(e,o),n.addSinceVersion(e,i),n.addIndex(e,a),n.addOpType(e,s),n.addType(e,u),n.addExecutionProviderType(e,l),n.addInputs(e,c),n.addOutputs(e,f),n.addAttributes(e,d),n.addInputArgCounts(e,p),n.addImplicitInputs(e,h),n.endNode(e)}}t.Node=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsValueInfo(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsValueInfo(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}name(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.__string(this.bb_pos+t,e):null}docString(e){let 
t=this.bb.__offset(this.bb_pos,6);return t?this.bb.__string(this.bb_pos+t,e):null}type(t){let n=this.bb.__offset(this.bb_pos,8);return n?(t||new e.experimental.fbs.TypeInfo).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}static startValueInfo(e){e.startObject(3)}static addName(e,t){e.addFieldOffset(0,t,0)}static addDocString(e,t){e.addFieldOffset(1,t,0)}static addType(e,t){e.addFieldOffset(2,t,0)}static endValueInfo(e){return e.endObject()}static createValueInfo(e,t,r,o){return n.startValueInfo(e),n.addName(e,t),n.addDocString(e,r),n.addType(e,o),n.endValueInfo(e)}}t.ValueInfo=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsTypeInfo(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsTypeInfo(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}denotation(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.__string(this.bb_pos+t,e):null}valueType(){let t=this.bb.__offset(this.bb_pos,6);return t?this.bb.readUint8(this.bb_pos+t):e.experimental.fbs.TypeInfoValue.NONE}value(e){let t=this.bb.__offset(this.bb_pos,8);return t?this.bb.__union(e,this.bb_pos+t):null}static startTypeInfo(e){e.startObject(3)}static addDenotation(e,t){e.addFieldOffset(0,t,0)}static addValueType(t,n){t.addFieldInt8(1,n,e.experimental.fbs.TypeInfoValue.NONE)}static addValue(e,t){e.addFieldOffset(2,t,0)}static endTypeInfo(e){return e.endObject()}static createTypeInfo(e,t,r,o){return n.startTypeInfo(e),n.addDenotation(e,t),n.addValueType(e,r),n.addValue(e,o),n.endTypeInfo(e)}}t.TypeInfo=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(e){!function(e){class t{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsOperatorSetId(e,n){return(n||new t).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsOperatorSetId(e,n){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(n||new t).__init(e.readInt32(e.position())+e.position(),e)}domain(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.__string(this.bb_pos+t,e):null}version(){let e=this.bb.__offset(this.bb_pos,6);return e?this.bb.readInt64(this.bb_pos+e):this.bb.createLong(0,0)}static startOperatorSetId(e){e.startObject(2)}static addDomain(e,t){e.addFieldOffset(0,t,0)}static addVersion(e,t){e.addFieldInt64(1,t,e.createLong(0,0))}static endOperatorSetId(e){return e.endObject()}static createOperatorSetId(e,n,r){return t.startOperatorSetId(e),t.addDomain(e,n),t.addVersion(e,r),t.endOperatorSetId(e)}}e.OperatorSetId=t}(e.fbs||(e.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsTensor(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsTensor(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}name(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.__string(this.bb_pos+t,e):null}docString(e){let t=this.bb.__offset(this.bb_pos,6);return 
t?this.bb.__string(this.bb_pos+t,e):null}dims(e){let t=this.bb.__offset(this.bb_pos,8);return t?this.bb.readInt64(this.bb.__vector(this.bb_pos+t)+8*e):this.bb.createLong(0,0)}dimsLength(){let e=this.bb.__offset(this.bb_pos,8);return e?this.bb.__vector_len(this.bb_pos+e):0}dataType(){let t=this.bb.__offset(this.bb_pos,10);return t?this.bb.readInt32(this.bb_pos+t):e.experimental.fbs.TensorDataType.UNDEFINED}rawData(e){let t=this.bb.__offset(this.bb_pos,12);return t?this.bb.readUint8(this.bb.__vector(this.bb_pos+t)+e):0}rawDataLength(){let e=this.bb.__offset(this.bb_pos,12);return e?this.bb.__vector_len(this.bb_pos+e):0}rawDataArray(){let e=this.bb.__offset(this.bb_pos,12);return e?new Uint8Array(this.bb.bytes().buffer,this.bb.bytes().byteOffset+this.bb.__vector(this.bb_pos+e),this.bb.__vector_len(this.bb_pos+e)):null}stringData(e,t){let n=this.bb.__offset(this.bb_pos,14);return n?this.bb.__string(this.bb.__vector(this.bb_pos+n)+4*e,t):null}stringDataLength(){let e=this.bb.__offset(this.bb_pos,14);return e?this.bb.__vector_len(this.bb_pos+e):0}static startTensor(e){e.startObject(6)}static addName(e,t){e.addFieldOffset(0,t,0)}static addDocString(e,t){e.addFieldOffset(1,t,0)}static addDims(e,t){e.addFieldOffset(2,t,0)}static createDimsVector(e,t){e.startVector(8,t.length,8);for(let n=t.length-1;n>=0;n--)e.addInt64(t[n]);return e.endVector()}static startDimsVector(e,t){e.startVector(8,t,8)}static addDataType(t,n){t.addFieldInt32(3,n,e.experimental.fbs.TensorDataType.UNDEFINED)}static addRawData(e,t){e.addFieldOffset(4,t,0)}static createRawDataVector(e,t){e.startVector(1,t.length,1);for(let n=t.length-1;n>=0;n--)e.addInt8(t[n]);return e.endVector()}static startRawDataVector(e,t){e.startVector(1,t,1)}static addStringData(e,t){e.addFieldOffset(5,t,0)}static createStringDataVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startStringDataVector(e,t){e.startVector(4,t,4)}static endTensor(e){return e.endObject()}static createTensor(e,t,r,o,i,a,s){return n.startTensor(e),n.addName(e,t),n.addDocString(e,r),n.addDims(e,o),n.addDataType(e,i),n.addRawData(e,a),n.addStringData(e,s),n.endTensor(e)}}t.Tensor=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsSparseTensor(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsSparseTensor(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}values(t){let n=this.bb.__offset(this.bb_pos,4);return n?(t||new e.experimental.fbs.Tensor).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}indices(t){let n=this.bb.__offset(this.bb_pos,6);return n?(t||new e.experimental.fbs.Tensor).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}dims(e){let t=this.bb.__offset(this.bb_pos,8);return t?this.bb.readInt64(this.bb.__vector(this.bb_pos+t)+8*e):this.bb.createLong(0,0)}dimsLength(){let e=this.bb.__offset(this.bb_pos,8);return e?this.bb.__vector_len(this.bb_pos+e):0}static startSparseTensor(e){e.startObject(3)}static addValues(e,t){e.addFieldOffset(0,t,0)}static addIndices(e,t){e.addFieldOffset(1,t,0)}static addDims(e,t){e.addFieldOffset(2,t,0)}static createDimsVector(e,t){e.startVector(8,t.length,8);for(let n=t.length-1;n>=0;n--)e.addInt64(t[n]);return e.endVector()}static 
startDimsVector(e,t){e.startVector(8,t,8)}static endSparseTensor(e){return e.endObject()}static createSparseTensor(e,t,r,o){return n.startSparseTensor(e),n.addValues(e,t),n.addIndices(e,r),n.addDims(e,o),n.endSparseTensor(e)}}t.SparseTensor=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsAttribute(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsAttribute(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}name(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.__string(this.bb_pos+t,e):null}docString(e){let t=this.bb.__offset(this.bb_pos,6);return t?this.bb.__string(this.bb_pos+t,e):null}type(){let t=this.bb.__offset(this.bb_pos,8);return t?this.bb.readInt32(this.bb_pos+t):e.experimental.fbs.AttributeType.UNDEFINED}f(){let e=this.bb.__offset(this.bb_pos,10);return e?this.bb.readFloat32(this.bb_pos+e):0}i(){let e=this.bb.__offset(this.bb_pos,12);return e?this.bb.readInt64(this.bb_pos+e):this.bb.createLong(0,0)}s(e){let t=this.bb.__offset(this.bb_pos,14);return t?this.bb.__string(this.bb_pos+t,e):null}t(t){let n=this.bb.__offset(this.bb_pos,16);return n?(t||new e.experimental.fbs.Tensor).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}g(t){let n=this.bb.__offset(this.bb_pos,18);return n?(t||new e.experimental.fbs.Graph).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}floats(e){let t=this.bb.__offset(this.bb_pos,20);return t?this.bb.readFloat32(this.bb.__vector(this.bb_pos+t)+4*e):0}floatsLength(){let e=this.bb.__offset(this.bb_pos,20);return e?this.bb.__vector_len(this.bb_pos+e):0}floatsArray(){let e=this.bb.__offset(this.bb_pos,20);return e?new Float32Array(this.bb.bytes().buffer,this.bb.bytes().byteOffset+this.bb.__vector(this.bb_pos+e),this.bb.__vector_len(this.bb_pos+e)):null}ints(e){let t=this.bb.__offset(this.bb_pos,22);return t?this.bb.readInt64(this.bb.__vector(this.bb_pos+t)+8*e):this.bb.createLong(0,0)}intsLength(){let e=this.bb.__offset(this.bb_pos,22);return e?this.bb.__vector_len(this.bb_pos+e):0}strings(e,t){let n=this.bb.__offset(this.bb_pos,24);return n?this.bb.__string(this.bb.__vector(this.bb_pos+n)+4*e,t):null}stringsLength(){let e=this.bb.__offset(this.bb_pos,24);return e?this.bb.__vector_len(this.bb_pos+e):0}tensors(t,n){let r=this.bb.__offset(this.bb_pos,26);return r?(n||new e.experimental.fbs.Tensor).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}tensorsLength(){let e=this.bb.__offset(this.bb_pos,26);return e?this.bb.__vector_len(this.bb_pos+e):0}graphs(t,n){let r=this.bb.__offset(this.bb_pos,28);return r?(n||new e.experimental.fbs.Graph).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}graphsLength(){let e=this.bb.__offset(this.bb_pos,28);return e?this.bb.__vector_len(this.bb_pos+e):0}static startAttribute(e){e.startObject(13)}static addName(e,t){e.addFieldOffset(0,t,0)}static addDocString(e,t){e.addFieldOffset(1,t,0)}static addType(t,n){t.addFieldInt32(2,n,e.experimental.fbs.AttributeType.UNDEFINED)}static addF(e,t){e.addFieldFloat32(3,t,0)}static addI(e,t){e.addFieldInt64(4,t,e.createLong(0,0))}static addS(e,t){e.addFieldOffset(5,t,0)}static addT(e,t){e.addFieldOffset(6,t,0)}static addG(e,t){e.addFieldOffset(7,t,0)}static 
addFloats(e,t){e.addFieldOffset(8,t,0)}static createFloatsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addFloat32(t[n]);return e.endVector()}static startFloatsVector(e,t){e.startVector(4,t,4)}static addInts(e,t){e.addFieldOffset(9,t,0)}static createIntsVector(e,t){e.startVector(8,t.length,8);for(let n=t.length-1;n>=0;n--)e.addInt64(t[n]);return e.endVector()}static startIntsVector(e,t){e.startVector(8,t,8)}static addStrings(e,t){e.addFieldOffset(10,t,0)}static createStringsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startStringsVector(e,t){e.startVector(4,t,4)}static addTensors(e,t){e.addFieldOffset(11,t,0)}static createTensorsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startTensorsVector(e,t){e.startVector(4,t,4)}static addGraphs(e,t){e.addFieldOffset(12,t,0)}static createGraphsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startGraphsVector(e,t){e.startVector(4,t,4)}static endAttribute(e){return e.endObject()}static createAttribute(e,t,r,o,i,a,s,u,l,c,f,d,p,h){return n.startAttribute(e),n.addName(e,t),n.addDocString(e,r),n.addType(e,o),n.addF(e,i),n.addI(e,a),n.addS(e,s),n.addT(e,u),n.addG(e,l),n.addFloats(e,c),n.addInts(e,f),n.addStrings(e,d),n.addTensors(e,p),n.addGraphs(e,h),n.endAttribute(e)}}t.Attribute=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsGraph(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsGraph(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}initializers(t,n){let r=this.bb.__offset(this.bb_pos,4);return r?(n||new e.experimental.fbs.Tensor).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}initializersLength(){let e=this.bb.__offset(this.bb_pos,4);return e?this.bb.__vector_len(this.bb_pos+e):0}nodeArgs(t,n){let r=this.bb.__offset(this.bb_pos,6);return r?(n||new e.experimental.fbs.ValueInfo).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}nodeArgsLength(){let e=this.bb.__offset(this.bb_pos,6);return e?this.bb.__vector_len(this.bb_pos+e):0}nodes(t,n){let r=this.bb.__offset(this.bb_pos,8);return r?(n||new e.experimental.fbs.Node).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}nodesLength(){let e=this.bb.__offset(this.bb_pos,8);return e?this.bb.__vector_len(this.bb_pos+e):0}maxNodeIndex(){let e=this.bb.__offset(this.bb_pos,10);return e?this.bb.readUint32(this.bb_pos+e):0}nodeEdges(t,n){let r=this.bb.__offset(this.bb_pos,12);return r?(n||new e.experimental.fbs.NodeEdge).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}nodeEdgesLength(){let e=this.bb.__offset(this.bb_pos,12);return e?this.bb.__vector_len(this.bb_pos+e):0}inputs(e,t){let n=this.bb.__offset(this.bb_pos,14);return n?this.bb.__string(this.bb.__vector(this.bb_pos+n)+4*e,t):null}inputsLength(){let e=this.bb.__offset(this.bb_pos,14);return e?this.bb.__vector_len(this.bb_pos+e):0}outputs(e,t){let n=this.bb.__offset(this.bb_pos,16);return n?this.bb.__string(this.bb.__vector(this.bb_pos+n)+4*e,t):null}outputsLength(){let 
e=this.bb.__offset(this.bb_pos,16);return e?this.bb.__vector_len(this.bb_pos+e):0}sparseInitializers(t,n){let r=this.bb.__offset(this.bb_pos,18);return r?(n||new e.experimental.fbs.SparseTensor).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}sparseInitializersLength(){let e=this.bb.__offset(this.bb_pos,18);return e?this.bb.__vector_len(this.bb_pos+e):0}static startGraph(e){e.startObject(8)}static addInitializers(e,t){e.addFieldOffset(0,t,0)}static createInitializersVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startInitializersVector(e,t){e.startVector(4,t,4)}static addNodeArgs(e,t){e.addFieldOffset(1,t,0)}static createNodeArgsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startNodeArgsVector(e,t){e.startVector(4,t,4)}static addNodes(e,t){e.addFieldOffset(2,t,0)}static createNodesVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startNodesVector(e,t){e.startVector(4,t,4)}static addMaxNodeIndex(e,t){e.addFieldInt32(3,t,0)}static addNodeEdges(e,t){e.addFieldOffset(4,t,0)}static createNodeEdgesVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startNodeEdgesVector(e,t){e.startVector(4,t,4)}static addInputs(e,t){e.addFieldOffset(5,t,0)}static createInputsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startInputsVector(e,t){e.startVector(4,t,4)}static addOutputs(e,t){e.addFieldOffset(6,t,0)}static createOutputsVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startOutputsVector(e,t){e.startVector(4,t,4)}static addSparseInitializers(e,t){e.addFieldOffset(7,t,0)}static createSparseInitializersVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startSparseInitializersVector(e,t){e.startVector(4,t,4)}static endGraph(e){return e.endObject()}static createGraph(e,t,r,o,i,a,s,u,l){return n.startGraph(e),n.addInitializers(e,t),n.addNodeArgs(e,r),n.addNodes(e,o),n.addMaxNodeIndex(e,i),n.addNodeEdges(e,a),n.addInputs(e,s),n.addOutputs(e,u),n.addSparseInitializers(e,l),n.endGraph(e)}}t.Graph=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsModel(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsModel(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}irVersion(){let e=this.bb.__offset(this.bb_pos,4);return e?this.bb.readInt64(this.bb_pos+e):this.bb.createLong(0,0)}opsetImport(t,n){let r=this.bb.__offset(this.bb_pos,6);return r?(n||new e.experimental.fbs.OperatorSetId).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}opsetImportLength(){let e=this.bb.__offset(this.bb_pos,6);return e?this.bb.__vector_len(this.bb_pos+e):0}producerName(e){let t=this.bb.__offset(this.bb_pos,8);return t?this.bb.__string(this.bb_pos+t,e):null}producerVersion(e){let t=this.bb.__offset(this.bb_pos,10);return t?this.bb.__string(this.bb_pos+t,e):null}domain(e){let 
t=this.bb.__offset(this.bb_pos,12);return t?this.bb.__string(this.bb_pos+t,e):null}modelVersion(){let e=this.bb.__offset(this.bb_pos,14);return e?this.bb.readInt64(this.bb_pos+e):this.bb.createLong(0,0)}docString(e){let t=this.bb.__offset(this.bb_pos,16);return t?this.bb.__string(this.bb_pos+t,e):null}graph(t){let n=this.bb.__offset(this.bb_pos,18);return n?(t||new e.experimental.fbs.Graph).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}graphDocString(e){let t=this.bb.__offset(this.bb_pos,20);return t?this.bb.__string(this.bb_pos+t,e):null}static startModel(e){e.startObject(9)}static addIrVersion(e,t){e.addFieldInt64(0,t,e.createLong(0,0))}static addOpsetImport(e,t){e.addFieldOffset(1,t,0)}static createOpsetImportVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startOpsetImportVector(e,t){e.startVector(4,t,4)}static addProducerName(e,t){e.addFieldOffset(2,t,0)}static addProducerVersion(e,t){e.addFieldOffset(3,t,0)}static addDomain(e,t){e.addFieldOffset(4,t,0)}static addModelVersion(e,t){e.addFieldInt64(5,t,e.createLong(0,0))}static addDocString(e,t){e.addFieldOffset(6,t,0)}static addGraph(e,t){e.addFieldOffset(7,t,0)}static addGraphDocString(e,t){e.addFieldOffset(8,t,0)}static endModel(e){return e.endObject()}static createModel(e,t,r,o,i,a,s,u,l,c){return n.startModel(e),n.addIrVersion(e,t),n.addOpsetImport(e,r),n.addProducerName(e,o),n.addProducerVersion(e,i),n.addDomain(e,a),n.addModelVersion(e,s),n.addDocString(e,u),n.addGraph(e,l),n.addGraphDocString(e,c),n.endModel(e)}}t.Model=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(e){!function(e){class t{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsKernelCreateInfos(e,n){return(n||new t).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsKernelCreateInfos(e,n){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(n||new t).__init(e.readInt32(e.position())+e.position(),e)}nodeIndices(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.readUint32(this.bb.__vector(this.bb_pos+t)+4*e):0}nodeIndicesLength(){let e=this.bb.__offset(this.bb_pos,4);return e?this.bb.__vector_len(this.bb_pos+e):0}nodeIndicesArray(){let e=this.bb.__offset(this.bb_pos,4);return e?new Uint32Array(this.bb.bytes().buffer,this.bb.bytes().byteOffset+this.bb.__vector(this.bb_pos+e),this.bb.__vector_len(this.bb_pos+e)):null}kernelDefHashes(e){let t=this.bb.__offset(this.bb_pos,6);return t?this.bb.readUint64(this.bb.__vector(this.bb_pos+t)+8*e):this.bb.createLong(0,0)}kernelDefHashesLength(){let e=this.bb.__offset(this.bb_pos,6);return e?this.bb.__vector_len(this.bb_pos+e):0}static startKernelCreateInfos(e){e.startObject(2)}static addNodeIndices(e,t){e.addFieldOffset(0,t,0)}static createNodeIndicesVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addInt32(t[n]);return e.endVector()}static startNodeIndicesVector(e,t){e.startVector(4,t,4)}static addKernelDefHashes(e,t){e.addFieldOffset(1,t,0)}static createKernelDefHashesVector(e,t){e.startVector(8,t.length,8);for(let n=t.length-1;n>=0;n--)e.addInt64(t[n]);return e.endVector()}static startKernelDefHashesVector(e,t){e.startVector(8,t,8)}static endKernelCreateInfos(e){return e.endObject()}static createKernelCreateInfos(e,n,r){return 
t.startKernelCreateInfos(e),t.addNodeIndices(e,n),t.addKernelDefHashes(e,r),t.endKernelCreateInfos(e)}}e.KernelCreateInfos=t}(e.fbs||(e.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsSubGraphSessionState(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsSubGraphSessionState(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}graphId(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.__string(this.bb_pos+t,e):null}sessionState(t){let n=this.bb.__offset(this.bb_pos,6);return n?(t||new e.experimental.fbs.SessionState).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}static startSubGraphSessionState(e){e.startObject(2)}static addGraphId(e,t){e.addFieldOffset(0,t,0)}static addSessionState(e,t){e.addFieldOffset(1,t,0)}static endSubGraphSessionState(e){let t=e.endObject();return e.requiredField(t,4),t}static createSubGraphSessionState(e,t,r){return n.startSubGraphSessionState(e),n.addGraphId(e,t),n.addSessionState(e,r),n.endSubGraphSessionState(e)}}t.SubGraphSessionState=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsSessionState(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsSessionState(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}kernels(t){let n=this.bb.__offset(this.bb_pos,4);return n?(t||new e.experimental.fbs.KernelCreateInfos).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}subGraphSessionStates(t,n){let r=this.bb.__offset(this.bb_pos,6);return r?(n||new e.experimental.fbs.SubGraphSessionState).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos+r)+4*t),this.bb):null}subGraphSessionStatesLength(){let e=this.bb.__offset(this.bb_pos,6);return e?this.bb.__vector_len(this.bb_pos+e):0}static startSessionState(e){e.startObject(2)}static addKernels(e,t){e.addFieldOffset(0,t,0)}static addSubGraphSessionStates(e,t){e.addFieldOffset(1,t,0)}static createSubGraphSessionStatesVector(e,t){e.startVector(4,t.length,4);for(let n=t.length-1;n>=0;n--)e.addOffset(t[n]);return e.endVector()}static startSubGraphSessionStatesVector(e,t){e.startVector(4,t,4)}static endSessionState(e){return e.endObject()}static createSessionState(e,t,r){return n.startSessionState(e),n.addKernels(e,t),n.addSubGraphSessionStates(e,r),n.endSessionState(e)}}t.SessionState=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={})),function(e){!function(t){!function(t){class n{constructor(){this.bb=null,this.bb_pos=0}__init(e,t){return this.bb_pos=e,this.bb=t,this}static getRootAsInferenceSession(e,t){return(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static getSizePrefixedRootAsInferenceSession(e,t){return e.setPosition(e.position()+r.flatbuffers.SIZE_PREFIX_LENGTH),(t||new n).__init(e.readInt32(e.position())+e.position(),e)}static bufferHasIdentifier(e){return e.__has_identifier("ORTM")}ortVersion(e){let t=this.bb.__offset(this.bb_pos,4);return t?this.bb.__string(this.bb_pos+t,e):null}model(t){let 
n=this.bb.__offset(this.bb_pos,6);return n?(t||new e.experimental.fbs.Model).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}sessionState(t){let n=this.bb.__offset(this.bb_pos,8);return n?(t||new e.experimental.fbs.SessionState).__init(this.bb.__indirect(this.bb_pos+n),this.bb):null}static startInferenceSession(e){e.startObject(3)}static addOrtVersion(e,t){e.addFieldOffset(0,t,0)}static addModel(e,t){e.addFieldOffset(1,t,0)}static addSessionState(e,t){e.addFieldOffset(2,t,0)}static endInferenceSession(e){return e.endObject()}static finishInferenceSessionBuffer(e,t){e.finish(t,"ORTM")}static finishSizePrefixedInferenceSessionBuffer(e,t){e.finish(t,"ORTM",!0)}static createInferenceSession(e,t,r,o){return n.startInferenceSession(e),n.addOrtVersion(e,t),n.addModel(e,r),n.addSessionState(e,o),n.endInferenceSession(e)}}t.InferenceSession=n}(t.fbs||(t.fbs={}))}(e.experimental||(e.experimental={}))}(t.onnxruntime||(t.onnxruntime={}))},7448:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.OnnxjsSessionHandler=void 0;const r=n(1670),o=n(9162);t.OnnxjsSessionHandler=class{constructor(e){this.session=e,this.inputNames=this.session.inputNames,this.outputNames=this.session.outputNames}async dispose(){}async run(e,t,n){const i=new Map;for(const t in e)if(Object.hasOwnProperty.call(e,t)){const n=e[t];i.set(t,new o.Tensor(n.dims,n.type,void 0,void 0,n.data))}const a=await this.session.run(i),s={};return a.forEach(((e,t)=>{s[t]=new r.Tensor(e.type,e.data,e.dims)})),s}startProfiling(){this.session.startProfiling()}endProfiling(){this.session.endProfiling()}}},6919:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.Session=void 0;const r=n(7067),o=n(1296),i=n(7091),a=n(1036),s=n(6231),u=n(2644);t.Session=class{constructor(e={}){this._initialized=!1,this.backendHint=e.backendHint,this.profiler=s.Profiler.create(e.profiler),this.context={profiler:this.profiler,graphInputTypes:[],graphInputDims:[]}}get inputNames(){return this._model.graph.getInputNames()}get outputNames(){return this._model.graph.getOutputNames()}startProfiling(){this.profiler.start()}endProfiling(){this.profiler.stop()}async loadModel(e,t,n){await this.profiler.event("session","Session.loadModel",(async()=>{const a=await(0,i.resolveBackend)(this.backendHint);if(this.sessionHandler=a.createSessionHandler(this.context),this._model=new u.Model,"string"==typeof e){const t=e.endsWith(".ort");if("undefined"==typeof fetch){const n=await(0,o.promisify)(r.readFile)(e);this.initialize(n,t)}else{const n=await fetch(e),r=await n.arrayBuffer();this.initialize(new Uint8Array(r),t)}}else if(ArrayBuffer.isView(e))this.initialize(e);else{const r=new Uint8Array(e,t||0,n||e.byteLength);this.initialize(r)}}))}initialize(e,t){if(this._initialized)throw new Error("already initialized");this.profiler.event("session","Session.initialize",(()=>{const n=this.sessionHandler.transformGraph?this.sessionHandler:void 0;this._model.load(e,n,t),this.sessionHandler.onGraphInitialized&&this.sessionHandler.onGraphInitialized(this._model.graph),this.initializeOps(this._model.graph),this._executionPlan=new a.ExecutionPlan(this._model.graph,this._ops,this.profiler)})),this._initialized=!0}async run(e){if(!this._initialized)throw new Error("session not initialized yet");return this.profiler.event("session","Session.run",(async()=>{const t=this.normalizeAndValidateInputs(e),n=await this._executionPlan.execute(this.sessionHandler,t);return this.createOutput(n)}))}normalizeAndValidateInputs(e){const 
t=this._model.graph.getInputNames();if(Array.isArray(e)){if(e.length!==t.length)throw new Error(`incorrect input array length: expected ${t.length} but got ${e.length}`)}else{if(e.size!==t.length)throw new Error(`incorrect input map size: expected ${t.length} but got ${e.size}`);const n=new Array(e.size);let r=0;for(let o=0;o"string"==typeof e))))throw new TypeError("cache should be a string array");l&&(this.cache=new Array(s))}else{if(void 0!==i){const e=d(t);if(!(i instanceof e))throw new TypeError(`cache should be type ${e.name}`)}if(l){const e=new ArrayBuffer(s*function(e){switch(e){case"bool":case"int8":case"uint8":return 1;case"int16":case"uint16":return 2;case"int32":case"uint32":case"float32":return 4;case"float64":return 8;default:throw new Error(`cannot calculate sizeof() on type ${e}`)}}(t));this.cache=function(e,t){return new(d(t))(e)}(e,t)}}}static fromProto(e){if(!e)throw new Error("cannot construct Value from an empty tensor");const t=u.ProtoUtil.tensorDataTypeFromProto(e.dataType),n=u.ProtoUtil.tensorDimsFromProto(e.dims),r=new c(n,t);if("string"===t)e.stringData.forEach(((e,t)=>{r.data[t]=(0,u.decodeUtf8String)(e)}));else if(e.rawData&&"number"==typeof e.rawData.byteLength&&e.rawData.byteLength>0){const t=r.data,n=new DataView(e.rawData.buffer,e.rawData.byteOffset,e.rawData.byteLength),o=f(e.dataType),i=e.rawData.byteLength/o;if(e.rawData.byteLength%o!=0)throw new Error("invalid buffer length");if(t.length!==i)throw new Error("buffer length mismatch");for(let r=0;r0){const t=r.data,n=new DataView(e.rawDataArray().buffer,e.rawDataArray().byteOffset,e.rawDataLength()),o=f(e.dataType()),i=e.rawDataLength()/o;if(e.rawDataLength()%o!=0)throw new Error("invalid buffer length");if(t.length!==i)throw new Error("buffer length mismatch");for(let r=0;r1&&u>1)return;a[i-s]=Math.max(n,u)}return a}static index(e,t){const n=new Array(t.length);return l.fillIndex(e,t,n),n}static fillIndex(e,t,n){const r=e.length-t.length;for(let o=0;o=0;e--)r[e]=c%i[e],c=Math.floor(c/i[e]);p||(l.fillIndex(r,e.dims,o),f=e.get(o)),h||(l.fillIndex(r,t.dims,s),d=t.get(s)),u.set(r,n(f,d))}}return u}}static isValidBroadcast(e,t){const n=e.length,r=t.length;if(n>r)return!1;for(let o=1;o<=n;o++)if(1!==e[n-o]&&e[n-o]!==t[r-o])return!1;return!0}static getBroadcastDims(e,t){const n=e.length,r=[];for(let o=0;o1&&1===a&&r.unshift(i)}return r}}t.BroadcastUtil=l,t.arrayCopyHelper=function(e,t,n,r,o){if(r<0||r>=t.length)throw new Error("sourceIndex out of bounds");if(n<0||n>=e.length)throw new Error("targetIndex out of bounds");if(r+o>t.length)throw new Error("source indices to be copied are outside bounds");if(n+o>e.length)throw new Error("target array is too small to hold result");for(let i=0;ii.default.isLong(e)?e.toNumber():e))}static tensorValueTypeFromProto(e){return{tensorType:c.tensorDataTypeFromProto(e.elemType),shape:{dims:c.tensorDimsFromProto(e.shape.dim.map((e=>e.dimValue)))}}}static tensorDimsFromORTFormat(e){const t=[];for(let n=0;ne.length)throw new Error(`invalid dimension of ${t} for sizeFromDimension as Tensor has ${e.length} dimensions.`);return d.getSizeFromDimensionRange(e,t,e.length)}static sizeToDimension(e,t){if(t<0||t>e.length)throw new Error(`invalid dimension of ${t} for sizeToDimension as Tensor has ${e.length} dimensions.`);return d.getSizeFromDimensionRange(e,0,t)}static getSizeFromDimensionRange(e,t,n){let r=1;for(let o=t;o=0;--r)n[r]=n[r+1]*e[r+1];return n}static transpose(e){return e.slice().reverse()}static indicesToOffset(e,t,n){void 0===n&&(n=e.length);let r=0;for(let o=0;o=t)throw 
new Error("unsupported axis for this operation.");return e<0?e+t:e}static normalizeAxes(e,t){return e.map((e=>this.normalizeAxis(e,t)))}static incrementIndex(e,t,n){if(0===t.length||0===e.length)throw new Error("Index incrementing unsupported for scalar Tensor");if(void 0===n)n=t.length;else if(n<=0||n>t.length)throw new Error("Incorrect axis to increment on");for(let r=n-1;r>=0&&(e[r]++,!(e[r]=e.length)throw new Error("the dimension with value zero exceeds the dimension size of the input tensor");r[a]=e[a]}else r[a]=t[a];i*=r[a]}}const a=d.size(e);if(-1!==o){if(a%i!=0)throw new Error(`the input tensor cannot be reshaped to the requested shape. Input shape: [${e}] Output shape: [${t}]`);r[o]=a/i}else if(i!==a)throw new Error("reshapedDims and originalDims don't have matching sizes");return r}static sortBasedOnPerm(e,t){return t?t.map((t=>e[t])):e.slice().reverse()}static padShape(e,t){const n=e.length;return e.map(((e,r)=>e+t[r]+t[r+n]))}static areEqual(e,t){return e.length===t.length&&e.every(((e,n)=>e===t[n]))}static validateDimsAndCalcSize(e){if(e.length>6)throw new TypeError("Only rank 0 to 6 is supported for tensor shape.");let t=1;for(const n of e){if(!Number.isInteger(n))throw new TypeError(`Invalid shape: ${n} is not an integer`);if(n<0||n>2147483647)throw new TypeError(`Invalid shape: length ${n} is not allowed`);t*=n}return t}static flattenShape(e,t){t<0&&(t+=e.length);const n=e.reduce(((e,t)=>e*t),1),r=e.slice(t).reduce(((e,t)=>e*t),1);return[n/r,r]}static squeezeShape(e,t){const n=new Array;t=d.normalizeAxes(t,e.length);for(let r=0;r=0;if(o&&1!==e[r])throw new Error("squeeze an axis of size different than 1");(0===t.length&&e[r]>1||t.length>0&&!o)&&n.push(e[r])}return n}static unsqueezeShape(e,t){const n=new Array(e.length+t.length);n.fill(0);for(let e=0;e=n.length)throw new Error("'axes' has an out of range axis");if(0!==n[r])throw new Error("'axes' has a duplicate axis");n[r]=1}let r=0;for(let t=0;t=t.length)throw new Error("sourceIndex out of bounds");if(n<0||n>=e.length)throw new Error("targetIndex out of bounds");if(r+o>t.length)throw new Error("source indices to be copied are outside bounds");if(n+o>e.length)throw new Error("target array is too small to hold result");for(let i=0;i=t.length)throw new Error("sourceIndex out of bounds");if(n<0||n>=e.length)throw new Error("targetIndex out of bounds");if(r+o>t.length)throw new Error("source indices to be copied are outside bounds");if(n+o>e.length)throw new Error("target array is too small to hold result");for(let a=0;a=t.length)throw new Error("sourceIndex out of bounds");if(n<0||n>=e.length)throw new Error("targetIndex out of bounds");if(r+o>t.length)throw new Error("source indices to be copied are outside bounds");if(n+o>e.length)throw new Error("target array is too small to hold result");for(let a=0;a=t.length)throw new Error("sourceIndex out of bounds");if(n<0||n>=e.length)throw new Error("targetIndex out of bounds");if(r+o>t.length)throw new Error("source indices to be copied are outside bounds");if(n+o>e.length)throw new Error("target array is too small to hold result");for(let i=0;it.push(n)));const a=h.calcReduceShape(i,t,!0),u=d.size(a),c=new s.Tensor(a,e.type),f=d.computeStrides(a),p=d.computeStrides(i),g=new Array(i.length);for(let n=0;n=t.length)return i(e[o]);const u=t[r],l=u>=n.length?1:d.size(n.slice(u+1));for(let c=0;c0!==e))}}t.ReduceUtil=h;class g{static adjustPoolAttributes(e,t,n,r,o,i){if(!e&&n.length!==t.length-2)throw new Error("length of specified kernel shapes should be 2 less than length of input 
dimensions");if(e)for(let e=0;e=n.length?n.push(t[e+2]):n[e]=t[e+2];for(let e=0;e=n[e]||i[e+n.length]>=n[e])throw new Error("pads should be smaller than kernel")}}static adjustPadsBasedOnAutoPad(e,t,n,r,o,i){if(i){if(o.length!==2*(e.length-2))throw new Error("length of pads should be twice the length of data dimensions");if(t.length!==e.length-2)throw new Error("length of strides should be the length of data dimensions");if(r.length!==e.length-2)throw new Error("length of kernel shapes should be the length of data dimensions");for(let a=0;a{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.iterateExtraOptions=void 0,t.iterateExtraOptions=(e,n,r,o)=>{if("object"==typeof e&&null!==e){if(r.has(e))throw new Error("Circular reference in options");r.add(e)}Object.entries(e).forEach((([e,i])=>{const a=n?n+e:e;if("object"==typeof i)(0,t.iterateExtraOptions)(i,a+".",r,o);else if("string"==typeof i||"number"==typeof i)o(a,i.toString());else{if("boolean"!=typeof i)throw new Error("Can't handle extra config type: "+typeof i);o(a,i?"1":"0")}}))}},2157:function(e,t,n){"use strict";var r,o=this&&this.__createBinding||(Object.create?function(e,t,n,r){void 0===r&&(r=n);var o=Object.getOwnPropertyDescriptor(t,n);o&&!("get"in o?!t.__esModule:o.writable||o.configurable)||(o={enumerable:!0,get:function(){return t[n]}}),Object.defineProperty(e,r,o)}:function(e,t,n,r){void 0===r&&(r=n),e[r]=t[n]}),i=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),a=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)"default"!==n&&Object.prototype.hasOwnProperty.call(e,n)&&o(t,e,n);return i(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.endProfiling=t.run=t.releaseSession=t.createSession=t.createSessionFinalize=t.createSessionAllocate=t.initOrt=t.initWasm=void 0;const s=n(1670),u=a(n(349)),l=n(6361),c=()=>!!s.env.wasm.proxy&&"undefined"!=typeof document;let f,d,p,h=!1,g=!1,m=!1;const b=[],y=[],A=[],v=[],w=[],_=[],x=()=>{if(h||!g||m||!f)throw new Error("worker not ready")},T=e=>{switch(e.data.type){case"init-wasm":h=!1,e.data.err?(m=!0,d[1](e.data.err)):(g=!0,d[0]());break;case"init-ort":e.data.err?p[1](e.data.err):p[0]();break;case"create_allocate":e.data.err?b.shift()[1](e.data.err):b.shift()[0](e.data.out);break;case"create_finalize":e.data.err?y.shift()[1](e.data.err):y.shift()[0](e.data.out);break;case"create":e.data.err?A.shift()[1](e.data.err):A.shift()[0](e.data.out);break;case"release":e.data.err?v.shift()[1](e.data.err):v.shift()[0]();break;case"run":e.data.err?w.shift()[1](e.data.err):w.shift()[0](e.data.out);break;case"end-profiling":e.data.err?_.shift()[1](e.data.err):_.shift()[0]()}},E="undefined"!=typeof document?null===(r=null===document||void 0===document?void 0:document.currentScript)||void 0===r?void 0:r.src:void 0;t.initWasm=async()=>{if(c()){if(g)return;if(h)throw new Error("multiple calls to 'initWasm()' detected.");if(m)throw new Error("previous call to 'initWasm()' failed.");return h=!0,void 0===s.env.wasm.wasmPaths&&E&&0!==E.indexOf("blob:")&&(s.env.wasm.wasmPaths=E.substr(0,+E.lastIndexOf("/")+1)),new Promise(((e,t)=>{null==f||f.terminate(),f=n(9710).Z(),f.onmessage=T,d=[e,t];const r={type:"init-wasm",in:s.env.wasm};f.postMessage(r)}))}return(0,l.initializeWebAssembly)(s.env.wasm)},t.initOrt=async(e,t)=>{if(c())return x(),new Promise(((n,r)=>{p=[n,r];const 
o={type:"init-ort",in:{numThreads:e,loggingLevel:t}};f.postMessage(o)}));u.initOrt(e,t)},t.createSessionAllocate=async e=>c()?(x(),new Promise(((t,n)=>{b.push([t,n]);const r={type:"create_allocate",in:{model:e}};f.postMessage(r,[e.buffer])}))):u.createSessionAllocate(e),t.createSessionFinalize=async(e,t)=>c()?(x(),new Promise(((n,r)=>{y.push([n,r]);const o={type:"create_finalize",in:{modeldata:e,options:t}};f.postMessage(o)}))):u.createSessionFinalize(e,t),t.createSession=async(e,t)=>c()?(x(),new Promise(((n,r)=>{A.push([n,r]);const o={type:"create",in:{model:e,options:t}};f.postMessage(o,[e.buffer])}))):u.createSession(e,t),t.releaseSession=async e=>{if(c())return x(),new Promise(((t,n)=>{v.push([t,n]);const r={type:"release",in:e};f.postMessage(r)}));u.releaseSession(e)},t.run=async(e,t,n,r,o)=>c()?(x(),new Promise(((i,a)=>{w.push([i,a]);const s={type:"run",in:{sessionId:e,inputIndices:t,inputs:n,outputIndices:r,options:o}};f.postMessage(s,u.extractTransferableBuffers(n))}))):u.run(e,t,n,r,o),t.endProfiling=async e=>{if(c())return x(),new Promise(((t,n)=>{_.push([t,n]);const r={type:"end-profiling",in:e};f.postMessage(r)}));u.endProfiling(e)}},586:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.setRunOptions=void 0;const r=n(7967),o=n(4983),i=n(6361);t.setRunOptions=e=>{const t=(0,i.getInstance)();let n=0;const a=[],s=e||{};try{if(void 0===(null==e?void 0:e.logSeverityLevel))s.logSeverityLevel=2;else if("number"!=typeof e.logSeverityLevel||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if(void 0===(null==e?void 0:e.logVerbosityLevel))s.logVerbosityLevel=0;else if("number"!=typeof e.logVerbosityLevel||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);void 0===(null==e?void 0:e.terminate)&&(s.terminate=!1);let i=0;if(void 0!==(null==e?void 0:e.tag)&&(i=(0,o.allocWasmString)(e.tag,a)),n=t._OrtCreateRunOptions(s.logSeverityLevel,s.logVerbosityLevel,!!s.terminate,i),0===n)throw new Error("Can't create run options");return void 0!==(null==e?void 0:e.extra)&&(0,r.iterateExtraOptions)(e.extra,"",new WeakSet,((e,r)=>{const i=(0,o.allocWasmString)(e,a),s=(0,o.allocWasmString)(r,a);if(0!==t._OrtAddRunConfigEntry(n,i,s))throw new Error(`Can't set a run config entry: ${e} - ${r}`)})),[n,a]}catch(e){throw 0!==n&&t._OrtReleaseRunOptions(n),a.forEach(t._free),e}}},2306:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.OnnxruntimeWebAssemblySessionHandler=void 0;const r=n(2806),o=n(1670),i=n(2850),a=n(2157);let s;t.OnnxruntimeWebAssemblySessionHandler=class{async createSessionAllocate(e){const t=await fetch(e),n=await t.arrayBuffer();return(0,a.createSessionAllocate)(new Uint8Array(n))}async loadModel(e,t){if(s||(await(0,a.initOrt)(o.env.wasm.numThreads,(e=>{switch(e){case"verbose":return 0;case"info":return 1;case"warning":return 2;case"error":return 3;case"fatal":return 4;default:throw new Error(`unsupported logging level: ${e}`)}})(o.env.logLevel)),s=!0),"string"==typeof e)if("undefined"==typeof fetch){const n=await(0,i.promisify)(r.readFile)(e);[this.sessionId,this.inputNames,this.outputNames]=await(0,a.createSession)(n,t)}else{const n=await this.createSessionAllocate(e);[this.sessionId,this.inputNames,this.outputNames]=await(0,a.createSessionFinalize)(n,t)}else[this.sessionId,this.inputNames,this.outputNames]=await(0,a.createSession)(e,t)}async 
dispose(){return(0,a.releaseSession)(this.sessionId)}async run(e,t,n){const r=[],i=[];Object.entries(e).forEach((e=>{const t=e[0],n=e[1],o=this.inputNames.indexOf(t);if(-1===o)throw new Error(`invalid input '${t}'`);r.push(n),i.push(o)}));const s=[];Object.entries(t).forEach((e=>{const t=e[0],n=this.outputNames.indexOf(t);if(-1===n)throw new Error(`invalid output '${t}'`);s.push(n)}));const u=await(0,a.run)(this.sessionId,i,r.map((e=>[e.type,e.dims,e.data])),s,n),l={};for(let e=0;e{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.setSessionOptions=void 0;const r=n(7967),o=n(4983),i=n(6361);t.setSessionOptions=e=>{const t=(0,i.getInstance)();let n=0;const a=[],s=e||{};(e=>{e.extra||(e.extra={}),e.extra.session||(e.extra.session={});const t=e.extra.session;t.use_ort_model_bytes_directly||(t.use_ort_model_bytes_directly="1")})(s);try{void 0===(null==e?void 0:e.graphOptimizationLevel)&&(s.graphOptimizationLevel="all");const u=(e=>{switch(e){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${e}`)}})(s.graphOptimizationLevel);void 0===(null==e?void 0:e.enableCpuMemArena)&&(s.enableCpuMemArena=!0),void 0===(null==e?void 0:e.enableMemPattern)&&(s.enableMemPattern=!0),void 0===(null==e?void 0:e.executionMode)&&(s.executionMode="sequential");const l=(e=>{switch(e){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: ${e}`)}})(s.executionMode);let c=0;if(void 0!==(null==e?void 0:e.logId)&&(c=(0,o.allocWasmString)(e.logId,a)),void 0===(null==e?void 0:e.logSeverityLevel))s.logSeverityLevel=2;else if("number"!=typeof e.logSeverityLevel||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if(void 0===(null==e?void 0:e.logVerbosityLevel))s.logVerbosityLevel=0;else if("number"!=typeof e.logVerbosityLevel||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);if(void 0===(null==e?void 0:e.enableProfiling)&&(s.enableProfiling=!1),n=t._OrtCreateSessionOptions(u,!!s.enableCpuMemArena,!!s.enableMemPattern,l,!!s.enableProfiling,0,c,s.logSeverityLevel,s.logVerbosityLevel),0===n)throw new Error("Can't create session options");return(null==e?void 0:e.executionProviders)&&((e,t,n)=>{for(const r of t){let t="string"==typeof r?r:r.name;switch(t){case"xnnpack":t="XNNPACK";break;case"wasm":case"cpu":continue;default:throw new Error(`not supported EP: ${t}`)}const a=(0,o.allocWasmString)(t,n);if(0!==(0,i.getInstance)()._OrtAppendExecutionProvider(e,a))throw new Error(`Can't append execution provider: ${t}`)}})(n,e.executionProviders,a),void 0!==(null==e?void 0:e.extra)&&(0,r.iterateExtraOptions)(e.extra,"",new WeakSet,((e,r)=>{const i=(0,o.allocWasmString)(e,a),s=(0,o.allocWasmString)(r,a);if(0!==t._OrtAddSessionConfigEntry(n,i,s))throw new Error(`Can't set a session config entry: ${e} - ${r}`)})),[n,a]}catch(e){throw 0!==n&&t._OrtReleaseSessionOptions(n),a.forEach(t._free),e}}},4983:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.allocWasmString=void 0;const r=n(6361);t.allocWasmString=(e,t)=>{const n=(0,r.getInstance)(),o=n.lengthBytesUTF8(e)+1,i=n._malloc(o);return n.stringToUTF8(e,i,o),t.push(i),i}},349:(e,t,n)=>{"use 
strict";Object.defineProperty(t,"__esModule",{value:!0}),t.extractTransferableBuffers=t.endProfiling=t.run=t.releaseSession=t.createSession=t.createSessionFinalize=t.createSessionAllocate=t.initOrt=void 0;const r=n(586),o=n(4919),i=n(4983),a=n(6361);t.initOrt=(e,t)=>{const n=(0,a.getInstance)()._OrtInit(e,t);if(0!==n)throw new Error(`Can't initialize onnxruntime. error code = ${n}`)};const s=new Map;t.createSessionAllocate=e=>{const t=(0,a.getInstance)(),n=t._malloc(e.byteLength);return t.HEAPU8.set(e,n),[n,e.byteLength]},t.createSessionFinalize=(e,t)=>{const n=(0,a.getInstance)();let r=0,i=0,u=[];try{if([i,u]=(0,o.setSessionOptions)(t),r=n._OrtCreateSession(e[0],e[1],i),0===r)throw new Error("Can't create a session")}finally{n._free(e[0]),n._OrtReleaseSessionOptions(i),u.forEach(n._free)}const l=n._OrtGetInputCount(r),c=n._OrtGetOutputCount(r),f=[],d=[],p=[],h=[];for(let e=0;e{const r=(0,t.createSessionAllocate)(e);return(0,t.createSessionFinalize)(r,n)},t.releaseSession=e=>{const t=(0,a.getInstance)(),n=s.get(e);if(!n)throw new Error("invalid session id");const r=n[0],o=n[1],i=n[2];o.forEach(t._OrtFree),i.forEach(t._OrtFree),t._OrtReleaseSession(r),s.delete(e)};const u=e=>{switch(e){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 13;default:throw new Error(`unsupported data type: ${e}`)}},l=e=>{switch(e){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";default:throw new Error(`unsupported data type: ${e}`)}},c=e=>{switch(e){case"float32":return Float32Array;case"uint8":case"bool":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return Int16Array;case"int32":return Int32Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return BigUint64Array;default:throw new Error(`unsupported type: ${e}`)}};t.run=(e,t,n,o,f)=>{const d=(0,a.getInstance)(),p=s.get(e);if(!p)throw new Error("invalid session id");const h=p[0],g=p[1],m=p[2],b=t.length,y=o.length;let A=0,v=[];const w=[],_=[];try{[A,v]=(0,r.setRunOptions)(f);for(let e=0;ed.HEAP32[e++]=t));const n=d._OrtCreateTensor(u(t),a,s,c,r.length);if(0===n)throw new Error("Can't create a tensor");w.push(n)}finally{d.stackRestore(l)}}const e=d.stackSave(),a=d.stackAlloc(4*b),s=d.stackAlloc(4*b),p=d.stackAlloc(4*y),x=d.stackAlloc(4*y);try{let e=a/4,n=s/4,r=p/4,i=x/4;for(let r=0;re*t));if(o=l(n),"string"===o){const e=[];let t=i/4;for(let n=0;n{const t=(0,a.getInstance)(),n=s.get(e);if(!n)throw new Error("invalid session id");const r=n[0],o=t._OrtEndProfiling(r);if(0===o)throw new Error("Can't get an profile file name");t._OrtFree(o)},t.extractTransferableBuffers=e=>{const t=[];for(const n of e){const e=n[2];!Array.isArray(e)&&e.buffer&&t.push(e.buffer)}return t}},6361:function(e,t,n){"use strict";var r=this&&this.__createBinding||(Object.create?function(e,t,n,r){void 0===r&&(r=n);var o=Object.getOwnPropertyDescriptor(t,n);o&&!("get"in o?!t.__esModule:o.writable||o.configurable)||(o={enumerable:!0,get:function(){return t[n]}}),Object.defineProperty(e,r,o)}:function(e,t,n,r){void 
0===r&&(r=n),e[r]=t[n]}),o=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)"default"!==n&&Object.prototype.hasOwnProperty.call(e,n)&&r(t,e,n);return o(t,e),t},a=this&&this.__importDefault||function(e){return e&&e.__esModule?e:{default:e}};Object.defineProperty(t,"__esModule",{value:!0}),t.dispose=t.getInstance=t.initializeWebAssembly=void 0;const s=i(n(6449)),u=a(n(932)),l=n(3474);let c,f=!1,d=!1,p=!1;const h=(e,t)=>t?e?"ort-wasm-simd-threaded.wasm":"ort-wasm-threaded.wasm":e?"ort-wasm-simd.wasm":"ort-wasm.wasm";t.initializeWebAssembly=async e=>{if(f)return Promise.resolve();if(d)throw new Error("multiple calls to 'initializeWebAssembly()' detected.");if(p)throw new Error("previous call to 'initializeWebAssembly()' failed.");d=!0;const t=e.initTimeout,r=e.numThreads,o=e.simd,i=r>1&&(()=>{try{return"undefined"!=typeof SharedArrayBuffer&&("undefined"!=typeof MessageChannel&&(new MessageChannel).port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11])))}catch(e){return!1}})(),a=o&&(()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch(e){return!1}})(),g="string"==typeof e.wasmPaths?e.wasmPaths:void 0,m=h(!1,i),b=h(a,i),y="object"==typeof e.wasmPaths?e.wasmPaths[b]:void 0;let A=!1;const v=[];if(t>0&&v.push(new Promise((e=>{setTimeout((()=>{A=!0,e()}),t)}))),v.push(new Promise(((e,t)=>{const r=i?l:u.default,o={locateFile:(e,t)=>i&&e.endsWith(".worker.js")&&"undefined"!=typeof Blob?URL.createObjectURL(new Blob([n(4154)],{type:"text/javascript"})):e===m?null!=y?y:(null!=g?g:t)+b:t+e};if(i)if("undefined"==typeof Blob)o.mainScriptUrlOrBlob=s.join("/","ort-wasm-threaded.js");else{const e=`var ortWasmThreaded=(function(){var _scriptDir;return ${r.toString()}})();`;o.mainScriptUrlOrBlob=new Blob([e],{type:"text/javascript"})}r(o).then((t=>{d=!1,f=!0,c=t,e()}),(e=>{d=!1,p=!0,t(e)}))}))),await Promise.race(v),A)throw new Error(`WebAssembly backend initializing failed due to timeout: ${t}ms`)},t.getInstance=()=>{if(f&&c)return c;throw new Error("WebAssembly is not initialized yet.")},t.dispose=()=>{var e;!f||d||p||(d=!0,null===(e=c.PThread)||void 0===e||e.terminateAllThreads(),c=void 0,d=!1,f=!1,p=!0)}},9710:(e,t,n)=>{"use strict";n.d(t,{Z:()=>i});var r=n(477),o=n.n(r);function i(){return o()('/*!\n* ONNX Runtime Web v1.14.0\n* Copyright (c) Microsoft Corporation. 
All rights reserved.\n* Licensed under the MIT License.\n*/\n(()=>{var t={474:(t,e,n)=>{var _scriptDir,r=(_scriptDir=(_scriptDir="undefined"!=typeof document&&document.currentScript?document.currentScript.src:void 0)||"/index.js",function(t){function e(){return j.buffer!=D&&N(j.buffer),P}function r(){return j.buffer!=D&&N(j.buffer),U}function a(){return j.buffer!=D&&N(j.buffer),F}function i(){return j.buffer!=D&&N(j.buffer),I}function o(){return j.buffer!=D&&N(j.buffer),W}var u,c,s;t=t||{},u||(u=void 0!==t?t:{}),u.ready=new Promise((function(t,e){c=t,s=e}));var l,f,p,h,d,y,b=Object.assign({},u),m="./this.program",g=(t,e)=>{throw e},v="object"==typeof window,w="function"==typeof importScripts,_="object"==typeof process&&"object"==typeof process.versions&&"string"==typeof process.versions.node,O=u.ENVIRONMENT_IS_PTHREAD||!1,A="";function S(t){return u.locateFile?u.locateFile(t,A):A+t}if(_){let e;A=w?n(908).dirname(A)+"/":"//",y=()=>{d||(h=n(384),d=n(908))},l=function(t,e){return y(),t=d.normalize(t),h.readFileSync(t,e?void 0:"utf8")},p=t=>((t=l(t,!0)).buffer||(t=new Uint8Array(t)),t),f=(t,e,n)=>{y(),t=d.normalize(t),h.readFile(t,(function(t,r){t?n(t):e(r.buffer)}))},1{if(Q())throw process.exitCode=t,e;e instanceof ct||x("exiting due to exception: "+e),process.exit(t)},u.inspect=function(){return"[Emscripten Module object]"};try{e=n(925)}catch(t){throw console.error(\'The "worker_threads" module is not supported in this node.js build - perhaps a newer version is needed?\'),t}n.g.Worker=e.Worker}else(v||w)&&(w?A=self.location.href:"undefined"!=typeof document&&document.currentScript&&(A=document.currentScript.src),_scriptDir&&(A=_scriptDir),A=0!==A.indexOf("blob:")?A.substr(0,A.replace(/[?#].*/,"").lastIndexOf("/")+1):"",_||(l=t=>{var e=new XMLHttpRequest;return e.open("GET",t,!1),e.send(null),e.responseText},w&&(p=t=>{var e=new XMLHttpRequest;return e.open("GET",t,!1),e.responseType="arraybuffer",e.send(null),new Uint8Array(e.response)}),f=(t,e,n)=>{var r=new XMLHttpRequest;r.open("GET",t,!0),r.responseType="arraybuffer",r.onload=()=>{200==r.status||0==r.status&&r.response?e(r.response):n()},r.onerror=n,r.send(null)}));_&&"undefined"==typeof performance&&(n.g.performance=n(953).performance);var T=console.log.bind(console),E=console.warn.bind(console);_&&(y(),T=t=>h.writeSync(1,t+"\\n"),E=t=>h.writeSync(2,t+"\\n"));var M,C=u.print||T,x=u.printErr||E;Object.assign(u,b),b=null,u.thisProgram&&(m=u.thisProgram),u.quit&&(g=u.quit),u.wasmBinary&&(M=u.wasmBinary);var R=u.noExitRuntime||!1;"object"!=typeof WebAssembly&&at("no native wasm support detected");var j,k,D,P,U,F,I,W,H=!1,L="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0;function z(t,e,n){var r=(e>>>=0)+n;for(n=e;t[n]&&!(n>=r);)++n;if(16(a=224==(240&a)?(15&a)<<12|i<<6|o:(7&a)<<18|i<<12|o<<6|63&t[e++])?r+=String.fromCharCode(a):(a-=65536,r+=String.fromCharCode(55296|a>>10,56320|1023&a))}}else r+=String.fromCharCode(a)}return r}function Y(t,e){return(t>>>=0)?z(r(),t,e):""}function B(t,e,n,r){if(!(0>>=0;r=n+r-1;for(var i=0;i=o&&(o=65536+((1023&o)<<10)|1023&t.charCodeAt(++i)),127>=o){if(n>=r)break;e[n++>>>0]=o}else{if(2047>=o){if(n+1>=r)break;e[n++>>>0]=192|o>>6}else{if(65535>=o){if(n+2>=r)break;e[n++>>>0]=224|o>>12}else{if(n+3>=r)break;e[n++>>>0]=240|o>>18,e[n++>>>0]=128|o>>12&63}e[n++>>>0]=128|o>>6&63}e[n++>>>0]=128|63&o}}return e[n>>>0]=0,n-a}function G(t){for(var e=0,n=0;n=r?e++:2047>=r?e+=2:55296<=r&&57343>=r?(e+=4,++n):e+=3}return e}function N(t){D=t,u.HEAP8=P=new Int8Array(t),u.HEAP16=new Int16Array(t),u.HEAP32=F=new 
Int32Array(t),u.HEAPU8=U=new Uint8Array(t),u.HEAPU16=new Uint16Array(t),u.HEAPU32=I=new Uint32Array(t),u.HEAPF32=new Float32Array(t),u.HEAPF64=W=new Float64Array(t)}O&&(D=u.buffer);var V=u.INITIAL_MEMORY||16777216;if(O)j=u.wasmMemory,D=u.buffer;else if(u.wasmMemory)j=u.wasmMemory;else if(!((j=new WebAssembly.Memory({initial:V/65536,maximum:65536,shared:!0})).buffer instanceof SharedArrayBuffer))throw x("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),_&&console.log("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and also use a recent version)"),Error("bad memory");j&&(D=j.buffer),V=D.byteLength,N(D);var $,q=[],X=[],J=[],Z=[];function Q(){return R||!1}function K(){var t=u.preRun.shift();q.unshift(t)}var tt,et=0,nt=null,rt=null;function at(t){throw O?postMessage({cmd:"onAbort",arg:t}):u.onAbort&&u.onAbort(t),x(t="Aborted("+t+")"),H=!0,t=new WebAssembly.RuntimeError(t+". Build with -sASSERTIONS for more info."),s(t),t}function it(){return tt.startsWith("data:application/octet-stream;base64,")}function ot(){var t=tt;try{if(t==tt&&M)return new Uint8Array(M);if(p)return p(t);throw"both async and sync fetching of the wasm failed"}catch(t){at(t)}}tt="ort-wasm-threaded.wasm",it()||(tt=S(tt));var ut={};function ct(t){this.name="ExitStatus",this.message="Program terminated with exit("+t+")",this.status=t}function st(t){(t=ht.Vb[t])||at(),ht.mc(t)}function lt(t){var e=ht.Cc();if(!e)return 6;ht.ac.push(e),ht.Vb[t.Ub]=e,e.Ub=t.Ub;var n={cmd:"run",start_routine:t.Ic,arg:t.zc,pthread_ptr:t.Ub};return e.$b=()=>{n.time=performance.now(),e.postMessage(n,t.Nc)},e.loaded&&(e.$b(),delete e.$b),0}function ft(t){if(O)return $t(1,1,t);Q()||(ht.oc(),u.onExit&&u.onExit(t),H=!0),g(t,new ct(t))}function pt(t,e){if(!e&&O)throw bt(t),"unwind";Q()||O||(me(),dt(J),be(0),re[1].length&&ae(1,10),re[2].length&&ae(2,10),ht.oc()),ft(t)}var ht={Yb:[],ac:[],qc:[],Vb:{},fc:function(){O&&ht.Ec()},Pc:function(){},Ec:function(){ht.receiveObjectTransfer=ht.Gc,ht.threadInitTLS=ht.pc,ht.setExitStatus=ht.nc,R=!1},nc:function(){},oc:function(){for(var t of Object.values(ht.Vb))ht.mc(t);for(t of ht.Yb)t.terminate();ht.Yb=[]},mc:function(t){var e=t.Ub;delete ht.Vb[e],ht.Yb.push(t),ht.ac.splice(ht.ac.indexOf(t),1),t.Ub=0,Oe(e)},Gc:function(){},pc:function(){ht.qc.forEach((t=>t()))},Fc:function(t,e){t.onmessage=n=>{var r=(n=n.data).cmd;if(t.Ub&&(ht.Bc=t.Ub),n.targetThread&&n.targetThread!=he()){var a=ht.Vb[n.Qc];a?a.postMessage(n,n.transferList):x(\'Internal error! Worker sent a message "\'+r+\'" to target pthread \'+n.targetThread+", but that thread no longer exists!")}else"processProxyingQueue"===r?zt(n.queue):"spawnThread"===r?lt(n):"cleanupThread"===r?st(n.thread):"killThread"===r?(n=n.thread,r=ht.Vb[n],delete ht.Vb[n],r.terminate(),Oe(n),ht.ac.splice(ht.ac.indexOf(r),1),r.Ub=0):"cancelThread"===r?ht.Vb[n.thread].postMessage({cmd:"cancel"}):"loaded"===r?(t.loaded=!0,e&&e(t),t.$b&&(t.$b(),delete t.$b)):"print"===r?C("Thread "+n.threadId+": "+n.text):"printErr"===r?x("Thread "+n.threadId+": "+n.text):"alert"===r?alert("Thread "+n.threadId+": "+n.text):"setimmediate"===n.target?t.postMessage(n):"onAbort"===r?u.onAbort&&u.onAbort(n.arg):r&&x("worker sent an unknown command "+r);ht.Bc=void 0},t.onerror=t=>{throw x("worker sent an error! 
"+t.filename+":"+t.lineno+": "+t.message),t},_&&(t.on("message",(function(e){t.onmessage({data:e})})),t.on("error",(function(e){t.onerror(e)})),t.on("detachedExit",(function(){}))),t.postMessage({cmd:"load",urlOrBlob:u.mainScriptUrlOrBlob||_scriptDir,wasmMemory:j,wasmModule:k})},yc:function(){var t=S("ort-wasm-threaded.worker.js");ht.Yb.push(new Worker(t))},Cc:function(){return 0==ht.Yb.length&&(ht.yc(),ht.Fc(ht.Yb[0])),ht.Yb.pop()}};function dt(t){for(;0>2>>>0];t=a()[t+48>>2>>>0],Te(e,e-t),Me(e)};var mt=[];function gt(t){var e=mt[t];return e||(t>=mt.length&&(mt.length=t+1),mt[t]=e=$.get(t)),e}u.invokeEntryPoint=function(t,e){t=gt(t)(e),Q()?ht.nc(t):Ae(t)};var vt,wt,_t=[],Ot=0,At=0;function St(t){this.Zb=t,this.Sb=t-24,this.xc=function(t){i()[this.Sb+4>>2>>>0]=t},this.bc=function(){return i()[this.Sb+4>>2>>>0]},this.wc=function(t){i()[this.Sb+8>>2>>>0]=t},this.Dc=function(){return i()[this.Sb+8>>2>>>0]},this.rc=function(){a()[this.Sb>>2>>>0]=0},this.hc=function(t){t=t?1:0,e()[this.Sb+12>>0>>>0]=t},this.uc=function(){return 0!=e()[this.Sb+12>>0>>>0]},this.ic=function(t){t=t?1:0,e()[this.Sb+13>>0>>>0]=t},this.kc=function(){return 0!=e()[this.Sb+13>>0>>>0]},this.fc=function(t,e){this.cc(0),this.xc(t),this.wc(e),this.rc(),this.hc(!1),this.ic(!1)},this.sc=function(){Atomics.add(a(),this.Sb>>2,1)},this.Hc=function(){return 1===Atomics.sub(a(),this.Sb>>2,1)},this.cc=function(t){i()[this.Sb+16>>2>>>0]=t},this.tc=function(){return i()[this.Sb+16>>2>>>0]},this.vc=function(){if(Re(this.bc()))return i()[this.Zb>>2>>>0];var t=this.tc();return 0!==t?t:this.Zb}}function Tt(t){return ye(new St(t).Sb)}function Et(t,e,n,r){return O?$t(3,1,t,e,n,r):Mt(t,e,n,r)}function Mt(t,e,n,r){if("undefined"==typeof SharedArrayBuffer)return x("Current environment does not support SharedArrayBuffer, pthreads are not available!"),6;var a=[];return O&&0===a.length?Et(t,e,n,r):(t={Ic:n,Ub:t,zc:r,Nc:a},O?(t.Oc="spawnThread",postMessage(t,a),0):lt(t))}function Ct(t,e,n){return O?$t(4,1,t,e,n):0}function xt(t,e){if(O)return $t(5,1,t,e)}function Rt(t,e){if(O)return $t(6,1,t,e)}function jt(t,e,n){if(O)return $t(7,1,t,e,n)}function kt(t,e,n){return O?$t(8,1,t,e,n):0}function Dt(t,e){if(O)return $t(9,1,t,e)}function Pt(t,e,n){if(O)return $t(10,1,t,e,n)}function Ut(t,e,n,r){if(O)return $t(11,1,t,e,n,r)}function Ft(t,e,n,r){if(O)return $t(12,1,t,e,n,r)}function It(t,e,n,r){if(O)return $t(13,1,t,e,n,r)}function Wt(t){if(O)return $t(14,1,t)}function Ht(t,e){if(O)return $t(15,1,t,e)}function Lt(t,e,n){if(O)return $t(16,1,t,e,n)}function zt(t){Atomics.store(a(),t>>2,1),he()&&_e(t),Atomics.compareExchange(a(),t>>2,1,0)}function Yt(t){return i()[t>>>2]+4294967296*a()[t+4>>>2]}function Bt(t,e,n,r,a,i){return O?$t(17,1,t,e,n,r,a,i):-52}function Gt(t,e,n,r,a,i){if(O)return $t(18,1,t,e,n,r,a,i)}function Nt(t){var n=G(t)+1,r=de(n);return r&&B(t,e(),r,n),r}function Vt(t,e,n){function r(t){return(t=t.toTimeString().match(/\\(([A-Za-z ]+)\\)$/))?t[1]:"GMT"}if(O)return $t(19,1,t,e,n);var o=(new Date).getFullYear(),u=new Date(o,0,1),c=new Date(o,6,1);o=u.getTimezoneOffset();var s=c.getTimezoneOffset(),l=Math.max(o,s);a()[t>>2>>>0]=60*l,a()[e>>2>>>0]=Number(o!=s),t=r(u),e=r(c),t=Nt(t),e=Nt(e),s>2>>>0]=t,i()[n+4>>2>>>0]=e):(i()[n>>2>>>0]=e,i()[n+4>>2>>>0]=t)}function $t(t,e){var n=arguments.length-2,r=arguments;return yt((()=>{for(var a=Ce(8*n),i=a>>3,u=0;u>>0]=c}return we(t,n,a,e)}))}u.executeNotifiedProxyingQueue=zt,wt=_?()=>{var t=process.hrtime();return 
1e3*t[0]+t[1]/1e6}:O?()=>performance.now()-u.__performance_now_clock_drift:()=>performance.now();var qt,Xt=[],Jt={};function Zt(){if(!qt){var t,e={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:("object"==typeof navigator&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:m||"./this.program"};for(t in Jt)void 0===Jt[t]?delete e[t]:e[t]=Jt[t];var n=[];for(t in e)n.push(t+"="+e[t]);qt=n}return qt}function Qt(t,n){if(O)return $t(20,1,t,n);var r=0;return Zt().forEach((function(a,o){var u=n+r;for(o=i()[t+4*o>>2>>>0]=u,u=0;u>0>>>0]=a.charCodeAt(u);e()[o>>0>>>0]=0,r+=a.length+1})),0}function Kt(t,e){if(O)return $t(21,1,t,e);var n=Zt();i()[t>>2>>>0]=n.length;var r=0;return n.forEach((function(t){r+=t.length+1})),i()[e>>2>>>0]=r,0}function te(t){return O?$t(22,1,t):52}function ee(t,e,n,r){return O?$t(23,1,t,e,n,r):52}function ne(t,e,n,r,a){return O?$t(24,1,t,e,n,r,a):70}var re=[null,[],[]];function ae(t,e){var n=re[t];0===e||10===e?((1===t?C:x)(z(n,0)),n.length=0):n.push(e)}function ie(t,e,n,a){if(O)return $t(25,1,t,e,n,a);for(var o=0,u=0;u>2>>>0],s=i()[e+4>>2>>>0];e+=8;for(var l=0;l>>0]);o+=s}return i()[a>>2>>>0]=o,0}var oe=0;function ue(t){return 0==t%4&&(0!=t%100||0==t%400)}var ce=[31,29,31,30,31,30,31,31,30,31,30,31],se=[31,28,31,30,31,30,31,31,30,31,30,31];function le(t,n,r,i){function o(t,e,n){for(t="number"==typeof t?t.toString():t||"";t.lengtht?-1:0r-t.getDate())){t.setDate(t.getDate()+e);break}e-=r-t.getDate()+1,t.setDate(1),11>n?t.setMonth(n+1):(t.setMonth(0),t.setFullYear(t.getFullYear()+1))}return n=new Date(t.getFullYear()+1,0,4),e=s(new Date(t.getFullYear(),0,4)),n=s(n),0>=c(e,t)?0>=c(n,t)?t.getFullYear()+1:t.getFullYear():t.getFullYear()-1}var f=a()[i+40>>2>>>0];for(var p in i={Lc:a()[i>>2>>>0],Kc:a()[i+4>>2>>>0],dc:a()[i+8>>2>>>0],jc:a()[i+12>>2>>>0],ec:a()[i+16>>2>>>0],Xb:a()[i+20>>2>>>0],Tb:a()[i+24>>2>>>0],Wb:a()[i+28>>2>>>0],Rc:a()[i+32>>2>>>0],Jc:a()[i+36>>2>>>0],Mc:f?Y(f):""},r=Y(r),f={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"})r=r.replace(new RegExp(p,"g"),f[p]);var h="Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "),d="January February March April May June July August September October November December".split(" ");for(p in f={"%a":function(t){return h[t.Tb].substring(0,3)},"%A":function(t){return h[t.Tb]},"%b":function(t){return d[t.ec].substring(0,3)},"%B":function(t){return d[t.ec]},"%C":function(t){return u((t.Xb+1900)/100|0,2)},"%d":function(t){return u(t.jc,2)},"%e":function(t){return o(t.jc,2," ")},"%g":function(t){return l(t).toString().substring(2)},"%G":function(t){return l(t)},"%H":function(t){return u(t.dc,2)},"%I":function(t){return 0==(t=t.dc)?t=12:12t.dc?"AM":"PM"},"%S":function(t){return u(t.Lc,2)},"%t":function(){return"\\t"},"%u":function(t){return t.Tb||7},"%U":function(t){return u(Math.floor((t.Wb+7-t.Tb)/7),2)},"%V":function(t){var e=Math.floor((t.Wb+7-(t.Tb+6)%7)/7);if(2>=(t.Tb+371-t.Wb-2)%7&&e++,e)53==e&&(4==(n=(t.Tb+371-t.Wb)%7)||3==n&&ue(t.Xb)||(e=1));else{e=52;var n=(t.Tb+7-t.Wb-1)%7;(4==n||5==n&&ue(t.Xb%400-1))&&e++}return u(e,2)},"%w":function(t){return t.Tb},"%W":function(t){return 
u(Math.floor((t.Wb+7-(t.Tb+6)%7)/7),2)},"%y":function(t){return(t.Xb+1900).toString().substring(2)},"%Y":function(t){return t.Xb+1900},"%z":function(t){var e=0<=(t=t.Jc);return t=Math.abs(t)/60,(e?"+":"-")+String("0000"+(t/60*100+t%60)).slice(-4)},"%Z":function(t){return t.Mc},"%%":function(){return"%"}},r=r.replace(/%%/g,"\\0\\0"),f)r.includes(p)&&(r=r.replace(new RegExp(p,"g"),f[p](i)));return p=function(t){var e=Array(G(t)+1);return B(t,e,0,e.length),e}(r=r.replace(/\\0\\0/g,"%")),p.length>n?0:(function(t,n){e().set(t,n>>>0)}(p,t),p.length-1)}ht.fc();var fe=[null,ft,bt,Et,Ct,xt,Rt,jt,kt,Dt,Pt,Ut,Ft,It,Wt,Ht,Lt,Bt,Gt,Vt,Qt,Kt,te,ee,ne,ie],pe={b:function(t){return de(t+24)+24},n:function(t){return(t=new St(t)).uc()||(t.hc(!0),Ot--),t.ic(!1),_t.push(t),t.sc(),t.vc()},ma:function(t){throw x("Unexpected exception thrown, this is not properly supported - aborting"),H=!0,t},x:function(){Se(0);var t=_t.pop();if(t.Hc()&&!t.kc()){var e=t.Dc();e&>(e)(t.Zb),Tt(t.Zb)}At=0},e:function(){var t=At;if(!t)return oe=0;var e=new St(t);e.cc(t);var n=e.bc();if(!n)return oe=0,t;for(var r=Array.prototype.slice.call(arguments),a=0;azt(r)));else if(O)postMessage({targetThread:t,cmd:"processProxyingQueue",queue:r});else{if(!(t=ht.Vb[t]))return;t.postMessage({cmd:"processProxyingQueue",queue:r})}return 1},Ea:function(){return-1},Pa:function(t,e){t=new Date(1e3*Yt(t)),a()[e>>2>>>0]=t.getUTCSeconds(),a()[e+4>>2>>>0]=t.getUTCMinutes(),a()[e+8>>2>>>0]=t.getUTCHours(),a()[e+12>>2>>>0]=t.getUTCDate(),a()[e+16>>2>>>0]=t.getUTCMonth(),a()[e+20>>2>>>0]=t.getUTCFullYear()-1900,a()[e+24>>2>>>0]=t.getUTCDay(),t=(t.getTime()-Date.UTC(t.getUTCFullYear(),0,1,0,0,0,0))/864e5|0,a()[e+28>>2>>>0]=t},Qa:function(t,e){t=new Date(1e3*Yt(t)),a()[e>>2>>>0]=t.getSeconds(),a()[e+4>>2>>>0]=t.getMinutes(),a()[e+8>>2>>>0]=t.getHours(),a()[e+12>>2>>>0]=t.getDate(),a()[e+16>>2>>>0]=t.getMonth(),a()[e+20>>2>>>0]=t.getFullYear()-1900,a()[e+24>>2>>>0]=t.getDay();var n=new Date(t.getFullYear(),0,1),r=(t.getTime()-n.getTime())/864e5|0;a()[e+28>>2>>>0]=r,a()[e+36>>2>>>0]=-60*t.getTimezoneOffset(),r=new Date(t.getFullYear(),6,1).getTimezoneOffset(),t=0|(r!=(n=n.getTimezoneOffset())&&t.getTimezoneOffset()==Math.min(n,r)),a()[e+32>>2>>>0]=t},Ra:function(t){var e=new Date(a()[t+20>>2>>>0]+1900,a()[t+16>>2>>>0],a()[t+12>>2>>>0],a()[t+8>>2>>>0],a()[t+4>>2>>>0],a()[t>>2>>>0],0),n=a()[t+32>>2>>>0],r=e.getTimezoneOffset(),i=new Date(e.getFullYear(),0,1),o=new Date(e.getFullYear(),6,1).getTimezoneOffset(),u=i.getTimezoneOffset(),c=Math.min(u,o);return 0>n?a()[t+32>>2>>>0]=Number(o!=u&&c==r):0>2>>>0]=e.getDay(),n=(e.getTime()-i.getTime())/864e5|0,a()[t+28>>2>>>0]=n,a()[t>>2>>>0]=e.getSeconds(),a()[t+4>>2>>>0]=e.getMinutes(),a()[t+8>>2>>>0]=e.getHours(),a()[t+12>>2>>>0]=e.getDate(),a()[t+16>>2>>>0]=e.getMonth(),e.getTime()/1e3|0},Aa:Bt,Ba:Gt,Sa:function t(e,n,r){t.Ac||(t.Ac=!0,Vt(e,n,r))},y:function(){at("")},U:function(){if(!_&&!w){var t="Blocking on the main thread is very dangerous, see https://emscripten.org/docs/porting/pthreads.html#blocking-on-the-main-browser-thread";vt||(vt={}),vt[t]||(vt[t]=1,_&&(t="warning: "+t),x(t))}},ra:function(){return 4294901760},B:wt,Ia:function(t,e,n){r().copyWithin(t>>>0,e>>>0,e+n>>>0)},F:function(){return _?n(993).cpus().length:navigator.hardwareConcurrency},Da:function(t,e,n){Xt.length=e,n>>=3;for(var r=0;r>>0];return(0>t?ut[-t-1]:fe[t]).apply(null,Xt)},qa:function(t){var e=r().length;if((t>>>=0)<=e||4294901760=n;n*=2){var a=e*(1+.2/n);a=Math.min(a,t+100663296);var 
i=Math;a=Math.max(t,a),i=i.min.call(i,4294901760,a+(65536-a%65536)%65536);t:{try{j.grow(i-D.byteLength+65535>>>16),N(j.buffer);var o=1;break t}catch(t){}o=void 0}if(o)return!0}return!1},Na:function(){throw"unwind"},Ga:Qt,Ha:Kt,J:pt,I:te,S:ee,ga:ne,R:ie,d:function(){return oe},na:function t(r,a){t.lc||(t.lc=function(){if("object"==typeof crypto&&"function"==typeof crypto.getRandomValues){var t=new Uint8Array(1);return()=>(crypto.getRandomValues(t),t[0])}if(_)try{var e=n(Object(function(){var t=new Error("Cannot find module \'crypto\'");throw t.code="MODULE_NOT_FOUND",t}()));return()=>e.randomBytes(1)[0]}catch(t){}return()=>at("randomDevice")}());for(var i=0;i>0>>>0]=t.lc();return 0},ia:function(t,e,n){var r=Ee();try{return gt(t)(e,n)}catch(t){if(Me(r),t!==t+0)throw t;Se(1,0)}},ja:function(t,e,n){var r=Ee();try{return gt(t)(e,n)}catch(t){if(Me(r),t!==t+0)throw t;Se(1,0)}},K:function(t){var e=Ee();try{return gt(t)()}catch(t){if(Me(e),t!==t+0)throw t;Se(1,0)}},f:function(t,e){var n=Ee();try{return gt(t)(e)}catch(t){if(Me(n),t!==t+0)throw t;Se(1,0)}},P:function(t,e,n){var r=Ee();try{return gt(t)(e,n)}catch(t){if(Me(r),t!==t+0)throw t;Se(1,0)}},Q:function(t,e,n){var r=Ee();try{return gt(t)(e,n)}catch(t){if(Me(r),t!==t+0)throw t;Se(1,0)}},k:function(t,e,n){var r=Ee();try{return gt(t)(e,n)}catch(t){if(Me(r),t!==t+0)throw t;Se(1,0)}},p:function(t,e,n,r){var a=Ee();try{return gt(t)(e,n,r)}catch(t){if(Me(a),t!==t+0)throw t;Se(1,0)}},q:function(t,e,n,r,a){var i=Ee();try{return gt(t)(e,n,r,a)}catch(t){if(Me(i),t!==t+0)throw t;Se(1,0)}},N:function(t,e,n,r,a,i){var o=Ee();try{return gt(t)(e,n,r,a,i)}catch(t){if(Me(o),t!==t+0)throw t;Se(1,0)}},s:function(t,e,n,r,a,i){var o=Ee();try{return gt(t)(e,n,r,a,i)}catch(t){if(Me(o),t!==t+0)throw t;Se(1,0)}},w:function(t,e,n,r,a,i,o){var u=Ee();try{return gt(t)(e,n,r,a,i,o)}catch(t){if(Me(u),t!==t+0)throw t;Se(1,0)}},L:function(t,e,n,r,a,i,o,u){var c=Ee();try{return gt(t)(e,n,r,a,i,o,u)}catch(t){if(Me(c),t!==t+0)throw t;Se(1,0)}},E:function(t,e,n,r,a,i,o,u,c,s,l,f){var p=Ee();try{return gt(t)(e,n,r,a,i,o,u,c,s,l,f)}catch(t){if(Me(p),t!==t+0)throw t;Se(1,0)}},aa:function(t,e,n,r,a,i,o,u){var c=Ee();try{return He(t,e,n,r,a,i,o,u)}catch(t){if(Me(c),t!==t+0)throw t;Se(1,0)}},_:function(t,e,n,r,a,i,o){var u=Ee();try{return ke(t,e,n,r,a,i,o)}catch(t){if(Me(u),t!==t+0)throw t;Se(1,0)}},Z:function(t,e,n,r,a){var i=Ee();try{return Le(t,e,n,r,a)}catch(t){if(Me(i),t!==t+0)throw t;Se(1,0)}},ca:function(t,e,n,r){var a=Ee();try{return Ie(t,e,n,r)}catch(t){if(Me(a),t!==t+0)throw t;Se(1,0)}},$:function(t){var e=Ee();try{return je(t)}catch(t){if(Me(e),t!==t+0)throw t;Se(1,0)}},ba:function(t,e){var n=Ee();try{return We(t,e)}catch(t){if(Me(n),t!==t+0)throw t;Se(1,0)}},Y:function(t,e,n){var r=Ee();try{return De(t,e,n)}catch(t){if(Me(r),t!==t+0)throw t;Se(1,0)}},g:function(t){var e=Ee();try{gt(t)()}catch(t){if(Me(e),t!==t+0)throw t;Se(1,0)}},r:function(t,e){var n=Ee();try{gt(t)(e)}catch(t){if(Me(n),t!==t+0)throw t;Se(1,0)}},i:function(t,e,n){var r=Ee();try{gt(t)(e,n)}catch(t){if(Me(r),t!==t+0)throw t;Se(1,0)}},ha:function(t,e,n,r){var a=Ee();try{gt(t)(e,n,r)}catch(t){if(Me(a),t!==t+0)throw t;Se(1,0)}},m:function(t,e,n,r){var a=Ee();try{gt(t)(e,n,r)}catch(t){if(Me(a),t!==t+0)throw t;Se(1,0)}},v:function(t,e,n,r,a){var i=Ee();try{gt(t)(e,n,r,a)}catch(t){if(Me(i),t!==t+0)throw t;Se(1,0)}},u:function(t,e,n,r,a,i){var o=Ee();try{gt(t)(e,n,r,a,i)}catch(t){if(Me(o),t!==t+0)throw t;Se(1,0)}},O:function(t,e,n,r,a,i,o){var u=Ee();try{gt(t)(e,n,r,a,i,o)}catch(t){if(Me(u),t!==t+0)throw 
t;Se(1,0)}},A:function(t,e,n,r,a,i,o,u){var c=Ee();try{gt(t)(e,n,r,a,i,o,u)}catch(t){if(Me(c),t!==t+0)throw t;Se(1,0)}},ka:function(t,e,n,r,a,i,o,u,c){var s=Ee();try{gt(t)(e,n,r,a,i,o,u,c)}catch(t){if(Me(s),t!==t+0)throw t;Se(1,0)}},C:function(t,e,n,r,a,i,o,u,c,s,l){var f=Ee();try{gt(t)(e,n,r,a,i,o,u,c,s,l)}catch(t){if(Me(f),t!==t+0)throw t;Se(1,0)}},D:function(t,e,n,r,a,i,o,u,c,s,l,f,p,h,d,y){var b=Ee();try{gt(t)(e,n,r,a,i,o,u,c,s,l,f,p,h,d,y)}catch(t){if(Me(b),t!==t+0)throw t;Se(1,0)}},fa:function(t,e,n,r,a,i,o,u){var c=Ee();try{Pe(t,e,n,r,a,i,o,u)}catch(t){if(Me(c),t!==t+0)throw t;Se(1,0)}},da:function(t,e,n,r,a,i,o,u,c,s,l,f){var p=Ee();try{Fe(t,e,n,r,a,i,o,u,c,s,l,f)}catch(t){if(Me(p),t!==t+0)throw t;Se(1,0)}},ea:function(t,e,n,r,a,i){var o=Ee();try{Ue(t,e,n,r,a,i)}catch(t){if(Me(o),t!==t+0)throw t;Se(1,0)}},o:function(t){return t},a:j||u.wasmMemory,G:function(t){oe=t},la:le,z:function(t,e,n,r){return le(t,e,n,r)}};!function(){function t(t,e){u.asm=t.exports,ht.qc.push(u.asm.sb),$=u.asm.ub,X.unshift(u.asm.Va),k=e,O||(et--,u.monitorRunDependencies&&u.monitorRunDependencies(et),0==et&&(null!==nt&&(clearInterval(nt),nt=null),rt&&(t=rt,rt=null,t())))}function e(e){t(e.instance,e.module)}function n(t){return function(){if(!M&&(v||w)){if("function"==typeof fetch&&!tt.startsWith("file://"))return fetch(tt,{credentials:"same-origin"}).then((function(t){if(!t.ok)throw"failed to load wasm binary file at \'"+tt+"\'";return t.arrayBuffer()})).catch((function(){return ot()}));if(f)return new Promise((function(t,e){f(tt,(function(e){t(new Uint8Array(e))}),e)}))}return Promise.resolve().then((function(){return ot()}))}().then((function(t){return WebAssembly.instantiate(t,r)})).then((function(t){return t})).then(t,(function(t){x("failed to asynchronously prepare wasm: "+t),at(t)}))}var r={a:pe};if(O||(et++,u.monitorRunDependencies&&u.monitorRunDependencies(et)),u.instantiateWasm)try{return u.instantiateWasm(r,t)}catch(t){return x("Module.instantiateWasm callback failed with error: "+t),!1}(M||"function"!=typeof WebAssembly.instantiateStreaming||it()||tt.startsWith("file://")||_||"function"!=typeof fetch?n(e):fetch(tt,{credentials:"same-origin"}).then((function(t){return WebAssembly.instantiateStreaming(t,r).then(e,(function(t){return x("wasm streaming compile failed: "+t),x("falling back to ArrayBuffer 
instantiation"),n(e)}))}))).catch(s)}(),u.___wasm_call_ctors=function(){return(u.___wasm_call_ctors=u.asm.Va).apply(null,arguments)},u._OrtInit=function(){return(u._OrtInit=u.asm.Wa).apply(null,arguments)},u._OrtCreateSessionOptions=function(){return(u._OrtCreateSessionOptions=u.asm.Xa).apply(null,arguments)},u._OrtAppendExecutionProvider=function(){return(u._OrtAppendExecutionProvider=u.asm.Ya).apply(null,arguments)},u._OrtAddSessionConfigEntry=function(){return(u._OrtAddSessionConfigEntry=u.asm.Za).apply(null,arguments)},u._OrtReleaseSessionOptions=function(){return(u._OrtReleaseSessionOptions=u.asm._a).apply(null,arguments)},u._OrtCreateSession=function(){return(u._OrtCreateSession=u.asm.$a).apply(null,arguments)},u._OrtReleaseSession=function(){return(u._OrtReleaseSession=u.asm.ab).apply(null,arguments)},u._OrtGetInputCount=function(){return(u._OrtGetInputCount=u.asm.bb).apply(null,arguments)},u._OrtGetOutputCount=function(){return(u._OrtGetOutputCount=u.asm.cb).apply(null,arguments)},u._OrtGetInputName=function(){return(u._OrtGetInputName=u.asm.db).apply(null,arguments)},u._OrtGetOutputName=function(){return(u._OrtGetOutputName=u.asm.eb).apply(null,arguments)},u._OrtFree=function(){return(u._OrtFree=u.asm.fb).apply(null,arguments)},u._OrtCreateTensor=function(){return(u._OrtCreateTensor=u.asm.gb).apply(null,arguments)},u._OrtGetTensorData=function(){return(u._OrtGetTensorData=u.asm.hb).apply(null,arguments)},u._OrtReleaseTensor=function(){return(u._OrtReleaseTensor=u.asm.ib).apply(null,arguments)},u._OrtCreateRunOptions=function(){return(u._OrtCreateRunOptions=u.asm.jb).apply(null,arguments)},u._OrtAddRunConfigEntry=function(){return(u._OrtAddRunConfigEntry=u.asm.kb).apply(null,arguments)},u._OrtReleaseRunOptions=function(){return(u._OrtReleaseRunOptions=u.asm.lb).apply(null,arguments)},u._OrtRun=function(){return(u._OrtRun=u.asm.mb).apply(null,arguments)},u._OrtEndProfiling=function(){return(u._OrtEndProfiling=u.asm.nb).apply(null,arguments)};var he=u._pthread_self=function(){return(he=u._pthread_self=u.asm.ob).apply(null,arguments)},de=u._malloc=function(){return(de=u._malloc=u.asm.pb).apply(null,arguments)},ye=u._free=function(){return(ye=u._free=u.asm.qb).apply(null,arguments)},be=u._fflush=function(){return(be=u._fflush=u.asm.rb).apply(null,arguments)};u.__emscripten_tls_init=function(){return(u.__emscripten_tls_init=u.asm.sb).apply(null,arguments)};var me=u.___funcs_on_exit=function(){return(me=u.___funcs_on_exit=u.asm.tb).apply(null,arguments)},ge=u.__emscripten_thread_init=function(){return(ge=u.__emscripten_thread_init=u.asm.vb).apply(null,arguments)};u.__emscripten_thread_crashed=function(){return(u.__emscripten_thread_crashed=u.asm.wb).apply(null,arguments)};var 
ve,we=u._emscripten_run_in_main_runtime_thread_js=function(){return(we=u._emscripten_run_in_main_runtime_thread_js=u.asm.xb).apply(null,arguments)},_e=u.__emscripten_proxy_execute_task_queue=function(){return(_e=u.__emscripten_proxy_execute_task_queue=u.asm.yb).apply(null,arguments)},Oe=u.__emscripten_thread_free_data=function(){return(Oe=u.__emscripten_thread_free_data=u.asm.zb).apply(null,arguments)},Ae=u.__emscripten_thread_exit=function(){return(Ae=u.__emscripten_thread_exit=u.asm.Ab).apply(null,arguments)},Se=u._setThrew=function(){return(Se=u._setThrew=u.asm.Bb).apply(null,arguments)},Te=u._emscripten_stack_set_limits=function(){return(Te=u._emscripten_stack_set_limits=u.asm.Cb).apply(null,arguments)},Ee=u.stackSave=function(){return(Ee=u.stackSave=u.asm.Db).apply(null,arguments)},Me=u.stackRestore=function(){return(Me=u.stackRestore=u.asm.Eb).apply(null,arguments)},Ce=u.stackAlloc=function(){return(Ce=u.stackAlloc=u.asm.Fb).apply(null,arguments)},xe=u.___cxa_can_catch=function(){return(xe=u.___cxa_can_catch=u.asm.Gb).apply(null,arguments)},Re=u.___cxa_is_pointer_type=function(){return(Re=u.___cxa_is_pointer_type=u.asm.Hb).apply(null,arguments)},je=u.dynCall_j=function(){return(je=u.dynCall_j=u.asm.Ib).apply(null,arguments)},ke=u.dynCall_iiiiij=function(){return(ke=u.dynCall_iiiiij=u.asm.Jb).apply(null,arguments)},De=u.dynCall_jii=function(){return(De=u.dynCall_jii=u.asm.Kb).apply(null,arguments)},Pe=u.dynCall_viiiiij=function(){return(Pe=u.dynCall_viiiiij=u.asm.Lb).apply(null,arguments)},Ue=u.dynCall_vjji=function(){return(Ue=u.dynCall_vjji=u.asm.Mb).apply(null,arguments)},Fe=u.dynCall_viiijjjii=function(){return(Fe=u.dynCall_viiijjjii=u.asm.Nb).apply(null,arguments)},Ie=u.dynCall_iij=function(){return(Ie=u.dynCall_iij=u.asm.Ob).apply(null,arguments)},We=u.dynCall_ji=function(){return(We=u.dynCall_ji=u.asm.Pb).apply(null,arguments)},He=u.dynCall_iiiiiij=function(){return(He=u.dynCall_iiiiiij=u.asm.Qb).apply(null,arguments)},Le=u.dynCall_iiij=function(){return(Le=u.dynCall_iiij=u.asm.Rb).apply(null,arguments)};function ze(){function t(){if(!ve&&(ve=!0,u.calledRun=!0,!H)&&(O||dt(X),c(u),u.onRuntimeInitialized&&u.onRuntimeInitialized(),!O)){if(u.postRun)for("function"==typeof u.postRun&&(u.postRun=[u.postRun]);u.postRun.length;){var t=u.postRun.shift();Z.unshift(t)}dt(Z)}}if(!(0{var _scriptDir,r=(_scriptDir=(_scriptDir="undefined"!=typeof document&&document.currentScript?document.currentScript.src:void 0)||"/index.js",function(t){var e,r,a;t=t||{},e||(e=void 0!==t?t:{}),e.ready=new Promise((function(t,e){r=t,a=e}));var i,o,u,c,s,l,f=Object.assign({},e),p="./this.program",h=(t,e)=>{throw e},d="object"==typeof window,y="function"==typeof importScripts,b="object"==typeof process&&"object"==typeof process.versions&&"string"==typeof process.versions.node,m="";b?(m=y?n(908).dirname(m)+"/":"//",l=()=>{s||(c=n(384),s=n(908))},i=function(t,e){return l(),t=s.normalize(t),c.readFileSync(t,e?void 0:"utf8")},u=t=>((t=i(t,!0)).buffer||(t=new Uint8Array(t)),t),o=(t,e,n)=>{l(),t=s.normalize(t),c.readFile(t,(function(t,r){t?n(t):e(r.buffer)}))},1{if(_||0{var e=new XMLHttpRequest;return e.open("GET",t,!1),e.send(null),e.responseText},y&&(u=t=>{var e=new XMLHttpRequest;return e.open("GET",t,!1),e.responseType="arraybuffer",e.send(null),new Uint8Array(e.response)}),o=(t,e,n)=>{var r=new XMLHttpRequest;r.open("GET",t,!0),r.responseType="arraybuffer",r.onload=()=>{200==r.status||0==r.status&&r.response?e(r.response):n()},r.onerror=n,r.send(null)});var 
g,v=e.print||console.log.bind(console),w=e.printErr||console.warn.bind(console);Object.assign(e,f),f=null,e.thisProgram&&(p=e.thisProgram),e.quit&&(h=e.quit),e.wasmBinary&&(g=e.wasmBinary);var _=e.noExitRuntime||!1;"object"!=typeof WebAssembly&&V("no native wasm support detected");var O,A,S,T,E,M,C=!1,x="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0;function R(t,e,n){var r=(e>>>=0)+n;for(n=e;t[n]&&!(n>=r);)++n;if(16(a=224==(240&a)?(15&a)<<12|i<<6|o:(7&a)<<18|i<<12|o<<6|63&t[e++])?r+=String.fromCharCode(a):(a-=65536,r+=String.fromCharCode(55296|a>>10,56320|1023&a))}}else r+=String.fromCharCode(a)}return r}function j(t,e){return(t>>>=0)?R(T,t,e):""}function k(t,e,n,r){if(!(0>>=0;r=n+r-1;for(var i=0;i=o&&(o=65536+((1023&o)<<10)|1023&t.charCodeAt(++i)),127>=o){if(n>=r)break;e[n++>>>0]=o}else{if(2047>=o){if(n+1>=r)break;e[n++>>>0]=192|o>>6}else{if(65535>=o){if(n+2>=r)break;e[n++>>>0]=224|o>>12}else{if(n+3>=r)break;e[n++>>>0]=240|o>>18,e[n++>>>0]=128|o>>12&63}e[n++>>>0]=128|o>>6&63}e[n++>>>0]=128|63&o}}return e[n>>>0]=0,n-a}function D(t){for(var e=0,n=0;n=r?e++:2047>=r?e+=2:55296<=r&&57343>=r?(e+=4,++n):e+=3}return e}function P(){var t=O.buffer;A=t,e.HEAP8=S=new Int8Array(t),e.HEAP16=new Int16Array(t),e.HEAP32=E=new Int32Array(t),e.HEAPU8=T=new Uint8Array(t),e.HEAPU16=new Uint16Array(t),e.HEAPU32=M=new Uint32Array(t),e.HEAPF32=new Float32Array(t),e.HEAPF64=new Float64Array(t)}var U,F=[],I=[],W=[],H=[],L=0;function z(){var t=e.preRun.shift();F.unshift(t)}var Y,B=0,G=null,N=null;function V(t){throw e.onAbort&&e.onAbort(t),w(t="Aborted("+t+")"),C=!0,t=new WebAssembly.RuntimeError(t+". Build with -sASSERTIONS for more info."),a(t),t}function $(){return Y.startsWith("data:application/octet-stream;base64,")}if(Y="ort-wasm.wasm",!$()){var q=Y;Y=e.locateFile?e.locateFile(q,m):m+q}function X(){var t=Y;try{if(t==Y&&g)return new Uint8Array(g);if(u)return u(t);throw"both async and sync fetching of the wasm failed"}catch(t){V(t)}}function J(t){this.name="ExitStatus",this.message="Program terminated with exit("+t+")",this.status=t}function Z(t){for(;0>2>>>0]=t},this.Eb=function(){return M[this.zb+4>>2>>>0]},this.Sb=function(t){M[this.zb+8>>2>>>0]=t},this.Wb=function(){return M[this.zb+8>>2>>>0]},this.Tb=function(){E[this.zb>>2>>>0]=0},this.Ib=function(t){S[this.zb+12>>0>>>0]=t?1:0},this.Pb=function(){return 0!=S[this.zb+12>>0>>>0]},this.Jb=function(t){S[this.zb+13>>0>>>0]=t?1:0},this.Lb=function(){return 0!=S[this.zb+13>>0>>>0]},this.Rb=function(t,e){this.Fb(0),this.Ub(t),this.Sb(e),this.Tb(),this.Ib(!1),this.Jb(!1)},this.Nb=function(){E[this.zb>>2>>>0]+=1},this.Xb=function(){var t=E[this.zb>>2>>>0];return E[this.zb>>2>>>0]=t-1,1===t},this.Fb=function(t){M[this.zb+16>>2>>>0]=t},this.Ob=function(){return M[this.zb+16>>2>>>0]},this.Qb=function(){if(Mt(this.Eb()))return M[this.Db>>2>>>0];var t=this.Ob();return 0!==t?t:this.Db}}function nt(t){return vt(new et(t).zb)}var rt=[];function at(t){var e=rt[t];return e||(t>=rt.length&&(rt.length=t+1),rt[t]=e=U.get(t)),e}function it(t){var e=D(t)+1,n=gt(e);return n&&k(t,S,n,e),n}var ot={};function ut(){if(!ct){var t,e={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:("object"==typeof navigator&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:p||"./this.program"};for(t in ot)void 0===ot[t]?delete e[t]:e[t]=ot[t];var n=[];for(t in e)n.push(t+"="+e[t]);ct=n}return ct}var ct,st=[null,[],[]];function lt(t,e){var n=st[t];0===e||10===e?((1===t?v:w)(R(n,0)),n.length=0):n.push(e)}var ft=0;function 
pt(t){return 0==t%4&&(0!=t%100||0==t%400)}var ht=[31,29,31,30,31,30,31,31,30,31,30,31],dt=[31,28,31,30,31,30,31,31,30,31,30,31];function yt(t,e,n,r){function a(t,e,n){for(t="number"==typeof t?t.toString():t||"";t.lengtht?-1:0r-t.getDate())){t.setDate(t.getDate()+e);break}e-=r-t.getDate()+1,t.setDate(1),11>n?t.setMonth(n+1):(t.setMonth(0),t.setFullYear(t.getFullYear()+1))}return n=new Date(t.getFullYear()+1,0,4),e=u(new Date(t.getFullYear(),0,4)),n=u(n),0>=o(e,t)?0>=o(n,t)?t.getFullYear()+1:t.getFullYear():t.getFullYear()-1}var s=E[r+40>>2>>>0];for(var l in r={$b:E[r>>2>>>0],Zb:E[r+4>>2>>>0],Gb:E[r+8>>2>>>0],Kb:E[r+12>>2>>>0],Hb:E[r+16>>2>>>0],Cb:E[r+20>>2>>>0],Ab:E[r+24>>2>>>0],Bb:E[r+28>>2>>>0],bc:E[r+32>>2>>>0],Yb:E[r+36>>2>>>0],ac:s?j(s):""},n=j(n),s={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"})n=n.replace(new RegExp(l,"g"),s[l]);var f="Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "),p="January February March April May June July August September October November December".split(" ");for(l in s={"%a":function(t){return f[t.Ab].substring(0,3)},"%A":function(t){return f[t.Ab]},"%b":function(t){return p[t.Hb].substring(0,3)},"%B":function(t){return p[t.Hb]},"%C":function(t){return i((t.Cb+1900)/100|0,2)},"%d":function(t){return i(t.Kb,2)},"%e":function(t){return a(t.Kb,2," ")},"%g":function(t){return c(t).toString().substring(2)},"%G":function(t){return c(t)},"%H":function(t){return i(t.Gb,2)},"%I":function(t){return 0==(t=t.Gb)?t=12:12t.Gb?"AM":"PM"},"%S":function(t){return i(t.$b,2)},"%t":function(){return"\\t"},"%u":function(t){return t.Ab||7},"%U":function(t){return i(Math.floor((t.Bb+7-t.Ab)/7),2)},"%V":function(t){var e=Math.floor((t.Bb+7-(t.Ab+6)%7)/7);if(2>=(t.Ab+371-t.Bb-2)%7&&e++,e)53==e&&(4==(n=(t.Ab+371-t.Bb)%7)||3==n&&pt(t.Cb)||(e=1));else{e=52;var n=(t.Ab+7-t.Bb-1)%7;(4==n||5==n&&pt(t.Cb%400-1))&&e++}return i(e,2)},"%w":function(t){return t.Ab},"%W":function(t){return i(Math.floor((t.Bb+7-(t.Ab+6)%7)/7),2)},"%y":function(t){return(t.Cb+1900).toString().substring(2)},"%Y":function(t){return t.Cb+1900},"%z":function(t){var e=0<=(t=t.Yb);return t=Math.abs(t)/60,(e?"+":"-")+String("0000"+(t/60*100+t%60)).slice(-4)},"%Z":function(t){return t.ac},"%%":function(){return"%"}},n=n.replace(/%%/g,"\\0\\0"),s)n.includes(l)&&(n=n.replace(new RegExp(l,"g"),s[l](r)));return l=function(t){var e=Array(D(t)+1);return k(t,e,0,e.length),e}(n=n.replace(/\\0\\0/g,"%")),l.length>e?0:(S.set(l,t>>>0),l.length-1)}var bt={a:function(t){return gt(t+24)+24},m:function(t){return(t=new et(t)).Pb()||(t.Ib(!0),K--),t.Jb(!1),Q.push(t),t.Nb(),t.Qb()},ia:function(t){throw w("Unexpected exception thrown, this is not properly supported - aborting"),C=!0,t},w:function(){Ot(0);var t=Q.pop();if(t.Xb()&&!t.Lb()){var e=t.Wb();e&&at(e)(t.Db),nt(t.Db)}tt=0},d:function(){var t=tt;if(!t)return ft=0;var e=new et(t);e.Fb(t);var n=e.Eb();if(!n)return ft=0,t;for(var 
r=Array.prototype.slice.call(arguments),a=0;a>>2]+4294967296*E[t+4>>>2])),E[e>>2>>>0]=t.getUTCSeconds(),E[e+4>>2>>>0]=t.getUTCMinutes(),E[e+8>>2>>>0]=t.getUTCHours(),E[e+12>>2>>>0]=t.getUTCDate(),E[e+16>>2>>>0]=t.getUTCMonth(),E[e+20>>2>>>0]=t.getUTCFullYear()-1900,E[e+24>>2>>>0]=t.getUTCDay(),E[e+28>>2>>>0]=(t.getTime()-Date.UTC(t.getUTCFullYear(),0,1,0,0,0,0))/864e5|0},Ea:function(t,e){t=new Date(1e3*(M[t>>>2]+4294967296*E[t+4>>>2])),E[e>>2>>>0]=t.getSeconds(),E[e+4>>2>>>0]=t.getMinutes(),E[e+8>>2>>>0]=t.getHours(),E[e+12>>2>>>0]=t.getDate(),E[e+16>>2>>>0]=t.getMonth(),E[e+20>>2>>>0]=t.getFullYear()-1900,E[e+24>>2>>>0]=t.getDay();var n=new Date(t.getFullYear(),0,1);E[e+28>>2>>>0]=(t.getTime()-n.getTime())/864e5|0,E[e+36>>2>>>0]=-60*t.getTimezoneOffset();var r=new Date(t.getFullYear(),6,1).getTimezoneOffset();n=n.getTimezoneOffset(),E[e+32>>2>>>0]=0|(r!=n&&t.getTimezoneOffset()==Math.min(n,r))},Fa:function(t){var e=new Date(E[t+20>>2>>>0]+1900,E[t+16>>2>>>0],E[t+12>>2>>>0],E[t+8>>2>>>0],E[t+4>>2>>>0],E[t>>2>>>0],0),n=E[t+32>>2>>>0],r=e.getTimezoneOffset(),a=new Date(e.getFullYear(),0,1),i=new Date(e.getFullYear(),6,1).getTimezoneOffset(),o=a.getTimezoneOffset(),u=Math.min(o,i);return 0>n?E[t+32>>2>>>0]=Number(i!=o&&u==r):0>2>>>0]=e.getDay(),E[t+28>>2>>>0]=(e.getTime()-a.getTime())/864e5|0,E[t>>2>>>0]=e.getSeconds(),E[t+4>>2>>>0]=e.getMinutes(),E[t+8>>2>>>0]=e.getHours(),E[t+12>>2>>>0]=e.getDate(),E[t+16>>2>>>0]=e.getMonth(),e.getTime()/1e3|0},sa:function(){return-52},ta:function(){},Ga:function t(e,n,r){t.Vb||(t.Vb=!0,function(t,e,n){function r(t){return(t=t.toTimeString().match(/\\(([A-Za-z ]+)\\)$/))?t[1]:"GMT"}var a=(new Date).getFullYear(),i=new Date(a,0,1),o=new Date(a,6,1);a=i.getTimezoneOffset();var u=o.getTimezoneOffset();E[t>>2>>>0]=60*Math.max(a,u),E[e>>2>>>0]=Number(a!=u),t=r(i),e=r(o),t=it(t),e=it(e),u>2>>>0]=t,M[n+4>>2>>>0]=e):(M[n>>2>>>0]=e,M[n+4>>2>>>0]=t)}(e,n,r))},B:function(){V("")},ma:function(){return 4294901760},I:b?()=>{var t=process.hrtime();return 1e3*t[0]+t[1]/1e6}:()=>performance.now(),xa:function(t,e,n){T.copyWithin(t>>>0,e>>>0,e+n>>>0)},G:function(t){var e=T.length;if(4294901760<(t>>>=0))return!1;for(var n=1;4>=n;n*=2){var r=e*(1+.2/n);r=Math.min(r,t+100663296);var a=Math;r=Math.max(t,r),a=a.min.call(a,4294901760,r+(65536-r%65536)%65536);t:{try{O.grow(a-A.byteLength+65535>>>16),P();var i=1;break t}catch(t){}i=void 0}if(i)return!0}return!1},va:function(t,e){var n=0;return ut().forEach((function(r,a){var i=e+n;for(a=M[t+4*a>>2>>>0]=i,i=0;i>0>>>0]=r.charCodeAt(i);S[a>>0>>>0]=0,n+=r.length+1})),0},wa:function(t,e){var n=ut();M[t>>2>>>0]=n.length;var r=0;return n.forEach((function(t){r+=t.length+1})),M[e>>2>>>0]=r,0},ba:function(t){_||0>2>>>0],u=M[e+4>>2>>>0];e+=8;for(var c=0;c>>0]);a+=u}return M[r>>2>>>0]=a,0},c:function(){return ft},ja:function t(e,r){t.Mb||(t.Mb=function(){if("object"==typeof crypto&&"function"==typeof crypto.getRandomValues){var t=new Uint8Array(1);return()=>(crypto.getRandomValues(t),t[0])}if(b)try{var e=n(Object(function(){var t=new Error("Cannot find module \'crypto\'");throw t.code="MODULE_NOT_FOUND",t}()));return()=>e.randomBytes(1)[0]}catch(t){}return()=>V("randomDevice")}());for(var a=0;a>0>>>0]=t.Mb();return 0},ea:function(t,e,n){var r=At();try{return at(t)(e,n)}catch(t){if(St(r),t!==t+0)throw t;Ot(1,0)}},fa:function(t,e,n){var r=At();try{return at(t)(e,n)}catch(t){if(St(r),t!==t+0)throw t;Ot(1,0)}},J:function(t){var e=At();try{return at(t)()}catch(t){if(St(e),t!==t+0)throw t;Ot(1,0)}},e:function(t,e){var n=At();try{return 
at(t)(e)}catch(t){if(St(n),t!==t+0)throw t;Ot(1,0)}},N:function(t,e,n){var r=At();try{return at(t)(e,n)}catch(t){if(St(r),t!==t+0)throw t;Ot(1,0)}},O:function(t,e,n){var r=At();try{return at(t)(e,n)}catch(t){if(St(r),t!==t+0)throw t;Ot(1,0)}},j:function(t,e,n){var r=At();try{return at(t)(e,n)}catch(t){if(St(r),t!==t+0)throw t;Ot(1,0)}},o:function(t,e,n,r){var a=At();try{return at(t)(e,n,r)}catch(t){if(St(a),t!==t+0)throw t;Ot(1,0)}},p:function(t,e,n,r,a){var i=At();try{return at(t)(e,n,r,a)}catch(t){if(St(i),t!==t+0)throw t;Ot(1,0)}},M:function(t,e,n,r,a,i){var o=At();try{return at(t)(e,n,r,a,i)}catch(t){if(St(o),t!==t+0)throw t;Ot(1,0)}},r:function(t,e,n,r,a,i){var o=At();try{return at(t)(e,n,r,a,i)}catch(t){if(St(o),t!==t+0)throw t;Ot(1,0)}},v:function(t,e,n,r,a,i,o){var u=At();try{return at(t)(e,n,r,a,i,o)}catch(t){if(St(u),t!==t+0)throw t;Ot(1,0)}},K:function(t,e,n,r,a,i,o,u){var c=At();try{return at(t)(e,n,r,a,i,o,u)}catch(t){if(St(c),t!==t+0)throw t;Ot(1,0)}},D:function(t,e,n,r,a,i,o,u,c,s,l,f){var p=At();try{return at(t)(e,n,r,a,i,o,u,c,s,l,f)}catch(t){if(St(p),t!==t+0)throw t;Ot(1,0)}},X:function(t,e,n,r,a,i,o,u){var c=At();try{return Ft(t,e,n,r,a,i,o,u)}catch(t){if(St(c),t!==t+0)throw t;Ot(1,0)}},V:function(t,e,n,r,a,i,o){var u=At();try{return xt(t,e,n,r,a,i,o)}catch(t){if(St(u),t!==t+0)throw t;Ot(1,0)}},U:function(t,e,n,r,a){var i=At();try{return It(t,e,n,r,a)}catch(t){if(St(i),t!==t+0)throw t;Ot(1,0)}},Z:function(t,e,n,r){var a=At();try{return Pt(t,e,n,r)}catch(t){if(St(a),t!==t+0)throw t;Ot(1,0)}},W:function(t){var e=At();try{return Ct(t)}catch(t){if(St(e),t!==t+0)throw t;Ot(1,0)}},Y:function(t,e){var n=At();try{return Ut(t,e)}catch(t){if(St(n),t!==t+0)throw t;Ot(1,0)}},T:function(t,e,n){var r=At();try{return Rt(t,e,n)}catch(t){if(St(r),t!==t+0)throw t;Ot(1,0)}},f:function(t){var e=At();try{at(t)()}catch(t){if(St(e),t!==t+0)throw t;Ot(1,0)}},q:function(t,e){var n=At();try{at(t)(e)}catch(t){if(St(n),t!==t+0)throw t;Ot(1,0)}},h:function(t,e,n){var r=At();try{at(t)(e,n)}catch(t){if(St(r),t!==t+0)throw t;Ot(1,0)}},da:function(t,e,n,r){var a=At();try{at(t)(e,n,r)}catch(t){if(St(a),t!==t+0)throw t;Ot(1,0)}},l:function(t,e,n,r){var a=At();try{at(t)(e,n,r)}catch(t){if(St(a),t!==t+0)throw t;Ot(1,0)}},t:function(t,e,n,r,a){var i=At();try{at(t)(e,n,r,a)}catch(t){if(St(i),t!==t+0)throw t;Ot(1,0)}},u:function(t,e,n,r,a,i){var o=At();try{at(t)(e,n,r,a,i)}catch(t){if(St(o),t!==t+0)throw t;Ot(1,0)}},x:function(t,e,n,r,a,i,o){var u=At();try{at(t)(e,n,r,a,i,o)}catch(t){if(St(u),t!==t+0)throw t;Ot(1,0)}},z:function(t,e,n,r,a,i,o,u){var c=At();try{at(t)(e,n,r,a,i,o,u)}catch(t){if(St(c),t!==t+0)throw t;Ot(1,0)}},ga:function(t,e,n,r,a,i,o,u,c){var s=At();try{at(t)(e,n,r,a,i,o,u,c)}catch(t){if(St(s),t!==t+0)throw t;Ot(1,0)}},A:function(t,e,n,r,a,i,o,u,c,s,l){var f=At();try{at(t)(e,n,r,a,i,o,u,c,s,l)}catch(t){if(St(f),t!==t+0)throw t;Ot(1,0)}},C:function(t,e,n,r,a,i,o,u,c,s,l,f,p,h,d,y){var b=At();try{at(t)(e,n,r,a,i,o,u,c,s,l,f,p,h,d,y)}catch(t){if(St(b),t!==t+0)throw t;Ot(1,0)}},aa:function(t,e,n,r,a,i,o,u){var c=At();try{jt(t,e,n,r,a,i,o,u)}catch(t){if(St(c),t!==t+0)throw t;Ot(1,0)}},_:function(t,e,n,r,a,i,o,u,c,s,l,f){var p=At();try{Dt(t,e,n,r,a,i,o,u,c,s,l,f)}catch(t){if(St(p),t!==t+0)throw t;Ot(1,0)}},$:function(t,e,n,r,a,i){var o=At();try{kt(t,e,n,r,a,i)}catch(t){if(St(o),t!==t+0)throw t;Ot(1,0)}},n:function(t){return t},F:function(t){ft=t},ha:yt,y:function(t,e,n,r){return yt(t,e,n,r)}};!function(){function 
t(t){e.asm=t.exports,O=e.asm.Ka,P(),U=e.asm.ib,I.unshift(e.asm.La),B--,e.monitorRunDependencies&&e.monitorRunDependencies(B),0==B&&(null!==G&&(clearInterval(G),G=null),N&&(t=N,N=null,t()))}function n(e){t(e.instance)}function r(t){return function(){if(!g&&(d||y)){if("function"==typeof fetch&&!Y.startsWith("file://"))return fetch(Y,{credentials:"same-origin"}).then((function(t){if(!t.ok)throw"failed to load wasm binary file at \'"+Y+"\'";return t.arrayBuffer()})).catch((function(){return X()}));if(o)return new Promise((function(t,e){o(Y,(function(e){t(new Uint8Array(e))}),e)}))}return Promise.resolve().then((function(){return X()}))}().then((function(t){return WebAssembly.instantiate(t,i)})).then((function(t){return t})).then(t,(function(t){w("failed to asynchronously prepare wasm: "+t),V(t)}))}var i={a:bt};if(B++,e.monitorRunDependencies&&e.monitorRunDependencies(B),e.instantiateWasm)try{return e.instantiateWasm(i,t)}catch(t){return w("Module.instantiateWasm callback failed with error: "+t),!1}(g||"function"!=typeof WebAssembly.instantiateStreaming||$()||Y.startsWith("file://")||b||"function"!=typeof fetch?r(n):fetch(Y,{credentials:"same-origin"}).then((function(t){return WebAssembly.instantiateStreaming(t,i).then(n,(function(t){return w("wasm streaming compile failed: "+t),w("falling back to ArrayBuffer instantiation"),r(n)}))}))).catch(a)}(),e.___wasm_call_ctors=function(){return(e.___wasm_call_ctors=e.asm.La).apply(null,arguments)},e._OrtInit=function(){return(e._OrtInit=e.asm.Ma).apply(null,arguments)},e._OrtCreateSessionOptions=function(){return(e._OrtCreateSessionOptions=e.asm.Na).apply(null,arguments)},e._OrtAppendExecutionProvider=function(){return(e._OrtAppendExecutionProvider=e.asm.Oa).apply(null,arguments)},e._OrtAddSessionConfigEntry=function(){return(e._OrtAddSessionConfigEntry=e.asm.Pa).apply(null,arguments)},e._OrtReleaseSessionOptions=function(){return(e._OrtReleaseSessionOptions=e.asm.Qa).apply(null,arguments)},e._OrtCreateSession=function(){return(e._OrtCreateSession=e.asm.Ra).apply(null,arguments)},e._OrtReleaseSession=function(){return(e._OrtReleaseSession=e.asm.Sa).apply(null,arguments)},e._OrtGetInputCount=function(){return(e._OrtGetInputCount=e.asm.Ta).apply(null,arguments)},e._OrtGetOutputCount=function(){return(e._OrtGetOutputCount=e.asm.Ua).apply(null,arguments)},e._OrtGetInputName=function(){return(e._OrtGetInputName=e.asm.Va).apply(null,arguments)},e._OrtGetOutputName=function(){return(e._OrtGetOutputName=e.asm.Wa).apply(null,arguments)},e._OrtFree=function(){return(e._OrtFree=e.asm.Xa).apply(null,arguments)},e._OrtCreateTensor=function(){return(e._OrtCreateTensor=e.asm.Ya).apply(null,arguments)},e._OrtGetTensorData=function(){return(e._OrtGetTensorData=e.asm.Za).apply(null,arguments)},e._OrtReleaseTensor=function(){return(e._OrtReleaseTensor=e.asm._a).apply(null,arguments)},e._OrtCreateRunOptions=function(){return(e._OrtCreateRunOptions=e.asm.$a).apply(null,arguments)},e._OrtAddRunConfigEntry=function(){return(e._OrtAddRunConfigEntry=e.asm.ab).apply(null,arguments)},e._OrtReleaseRunOptions=function(){return(e._OrtReleaseRunOptions=e.asm.bb).apply(null,arguments)},e._OrtRun=function(){return(e._OrtRun=e.asm.cb).apply(null,arguments)},e._OrtEndProfiling=function(){return(e._OrtEndProfiling=e.asm.db).apply(null,arguments)};var 
mt,gt=e._malloc=function(){return(gt=e._malloc=e.asm.eb).apply(null,arguments)},vt=e._free=function(){return(vt=e._free=e.asm.fb).apply(null,arguments)},wt=e._fflush=function(){return(wt=e._fflush=e.asm.gb).apply(null,arguments)},_t=e.___funcs_on_exit=function(){return(_t=e.___funcs_on_exit=e.asm.hb).apply(null,arguments)},Ot=e._setThrew=function(){return(Ot=e._setThrew=e.asm.jb).apply(null,arguments)},At=e.stackSave=function(){return(At=e.stackSave=e.asm.kb).apply(null,arguments)},St=e.stackRestore=function(){return(St=e.stackRestore=e.asm.lb).apply(null,arguments)},Tt=e.stackAlloc=function(){return(Tt=e.stackAlloc=e.asm.mb).apply(null,arguments)},Et=e.___cxa_can_catch=function(){return(Et=e.___cxa_can_catch=e.asm.nb).apply(null,arguments)},Mt=e.___cxa_is_pointer_type=function(){return(Mt=e.___cxa_is_pointer_type=e.asm.ob).apply(null,arguments)},Ct=e.dynCall_j=function(){return(Ct=e.dynCall_j=e.asm.pb).apply(null,arguments)},xt=e.dynCall_iiiiij=function(){return(xt=e.dynCall_iiiiij=e.asm.qb).apply(null,arguments)},Rt=e.dynCall_jii=function(){return(Rt=e.dynCall_jii=e.asm.rb).apply(null,arguments)},jt=e.dynCall_viiiiij=function(){return(jt=e.dynCall_viiiiij=e.asm.sb).apply(null,arguments)},kt=e.dynCall_vjji=function(){return(kt=e.dynCall_vjji=e.asm.tb).apply(null,arguments)},Dt=e.dynCall_viiijjjii=function(){return(Dt=e.dynCall_viiijjjii=e.asm.ub).apply(null,arguments)},Pt=e.dynCall_iij=function(){return(Pt=e.dynCall_iij=e.asm.vb).apply(null,arguments)},Ut=e.dynCall_ji=function(){return(Ut=e.dynCall_ji=e.asm.wb).apply(null,arguments)},Ft=e.dynCall_iiiiiij=function(){return(Ft=e.dynCall_iiiiiij=e.asm.xb).apply(null,arguments)},It=e.dynCall_iiij=function(){return(It=e.dynCall_iiij=e.asm.yb).apply(null,arguments)};function Wt(){function t(){if(!mt&&(mt=!0,e.calledRun=!0,!C)){if(Z(I),r(e),e.onRuntimeInitialized&&e.onRuntimeInitialized(),e.postRun)for("function"==typeof e.postRun&&(e.postRun=[e.postRun]);e.postRun.length;){var t=e.postRun.shift();H.unshift(t)}Z(H)}}if(!(0{"use strict";Object.defineProperty(e,"__esModule",{value:!0}),e.iterateExtraOptions=void 0,e.iterateExtraOptions=(t,n,r,a)=>{if("object"==typeof t&&null!==t){if(r.has(t))throw new Error("Circular reference in options");r.add(t)}Object.entries(t).forEach((([t,i])=>{const o=n?n+t:t;if("object"==typeof i)(0,e.iterateExtraOptions)(i,o+".",r,a);else if("string"==typeof i||"number"==typeof i)a(o,i.toString());else{if("boolean"!=typeof i)throw new Error("Can\'t handle extra config type: "+typeof i);a(o,i?"1":"0")}}))}},586:(t,e,n)=>{"use strict";Object.defineProperty(e,"__esModule",{value:!0}),e.setRunOptions=void 0;const r=n(967),a=n(983),i=n(361);e.setRunOptions=t=>{const e=(0,i.getInstance)();let n=0;const o=[],u=t||{};try{if(void 0===(null==t?void 0:t.logSeverityLevel))u.logSeverityLevel=2;else if("number"!=typeof t.logSeverityLevel||!Number.isInteger(t.logSeverityLevel)||t.logSeverityLevel<0||t.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${t.logSeverityLevel}`);if(void 0===(null==t?void 0:t.logVerbosityLevel))u.logVerbosityLevel=0;else if("number"!=typeof t.logVerbosityLevel||!Number.isInteger(t.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${t.logVerbosityLevel}`);void 0===(null==t?void 0:t.terminate)&&(u.terminate=!1);let i=0;if(void 0!==(null==t?void 0:t.tag)&&(i=(0,a.allocWasmString)(t.tag,o)),n=e._OrtCreateRunOptions(u.logSeverityLevel,u.logVerbosityLevel,!!u.terminate,i),0===n)throw new Error("Can\'t create run options");return void 0!==(null==t?void 
0:t.extra)&&(0,r.iterateExtraOptions)(t.extra,"",new WeakSet,((t,r)=>{const i=(0,a.allocWasmString)(t,o),u=(0,a.allocWasmString)(r,o);if(0!==e._OrtAddRunConfigEntry(n,i,u))throw new Error(`Can\'t set a run config entry: ${t} - ${r}`)})),[n,o]}catch(t){throw 0!==n&&e._OrtReleaseRunOptions(n),o.forEach(e._free),t}}},919:(t,e,n)=>{"use strict";Object.defineProperty(e,"__esModule",{value:!0}),e.setSessionOptions=void 0;const r=n(967),a=n(983),i=n(361);e.setSessionOptions=t=>{const e=(0,i.getInstance)();let n=0;const o=[],u=t||{};(t=>{t.extra||(t.extra={}),t.extra.session||(t.extra.session={});const e=t.extra.session;e.use_ort_model_bytes_directly||(e.use_ort_model_bytes_directly="1")})(u);try{void 0===(null==t?void 0:t.graphOptimizationLevel)&&(u.graphOptimizationLevel="all");const c=(t=>{switch(t){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${t}`)}})(u.graphOptimizationLevel);void 0===(null==t?void 0:t.enableCpuMemArena)&&(u.enableCpuMemArena=!0),void 0===(null==t?void 0:t.enableMemPattern)&&(u.enableMemPattern=!0),void 0===(null==t?void 0:t.executionMode)&&(u.executionMode="sequential");const s=(t=>{switch(t){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: ${t}`)}})(u.executionMode);let l=0;if(void 0!==(null==t?void 0:t.logId)&&(l=(0,a.allocWasmString)(t.logId,o)),void 0===(null==t?void 0:t.logSeverityLevel))u.logSeverityLevel=2;else if("number"!=typeof t.logSeverityLevel||!Number.isInteger(t.logSeverityLevel)||t.logSeverityLevel<0||t.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${t.logSeverityLevel}`);if(void 0===(null==t?void 0:t.logVerbosityLevel))u.logVerbosityLevel=0;else if("number"!=typeof t.logVerbosityLevel||!Number.isInteger(t.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${t.logVerbosityLevel}`);if(void 0===(null==t?void 0:t.enableProfiling)&&(u.enableProfiling=!1),n=e._OrtCreateSessionOptions(c,!!u.enableCpuMemArena,!!u.enableMemPattern,s,!!u.enableProfiling,0,l,u.logSeverityLevel,u.logVerbosityLevel),0===n)throw new Error("Can\'t create session options");return(null==t?void 0:t.executionProviders)&&((t,e,n)=>{for(const r of e){let e="string"==typeof r?r:r.name;switch(e){case"xnnpack":e="XNNPACK";break;case"wasm":case"cpu":continue;default:throw new Error(`not supported EP: ${e}`)}const o=(0,a.allocWasmString)(e,n);if(0!==(0,i.getInstance)()._OrtAppendExecutionProvider(t,o))throw new Error(`Can\'t append execution provider: ${e}`)}})(n,t.executionProviders,o),void 0!==(null==t?void 0:t.extra)&&(0,r.iterateExtraOptions)(t.extra,"",new WeakSet,((t,r)=>{const i=(0,a.allocWasmString)(t,o),u=(0,a.allocWasmString)(r,o);if(0!==e._OrtAddSessionConfigEntry(n,i,u))throw new Error(`Can\'t set a session config entry: ${t} - ${r}`)})),[n,o]}catch(t){throw 0!==n&&e._OrtReleaseSessionOptions(n),o.forEach(e._free),t}}},983:(t,e,n)=>{"use strict";Object.defineProperty(e,"__esModule",{value:!0}),e.allocWasmString=void 0;const r=n(361);e.allocWasmString=(t,e)=>{const n=(0,r.getInstance)(),a=n.lengthBytesUTF8(t)+1,i=n._malloc(a);return n.stringToUTF8(t,i,a),e.push(i),i}},349:(t,e,n)=>{"use strict";Object.defineProperty(e,"__esModule",{value:!0}),e.extractTransferableBuffers=e.endProfiling=e.run=e.releaseSession=e.createSession=e.createSessionFinalize=e.createSessionAllocate=e.initOrt=void 0;const r=n(586),a=n(919),i=n(983),o=n(361);e.initOrt=(t,e)=>{const 
n=(0,o.getInstance)()._OrtInit(t,e);if(0!==n)throw new Error(`Can\'t initialize onnxruntime. error code = ${n}`)};const u=new Map;e.createSessionAllocate=t=>{const e=(0,o.getInstance)(),n=e._malloc(t.byteLength);return e.HEAPU8.set(t,n),[n,t.byteLength]},e.createSessionFinalize=(t,e)=>{const n=(0,o.getInstance)();let r=0,i=0,c=[];try{if([i,c]=(0,a.setSessionOptions)(e),r=n._OrtCreateSession(t[0],t[1],i),0===r)throw new Error("Can\'t create a session")}finally{n._free(t[0]),n._OrtReleaseSessionOptions(i),c.forEach(n._free)}const s=n._OrtGetInputCount(r),l=n._OrtGetOutputCount(r),f=[],p=[],h=[],d=[];for(let t=0;t{const r=(0,e.createSessionAllocate)(t);return(0,e.createSessionFinalize)(r,n)},e.releaseSession=t=>{const e=(0,o.getInstance)(),n=u.get(t);if(!n)throw new Error("invalid session id");const r=n[0],a=n[1],i=n[2];a.forEach(e._OrtFree),i.forEach(e._OrtFree),e._OrtReleaseSession(r),u.delete(t)};const c=t=>{switch(t){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 13;default:throw new Error(`unsupported data type: ${t}`)}},s=t=>{switch(t){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";default:throw new Error(`unsupported data type: ${t}`)}},l=t=>{switch(t){case"float32":return Float32Array;case"uint8":case"bool":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return Int16Array;case"int32":return Int32Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return BigUint64Array;default:throw new Error(`unsupported type: ${t}`)}};e.run=(t,e,n,a,f)=>{const p=(0,o.getInstance)(),h=u.get(t);if(!h)throw new Error("invalid session id");const d=h[0],y=h[1],b=h[2],m=e.length,g=a.length;let v=0,w=[];const _=[],O=[];try{[v,w]=(0,r.setRunOptions)(f);for(let t=0;tp.HEAP32[t++]=e));const n=p._OrtCreateTensor(c(e),o,u,l,r.length);if(0===n)throw new Error("Can\'t create a tensor");_.push(n)}finally{p.stackRestore(s)}}const t=p.stackSave(),o=p.stackAlloc(4*m),u=p.stackAlloc(4*m),h=p.stackAlloc(4*g),A=p.stackAlloc(4*g);try{let n=o/4,r=u/4,i=h/4,c=A/4;for(let t=0;tt*e));if(a=s(o),"string"===a){const t=[];let e=i/4;for(let n=0;n{const e=(0,o.getInstance)(),n=u.get(t);if(!n)throw new Error("invalid session id");const r=n[0],a=e._OrtEndProfiling(r);if(0===a)throw new Error("Can\'t get an profile file name");e._OrtFree(a)},e.extractTransferableBuffers=t=>{const e=[];for(const n of t){const t=n[2];!Array.isArray(t)&&t.buffer&&e.push(t.buffer)}return e}},361:function(t,e,n){"use strict";var r=this&&this.__createBinding||(Object.create?function(t,e,n,r){void 0===r&&(r=n);var a=Object.getOwnPropertyDescriptor(e,n);a&&!("get"in a?!e.__esModule:a.writable||a.configurable)||(a={enumerable:!0,get:function(){return e[n]}}),Object.defineProperty(t,r,a)}:function(t,e,n,r){void 0===r&&(r=n),t[r]=e[n]}),a=this&&this.__setModuleDefault||(Object.create?function(t,e){Object.defineProperty(t,"default",{enumerable:!0,value:e})}:function(t,e){t.default=e}),i=this&&this.__importStar||function(t){if(t&&t.__esModule)return t;var e={};if(null!=t)for(var n in 
t)"default"!==n&&Object.prototype.hasOwnProperty.call(t,n)&&r(e,t,n);return a(e,t),e},o=this&&this.__importDefault||function(t){return t&&t.__esModule?t:{default:t}};Object.defineProperty(e,"__esModule",{value:!0}),e.dispose=e.getInstance=e.initializeWebAssembly=void 0;const u=i(n(449)),c=o(n(932)),s=n(474);let l,f=!1,p=!1,h=!1;const d=(t,e)=>e?t?"ort-wasm-simd-threaded.wasm":"ort-wasm-threaded.wasm":t?"ort-wasm-simd.wasm":"ort-wasm.wasm";e.initializeWebAssembly=async t=>{if(f)return Promise.resolve();if(p)throw new Error("multiple calls to \'initializeWebAssembly()\' detected.");if(h)throw new Error("previous call to \'initializeWebAssembly()\' failed.");p=!0;const e=t.initTimeout,r=t.numThreads,a=t.simd,i=r>1&&(()=>{try{return"undefined"!=typeof SharedArrayBuffer&&("undefined"!=typeof MessageChannel&&(new MessageChannel).port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11])))}catch(t){return!1}})(),o=a&&(()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch(t){return!1}})(),y="string"==typeof t.wasmPaths?t.wasmPaths:void 0,b=d(!1,i),m=d(o,i),g="object"==typeof t.wasmPaths?t.wasmPaths[m]:void 0;let v=!1;const w=[];if(e>0&&w.push(new Promise((t=>{setTimeout((()=>{v=!0,t()}),e)}))),w.push(new Promise(((t,e)=>{const r=i?s:c.default,a={locateFile:(t,e)=>i&&t.endsWith(".worker.js")&&"undefined"!=typeof Blob?URL.createObjectURL(new Blob([n(154)],{type:"text/javascript"})):t===b?null!=g?g:(null!=y?y:e)+m:e+t};if(i)if("undefined"==typeof Blob)a.mainScriptUrlOrBlob=u.join("/","ort-wasm-threaded.js");else{const t=`var ortWasmThreaded=(function(){var _scriptDir;return ${r.toString()}})();`;a.mainScriptUrlOrBlob=new Blob([t],{type:"text/javascript"})}r(a).then((e=>{p=!1,f=!0,l=e,t()}),(t=>{p=!1,h=!0,e(t)}))}))),await Promise.race(w),v)throw new Error(`WebAssembly backend initializing failed due to timeout: ${e}ms`)},e.getInstance=()=>{if(f&&l)return l;throw new Error("WebAssembly is not initialized yet.")},e.dispose=()=>{var t;!f||p||h||(p=!0,null===(t=l.PThread)||void 0===t||t.terminateAllThreads(),l=void 0,p=!1,f=!1,h=!0)}},154:t=>{"use strict";t.exports=\'"use strict";var e={},t="object"==typeof process&&"object"==typeof process.versions&&"string"==typeof process.versions.node;if(t){var r=require("worker_threads"),a=r.parentPort;a.on("message",(e=>onmessage({data:e})));var o=require("fs");Object.assign(global,{self:global,require:require,Module:e,location:{href:__filename},Worker:r.Worker,importScripts:function(e){(0,eval)(o.readFileSync(e,"utf8"))},postMessage:function(e){a.postMessage(e)},performance:global.performance||{now:function(){return Date.now()}}})}var s=!1,n=[],i=function(){var e=Array.prototype.slice.call(arguments).join(" ");t?o.writeSync(2,e+"\\\\n"):console.error(e)};self.alert=function(){var t=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:t,threadId:e._pthread_self()})},e.instantiateWasm=(t,r)=>{var a=new WebAssembly.Instance(e.wasmModule,t);return r(a),e.wasmModule=null,a.exports},self.onunhandledrejection=e=>{throw e.reason??e},self.onmessage=t=>{try{if("load"===t.data.cmd){if(e.wasmModule=t.data.wasmModule,e.wasmMemory=t.data.wasmMemory,e.buffer=e.wasmMemory.buffer,e.ENVIRONMENT_IS_PTHREAD=!0,"string"==typeof t.data.urlOrBlob)importScripts(t.data.urlOrBlob);else{var 
r=URL.createObjectURL(t.data.urlOrBlob);importScripts(r),URL.revokeObjectURL(r)}ortWasmThreaded(e).then((function(t){e=t}))}else if("run"===t.data.cmd){e.__performance_now_clock_drift=performance.now()-t.data.time,e.__emscripten_thread_init(t.data.pthread_ptr,0,0,1),e.establishStackSpace(),e.PThread.receiveObjectTransfer(t.data),e.PThread.threadInitTLS(),s||(n.forEach((t=>{e.executeNotifiedProxyingQueue(t)})),n=[],s=!0);try{e.invokeEntryPoint(t.data.start_routine,t.data.arg)}catch(t){if("unwind"!=t){if(!(t instanceof e.ExitStatus))throw t;e.keepRuntimeAlive()||e.__emscripten_thread_exit(t.status)}}}else"cancel"===t.data.cmd?e._pthread_self()&&e.__emscripten_thread_exit(-1):"setimmediate"===t.data.target||("processProxyingQueue"===t.data.cmd?s?e.executeNotifiedProxyingQueue(t.data.queue):n.push(t.data.queue):(i("worker.js received unknown command "+t.data.cmd),i(t.data)))}catch(t){throw i("worker.js onmessage() captured an uncaught exception: "+t),t&&t.stack&&i(t.stack),e.__emscripten_thread_crashed&&e.__emscripten_thread_crashed(),t}};\\n\'},384:()=>{},993:()=>{},908:()=>{},953:()=>{},925:()=>{},449:()=>{}},e={};function n(r){var a=e[r];if(void 0!==a)return a.exports;var i=e[r]={exports:{}};return t[r].call(i.exports,i,i.exports,n),i.exports}n.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(t){if("object"==typeof window)return window}}(),(()=>{"use strict";const t=n(349),e=n(361);self.onmessage=n=>{switch(n.data.type){case"init-wasm":(0,e.initializeWebAssembly)(n.data.in).then((()=>postMessage({type:"init-wasm"})),(t=>postMessage({type:"init-wasm",err:t})));break;case"init-ort":try{const{numThreads:e,loggingLevel:r}=n.data.in;(0,t.initOrt)(e,r),postMessage({type:"init-ort"})}catch(t){postMessage({type:"init-ort",err:t})}break;case"create_allocate":try{const{model:e}=n.data.in,r=(0,t.createSessionAllocate)(e);postMessage({type:"create_allocate",out:r})}catch(t){postMessage({type:"create_allocate",err:t})}break;case"create_finalize":try{const{modeldata:e,options:r}=n.data.in,a=(0,t.createSessionFinalize)(e,r);postMessage({type:"create_finalize",out:a})}catch(t){postMessage({type:"create_finalize",err:t})}break;case"create":try{const{model:e,options:r}=n.data.in,a=(0,t.createSession)(e,r);postMessage({type:"create",out:a})}catch(t){postMessage({type:"create",err:t})}break;case"release":try{const e=n.data.in;(0,t.releaseSession)(e),postMessage({type:"release"})}catch(t){postMessage({type:"release",err:t})}break;case"run":try{const{sessionId:e,inputIndices:r,inputs:a,outputIndices:i,options:o}=n.data.in,u=(0,t.run)(e,r,a,i,o);postMessage({type:"run",out:u},(0,t.extractTransferableBuffers)(u))}catch(t){postMessage({type:"run",err:t})}break;case"end-profiling":try{const e=n.data.in;(0,t.endProfiling)(e),postMessage({type:"end-profiling"})}catch(t){postMessage({type:"end-profiling",err:t})}}}})()})();\n',"Worker",void 0,void 0)}},477:e=>{"use strict";e.exports=function(e,t,n,r){var o=self||window;try{try{var i;try{i=new o.Blob([e])}catch(t){(i=new(o.BlobBuilder||o.WebKitBlobBuilder||o.MozBlobBuilder||o.MSBlobBuilder)).append(e),i=i.getBlob()}var a=o.URL||o.webkitURL,s=a.createObjectURL(i),u=new o[t](s,n);return a.revokeObjectURL(s),u}catch(r){return new o[t]("data:application/javascript,".concat(encodeURIComponent(e)),n)}}catch(e){if(!r)throw Error("Inline worker is not supported");return new o[t](r,n)}}},4154:e=>{"use strict";e.exports='"use strict";var e={},t="object"==typeof process&&"object"==typeof 
process.versions&&"string"==typeof process.versions.node;if(t){var r=require("worker_threads"),a=r.parentPort;a.on("message",(e=>onmessage({data:e})));var o=require("fs");Object.assign(global,{self:global,require:require,Module:e,location:{href:__filename},Worker:r.Worker,importScripts:function(e){(0,eval)(o.readFileSync(e,"utf8"))},postMessage:function(e){a.postMessage(e)},performance:global.performance||{now:function(){return Date.now()}}})}var s=!1,n=[],i=function(){var e=Array.prototype.slice.call(arguments).join(" ");t?o.writeSync(2,e+"\\n"):console.error(e)};self.alert=function(){var t=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:t,threadId:e._pthread_self()})},e.instantiateWasm=(t,r)=>{var a=new WebAssembly.Instance(e.wasmModule,t);return r(a),e.wasmModule=null,a.exports},self.onunhandledrejection=e=>{throw e.reason??e},self.onmessage=t=>{try{if("load"===t.data.cmd){if(e.wasmModule=t.data.wasmModule,e.wasmMemory=t.data.wasmMemory,e.buffer=e.wasmMemory.buffer,e.ENVIRONMENT_IS_PTHREAD=!0,"string"==typeof t.data.urlOrBlob)importScripts(t.data.urlOrBlob);else{var r=URL.createObjectURL(t.data.urlOrBlob);importScripts(r),URL.revokeObjectURL(r)}ortWasmThreaded(e).then((function(t){e=t}))}else if("run"===t.data.cmd){e.__performance_now_clock_drift=performance.now()-t.data.time,e.__emscripten_thread_init(t.data.pthread_ptr,0,0,1),e.establishStackSpace(),e.PThread.receiveObjectTransfer(t.data),e.PThread.threadInitTLS(),s||(n.forEach((t=>{e.executeNotifiedProxyingQueue(t)})),n=[],s=!0);try{e.invokeEntryPoint(t.data.start_routine,t.data.arg)}catch(t){if("unwind"!=t){if(!(t instanceof e.ExitStatus))throw t;e.keepRuntimeAlive()||e.__emscripten_thread_exit(t.status)}}}else"cancel"===t.data.cmd?e._pthread_self()&&e.__emscripten_thread_exit(-1):"setimmediate"===t.data.target||("processProxyingQueue"===t.data.cmd?s?e.executeNotifiedProxyingQueue(t.data.queue):n.push(t.data.queue):(i("worker.js received unknown command "+t.data.cmd),i(t.data)))}catch(t){throw i("worker.js onmessage() captured an uncaught exception: "+t),t&&t.stack&&i(t.stack),e.__emscripten_thread_crashed&&e.__emscripten_thread_crashed(),t}};\n'},1670:e=>{"use strict";e.exports=__WEBPACK_EXTERNAL_MODULE__1670__},7067:()=>{},1296:()=>{},1384:()=>{},3993:()=>{},908:()=>{},6953:()=>{},9925:()=>{},2806:()=>{},6449:()=>{},2850:()=>{},5381:()=>{},5686:(e,t,n)=>{"use strict";n.r(t),n.d(t,{flatbuffers:()=>r});var r={};r.Offset,r.Table,r.SIZEOF_SHORT=2,r.SIZEOF_INT=4,r.FILE_IDENTIFIER_LENGTH=4,r.SIZE_PREFIX_LENGTH=4,r.Encoding={UTF8_BYTES:1,UTF16_STRING:2},r.int32=new Int32Array(2),r.float32=new Float32Array(r.int32.buffer),r.float64=new Float64Array(r.int32.buffer),r.isLittleEndian=1===new Uint16Array(new Uint8Array([1,0]).buffer)[0],r.Long=function(e,t){this.low=0|e,this.high=0|t},r.Long.create=function(e,t){return 0==e&&0==t?r.Long.ZERO:new r.Long(e,t)},r.Long.prototype.toFloat64=function(){return(this.low>>>0)+4294967296*this.high},r.Long.prototype.equals=function(e){return this.low==e.low&&this.high==e.high},r.Long.ZERO=new r.Long(0,0),r.Builder=function(e){if(e)t=e;else var 
t=1024;this.bb=r.ByteBuffer.allocate(t),this.space=t,this.minalign=1,this.vtable=null,this.vtable_in_use=0,this.isNested=!1,this.object_start=0,this.vtables=[],this.vector_num_elems=0,this.force_defaults=!1},r.Builder.prototype.clear=function(){this.bb.clear(),this.space=this.bb.capacity(),this.minalign=1,this.vtable=null,this.vtable_in_use=0,this.isNested=!1,this.object_start=0,this.vtables=[],this.vector_num_elems=0,this.force_defaults=!1},r.Builder.prototype.forceDefaults=function(e){this.force_defaults=e},r.Builder.prototype.dataBuffer=function(){return this.bb},r.Builder.prototype.asUint8Array=function(){return this.bb.bytes().subarray(this.bb.position(),this.bb.position()+this.offset())},r.Builder.prototype.prep=function(e,t){e>this.minalign&&(this.minalign=e);for(var n=1+~(this.bb.capacity()-this.space+t)&e-1;this.space=0&&0==this.vtable[t];t--);for(var n=t+1;t>=0;t--)this.addInt16(0!=this.vtable[t]?e-this.vtable[t]:0);this.addInt16(e-this.object_start);var o=(n+2)*r.SIZEOF_SHORT;this.addInt16(o);var i=0,a=this.space;e:for(t=0;t=0;a--)this.writeInt8(i.charCodeAt(a))}this.prep(this.minalign,r.SIZEOF_INT+o),this.addOffset(e),o&&this.addInt32(this.bb.capacity()-this.space),this.bb.setPosition(this.space)},r.Builder.prototype.finishSizePrefixed=function(e,t){this.finish(e,t,!0)},r.Builder.prototype.requiredField=function(e,t){var n=this.bb.capacity()-e,r=n-this.bb.readInt32(n);if(0==this.bb.readInt16(r+t))throw new Error("FlatBuffers: field "+t+" must be set")},r.Builder.prototype.startVector=function(e,t,n){this.notNested(),this.vector_num_elems=t,this.prep(r.SIZEOF_INT,e*t),this.prep(n,e*t)},r.Builder.prototype.endVector=function(){return this.writeInt32(this.vector_num_elems),this.offset()},r.Builder.prototype.createString=function(e){if(e instanceof Uint8Array)var t=e;else{t=[];for(var n=0;n=56320?o:(o<<10)+e.charCodeAt(n++)+-56613888)<128?t.push(r):(r<2048?t.push(r>>6&31|192):(r<65536?t.push(r>>12&15|224):t.push(r>>18&7|240,r>>12&63|128),t.push(r>>6&63|128)),t.push(63&r|128))}}this.addInt8(0),this.startVector(1,t.length,1),this.bb.setPosition(this.space-=t.length),n=0;for(var i=this.space,a=this.bb.bytes();n>24},r.ByteBuffer.prototype.readUint8=function(e){return this.bytes_[e]},r.ByteBuffer.prototype.readInt16=function(e){return this.readUint16(e)<<16>>16},r.ByteBuffer.prototype.readUint16=function(e){return this.bytes_[e]|this.bytes_[e+1]<<8},r.ByteBuffer.prototype.readInt32=function(e){return this.bytes_[e]|this.bytes_[e+1]<<8|this.bytes_[e+2]<<16|this.bytes_[e+3]<<24},r.ByteBuffer.prototype.readUint32=function(e){return this.readInt32(e)>>>0},r.ByteBuffer.prototype.readInt64=function(e){return new r.Long(this.readInt32(e),this.readInt32(e+4))},r.ByteBuffer.prototype.readUint64=function(e){return new r.Long(this.readUint32(e),this.readUint32(e+4))},r.ByteBuffer.prototype.readFloat32=function(e){return r.int32[0]=this.readInt32(e),r.float32[0]},r.ByteBuffer.prototype.readFloat64=function(e){return 
r.int32[r.isLittleEndian?0:1]=this.readInt32(e),r.int32[r.isLittleEndian?1:0]=this.readInt32(e+4),r.float64[0]},r.ByteBuffer.prototype.writeInt8=function(e,t){this.bytes_[e]=t},r.ByteBuffer.prototype.writeUint8=function(e,t){this.bytes_[e]=t},r.ByteBuffer.prototype.writeInt16=function(e,t){this.bytes_[e]=t,this.bytes_[e+1]=t>>8},r.ByteBuffer.prototype.writeUint16=function(e,t){this.bytes_[e]=t,this.bytes_[e+1]=t>>8},r.ByteBuffer.prototype.writeInt32=function(e,t){this.bytes_[e]=t,this.bytes_[e+1]=t>>8,this.bytes_[e+2]=t>>16,this.bytes_[e+3]=t>>24},r.ByteBuffer.prototype.writeUint32=function(e,t){this.bytes_[e]=t,this.bytes_[e+1]=t>>8,this.bytes_[e+2]=t>>16,this.bytes_[e+3]=t>>24},r.ByteBuffer.prototype.writeInt64=function(e,t){this.writeInt32(e,t.low),this.writeInt32(e+4,t.high)},r.ByteBuffer.prototype.writeUint64=function(e,t){this.writeUint32(e,t.low),this.writeUint32(e+4,t.high)},r.ByteBuffer.prototype.writeFloat32=function(e,t){r.float32[0]=t,this.writeInt32(e,r.int32[0])},r.ByteBuffer.prototype.writeFloat64=function(e,t){r.float64[0]=t,this.writeInt32(e,r.int32[r.isLittleEndian?0:1]),this.writeInt32(e+4,r.int32[r.isLittleEndian?1:0])},r.ByteBuffer.prototype.getBufferIdentifier=function(){if(this.bytes_.length>10),56320+(1023&a)))}return o},r.ByteBuffer.prototype.__indirect=function(e){return e+this.readInt32(e)},r.ByteBuffer.prototype.__vector=function(e){return e+this.readInt32(e)+r.SIZEOF_INT},r.ByteBuffer.prototype.__vector_len=function(e){return this.readInt32(e+this.readInt32(e))},r.ByteBuffer.prototype.__has_identifier=function(e){if(e.length!=r.FILE_IDENTIFIER_LENGTH)throw new Error("FlatBuffers: file identifier must be length "+r.FILE_IDENTIFIER_LENGTH);for(var t=0;t{var t=e&&e.__esModule?()=>e.default:()=>e;return __nested_webpack_require_546802__.d(t,{a:t}),t},__nested_webpack_require_546802__.d=(e,t)=>{for(var n in t)__nested_webpack_require_546802__.o(t,n)&&!__nested_webpack_require_546802__.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},__nested_webpack_require_546802__.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),__nested_webpack_require_546802__.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),__nested_webpack_require_546802__.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})};var __nested_webpack_exports__=__nested_webpack_require_546802__(6018);return __nested_webpack_exports__})(),module.exports=e(__webpack_require__(450))},606:e=>{var t,n,r=e.exports={};function o(){throw new Error("setTimeout has not been defined")}function i(){throw new Error("clearTimeout has not been defined")}function a(e){if(t===setTimeout)return setTimeout(e,0);if((t===o||!t)&&setTimeout)return t=setTimeout,setTimeout(e,0);try{return t(e,0)}catch(n){try{return t.call(null,e,0)}catch(n){return t.call(this,e,0)}}}!function(){try{t="function"==typeof setTimeout?setTimeout:o}catch(e){t=o}try{n="function"==typeof clearTimeout?clearTimeout:i}catch(e){n=i}}();var s,u=[],l=!1,c=-1;function f(){l&&s&&(l=!1,s.length?u=s.concat(u):c=-1,u.length&&d())}function d(){if(!l){var e=a(f);l=!0;for(var t=u.length;t;){for(s=u,u=[];++c1)for(var n=1;n{"use strict";var r=n(540),o=n(982);function i(e){for(var t="https://reactjs.org/docs/error-decoder.html?invariant="+e,n=1;n