program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] { func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2105536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2629888))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2632000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3156352))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3158464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11547136))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11579968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19968640))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20001472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28390144))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30495616))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30503872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31028224))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31554688))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31556800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39945472))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39978304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48366976))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48399808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56788480))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56796736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58893952))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58902208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59426560))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59953024))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59955136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68343808))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68376640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76765312))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76798144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85186816))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87292288))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87300544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87824896))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87827008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88351360))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88353472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96742144))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96774976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105163648))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105196480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113585152))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113593408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115690624))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115698880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116223232))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116225344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116749696))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116751808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125140480))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125173312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133561984))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141983488))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141991744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144088960))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144097216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144621568))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145148032))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145150144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153538816))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153571648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161960320))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161993152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170381824))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170390080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172487296))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172495552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173019904))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173022016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173546368))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173548480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181937152))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181969984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190358656))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190391488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198780160))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198788416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200885632))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200893888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201418240))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201420352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201944704))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201946816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210335488))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210368320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218756992))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218789824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227178496))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227186752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229283968))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229292224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229816576))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229818688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230343040))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230345152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238733824))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238766656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247155328))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247188160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255576832))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255585088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257682304))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257690560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258214912))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258217024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258741376))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258743488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267132160))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267164992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275553664))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275586496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283975168))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283983424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286080640))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286088896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286613248))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286615360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287139712))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287141824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295530496))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295563328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303952000))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303984832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312373504))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312381760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314478976))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314487232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315011584))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315013696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315538048))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315540160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323928832))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323961664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332350336))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332383168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340771840))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340780096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342877312))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342885568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343409920))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343412032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343936384))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343938496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352327168))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352360000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360748672))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360781504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369170176))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369178432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371275648))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371283904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371808256))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371810368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372334720))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372336832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380725504))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380758336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389147008))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389179840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397568512))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397576768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399673984))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399682240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400206592))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400208704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400733056))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400735168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409123840))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409156672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417545344))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417578176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425966848))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425975104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428072320))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428080576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428604928))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428607040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429131392))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429133504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437522176))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437555008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445943680))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445976512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454365184))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; int32 var_80 = const()[name = string("op_80"), val = int32(-1)]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_490_axis_0 = const()[name = string("op_490_axis_0"), val = int32(1)]; int32 var_490_batch_dims_0 = const()[name = string("op_490_batch_dims_0"), val = int32(0)]; bool var_490_validate_indices_0 = const()[name = string("op_490_validate_indices_0"), val = bool(false)]; tensor var_85_to_fp16 = const()[name = string("op_85_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454373440)))]; tensor var_490_cast_fp16 = gather(axis = var_490_axis_0, batch_dims = var_490_batch_dims_0, indices = select_0, validate_indices = var_490_validate_indices_0, x = var_85_to_fp16)[name = string("op_490_cast_fp16")]; tensor var_491 = const()[name = string("op_491"), val = tensor([1, 1, 1, -1])]; tensor sin_1_cast_fp16 = reshape(shape = var_491, x = var_490_cast_fp16)[name = string("sin_1_cast_fp16")]; int32 var_495_axis_0 = const()[name = string("op_495_axis_0"), val = int32(1)]; int32 var_495_batch_dims_0 = const()[name = string("op_495_batch_dims_0"), val = int32(0)]; bool var_495_validate_indices_0 = const()[name = string("op_495_validate_indices_0"), val = bool(false)]; tensor var_79_to_fp16 = const()[name = string("op_79_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471150720)))]; tensor var_495_cast_fp16 = gather(axis = var_495_axis_0, batch_dims = var_495_batch_dims_0, indices = select_0, validate_indices = var_495_validate_indices_0, x = var_79_to_fp16)[name = string("op_495_cast_fp16")]; tensor var_496 = const()[name = string("op_496"), val = tensor([1, 1, 1, -1])]; tensor cos_1_cast_fp16 = reshape(shape = var_496, x = var_495_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_504_axes_0 = const()[name = string("op_504_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(487928000)))]; fp16 var_75_to_fp16 = const()[name = string("op_75_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_504_cast_fp16 = layer_norm(axes = var_504_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_504_cast_fp16")]; tensor var_507 = const()[name = string("op_507"), val = tensor([0, 2, 1])]; tensor var_509_axes_0 = const()[name = string("op_509_axes_0"), val = tensor([2])]; tensor var_508 = transpose(perm = var_507, x = var_504_cast_fp16)[name = string("transpose_63")]; tensor var_509 = expand_dims(axes = var_509_axes_0, x = var_508)[name = string("op_509")]; string var_516_pad_type_0 = const()[name = string("op_516_pad_type_0"), val = string("valid")]; tensor var_516_strides_0 = const()[name = string("op_516_strides_0"), val = tensor([1, 1])]; tensor var_516_pad_0 = const()[name = string("op_516_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_516_dilations_0 = const()[name = string("op_516_dilations_0"), val = tensor([1, 1])]; int32 var_516_groups_0 = const()[name = string("op_516_groups_0"), val = int32(1)]; tensor var_516 = conv(dilations = var_516_dilations_0, groups = var_516_groups_0, pad = var_516_pad_0, pad_type = var_516_pad_type_0, strides = var_516_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_509)[name = string("op_516")]; tensor var_517 = const()[name = string("op_517"), val = tensor([1, 32, 1, 64])]; tensor var_518 = reshape(shape = var_517, x = var_516)[name = string("op_518")]; string var_525_pad_type_0 = const()[name = string("op_525_pad_type_0"), val = string("valid")]; tensor var_525_strides_0 = const()[name = string("op_525_strides_0"), val = tensor([1, 1])]; tensor var_525_pad_0 = const()[name = string("op_525_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_525_dilations_0 = const()[name = string("op_525_dilations_0"), val = tensor([1, 1])]; int32 var_525_groups_0 = const()[name = string("op_525_groups_0"), val = int32(1)]; tensor var_525 = conv(dilations = var_525_dilations_0, groups = var_525_groups_0, pad = var_525_pad_0, pad_type = var_525_pad_type_0, strides = var_525_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_509)[name = string("op_525")]; tensor var_526 = const()[name = string("op_526"), val = tensor([1, 8, 1, 64])]; tensor var_527 = reshape(shape = var_526, x = var_525)[name = string("op_527")]; string var_534_pad_type_0 = const()[name = string("op_534_pad_type_0"), val = string("valid")]; tensor var_534_strides_0 = const()[name = string("op_534_strides_0"), val = tensor([1, 1])]; tensor var_534_pad_0 = const()[name = string("op_534_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_534_dilations_0 = const()[name = string("op_534_dilations_0"), val = tensor([1, 1])]; int32 var_534_groups_0 = const()[name = string("op_534_groups_0"), val = int32(1)]; tensor var_534 = conv(dilations = var_534_dilations_0, groups = var_534_groups_0, pad = var_534_pad_0, pad_type = var_534_pad_type_0, strides = var_534_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_509)[name = string("op_534")]; tensor var_535 = const()[name = string("op_535"), val = tensor([1, 8, 1, 64])]; tensor var_536 = reshape(shape = var_535, x = var_534)[name = string("op_536")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_518)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_518)[name = string("x2_1")]; tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 32])]; tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 32])]; tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; tensor var_550_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_550_cast_fp16")]; tensor var_551_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_551_cast_fp16")]; tensor var_552_cast_fp16 = sub(x = var_550_cast_fp16, y = var_551_cast_fp16)[name = string("op_552_cast_fp16")]; tensor var_553_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_553_cast_fp16")]; tensor var_554_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_554_cast_fp16")]; tensor var_555_cast_fp16 = add(x = var_553_cast_fp16, y = var_554_cast_fp16)[name = string("op_555_cast_fp16")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_80, interleave = rotated_1_interleave_0, values = (var_552_cast_fp16, var_555_cast_fp16))[name = string("rotated_1_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_527)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_527)[name = string("x2_3")]; tensor var_571_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_571_cast_fp16")]; tensor var_572_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_572_cast_fp16")]; tensor var_573_cast_fp16 = sub(x = var_571_cast_fp16, y = var_572_cast_fp16)[name = string("op_573_cast_fp16")]; tensor var_574_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_574_cast_fp16")]; tensor var_575_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_575_cast_fp16")]; tensor var_576_cast_fp16 = add(x = var_574_cast_fp16, y = var_575_cast_fp16)[name = string("op_576_cast_fp16")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_80, interleave = rotated_3_interleave_0, values = (var_573_cast_fp16, var_576_cast_fp16))[name = string("rotated_3_cast_fp16")]; int32 var_580 = const()[name = string("op_580"), val = int32(1)]; tensor var_581 = add(x = current_pos, y = var_580)[name = string("op_581")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_581, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; tensor coreml_update_state_32 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([16])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([17])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_581, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_536, x = coreml_update_state_32)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; tensor coreml_update_state_33 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; tensor var_596_begin_0 = const()[name = string("op_596_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_596_end_0 = const()[name = string("op_596_end_0"), val = tensor([1, 8, 1024, 64])]; tensor var_596_end_mask_0 = const()[name = string("op_596_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_596_cast_fp16 = slice_by_index(begin = var_596_begin_0, end = var_596_end_0, end_mask = var_596_end_mask_0, x = coreml_update_state_33)[name = string("op_596_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_596_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_598_begin_0 = const()[name = string("op_598_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_598_end_0 = const()[name = string("op_598_end_0"), val = tensor([17, 8, 1024, 64])]; tensor var_598_end_mask_0 = const()[name = string("op_598_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_598_cast_fp16 = slice_by_index(begin = var_598_begin_0, end = var_598_end_0, end_mask = var_598_end_mask_0, x = coreml_update_state_33)[name = string("op_598_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_598_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; tensor var_607 = const()[name = string("op_607"), val = tensor([1, 4, 1, 1])]; tensor x_13_cast_fp16 = tile(reps = var_607, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_611 = const()[name = string("op_611"), val = tensor([1, -1, 1024, 64])]; tensor key_states_3_cast_fp16 = reshape(shape = var_611, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_614 = const()[name = string("op_614"), val = tensor([1, 4, 1, 1])]; tensor x_19_cast_fp16 = tile(reps = var_614, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_618 = const()[name = string("op_618"), val = tensor([1, -1, 1024, 64])]; tensor value_states_3_cast_fp16 = reshape(shape = var_618, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; bool var_621_transpose_x_1 = const()[name = string("op_621_transpose_x_1"), val = bool(false)]; bool var_621_transpose_y_1 = const()[name = string("op_621_transpose_y_1"), val = bool(true)]; tensor var_621_cast_fp16 = matmul(transpose_x = var_621_transpose_x_1, transpose_y = var_621_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_621_cast_fp16")]; fp16 var_622_to_fp16 = const()[name = string("op_622_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_1_cast_fp16 = mul(x = var_621_cast_fp16, y = var_622_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; tensor var_633_axes_0 = const()[name = string("op_633_axes_0"), val = tensor([-1])]; bool var_633_keep_dims_0 = const()[name = string("op_633_keep_dims_0"), val = bool(true)]; tensor var_633_cast_fp16 = reduce_sum(axes = var_633_axes_0, keep_dims = var_633_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_633_cast_fp16")]; tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_633_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_636_perm_0 = const()[name = string("op_636_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_638 = const()[name = string("op_638"), val = tensor([1, 1, 2048])]; tensor var_636_cast_fp16 = transpose(perm = var_636_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_62")]; tensor input_5_cast_fp16 = reshape(shape = var_638, x = var_636_cast_fp16)[name = string("input_5_cast_fp16")]; tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(487932160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490029376))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490037632)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_649_axes_0 = const()[name = string("op_649_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490041792)))]; tensor var_649_cast_fp16 = layer_norm(axes = var_649_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_649_cast_fp16")]; tensor var_656 = const()[name = string("op_656"), val = tensor([0, 2, 1])]; tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; tensor var_657 = transpose(perm = var_656, x = var_649_cast_fp16)[name = string("transpose_61")]; tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_657)[name = string("input_9")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; tensor var_679_axes_0 = const()[name = string("op_679_axes_0"), val = tensor([2])]; tensor var_679 = squeeze(axes = var_679_axes_0, x = hidden_states_7)[name = string("op_679")]; tensor var_680 = const()[name = string("op_680"), val = tensor([0, 2, 1])]; tensor var_681 = transpose(perm = var_680, x = var_679)[name = string("transpose_60")]; tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_681)[name = string("hidden_states_9_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_689_axes_0 = const()[name = string("op_689_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490045952)))]; tensor var_689_cast_fp16 = layer_norm(axes = var_689_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_689_cast_fp16")]; tensor var_692 = const()[name = string("op_692"), val = tensor([0, 2, 1])]; tensor var_694_axes_0 = const()[name = string("op_694_axes_0"), val = tensor([2])]; tensor var_693 = transpose(perm = var_692, x = var_689_cast_fp16)[name = string("transpose_59")]; tensor var_694 = expand_dims(axes = var_694_axes_0, x = var_693)[name = string("op_694")]; string var_701_pad_type_0 = const()[name = string("op_701_pad_type_0"), val = string("valid")]; tensor var_701_strides_0 = const()[name = string("op_701_strides_0"), val = tensor([1, 1])]; tensor var_701_pad_0 = const()[name = string("op_701_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_701_dilations_0 = const()[name = string("op_701_dilations_0"), val = tensor([1, 1])]; int32 var_701_groups_0 = const()[name = string("op_701_groups_0"), val = int32(1)]; tensor var_701 = conv(dilations = var_701_dilations_0, groups = var_701_groups_0, pad = var_701_pad_0, pad_type = var_701_pad_type_0, strides = var_701_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_694)[name = string("op_701")]; tensor var_702 = const()[name = string("op_702"), val = tensor([1, 32, 1, 64])]; tensor var_703 = reshape(shape = var_702, x = var_701)[name = string("op_703")]; string var_710_pad_type_0 = const()[name = string("op_710_pad_type_0"), val = string("valid")]; tensor var_710_strides_0 = const()[name = string("op_710_strides_0"), val = tensor([1, 1])]; tensor var_710_pad_0 = const()[name = string("op_710_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_710_dilations_0 = const()[name = string("op_710_dilations_0"), val = tensor([1, 1])]; int32 var_710_groups_0 = const()[name = string("op_710_groups_0"), val = int32(1)]; tensor var_710 = conv(dilations = var_710_dilations_0, groups = var_710_groups_0, pad = var_710_pad_0, pad_type = var_710_pad_type_0, strides = var_710_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_694)[name = string("op_710")]; tensor var_711 = const()[name = string("op_711"), val = tensor([1, 8, 1, 64])]; tensor var_712 = reshape(shape = var_711, x = var_710)[name = string("op_712")]; string var_719_pad_type_0 = const()[name = string("op_719_pad_type_0"), val = string("valid")]; tensor var_719_strides_0 = const()[name = string("op_719_strides_0"), val = tensor([1, 1])]; tensor var_719_pad_0 = const()[name = string("op_719_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_719_dilations_0 = const()[name = string("op_719_dilations_0"), val = tensor([1, 1])]; int32 var_719_groups_0 = const()[name = string("op_719_groups_0"), val = int32(1)]; tensor var_719 = conv(dilations = var_719_dilations_0, groups = var_719_groups_0, pad = var_719_pad_0, pad_type = var_719_pad_type_0, strides = var_719_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_694)[name = string("op_719")]; tensor var_720 = const()[name = string("op_720"), val = tensor([1, 8, 1, 64])]; tensor var_721 = reshape(shape = var_720, x = var_719)[name = string("op_721")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_703)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_703)[name = string("x2_5")]; tensor var_735_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_735_cast_fp16")]; tensor var_736_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_736_cast_fp16")]; tensor var_737_cast_fp16 = sub(x = var_735_cast_fp16, y = var_736_cast_fp16)[name = string("op_737_cast_fp16")]; tensor var_738_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_738_cast_fp16")]; tensor var_739_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_739_cast_fp16")]; tensor var_740_cast_fp16 = add(x = var_738_cast_fp16, y = var_739_cast_fp16)[name = string("op_740_cast_fp16")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5_cast_fp16 = concat(axis = var_80, interleave = rotated_5_interleave_0, values = (var_737_cast_fp16, var_740_cast_fp16))[name = string("rotated_5_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_712)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_712)[name = string("x2_7")]; tensor var_756_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_756_cast_fp16")]; tensor var_757_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_757_cast_fp16")]; tensor var_758_cast_fp16 = sub(x = var_756_cast_fp16, y = var_757_cast_fp16)[name = string("op_758_cast_fp16")]; tensor var_759_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_759_cast_fp16")]; tensor var_760_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_760_cast_fp16")]; tensor var_761_cast_fp16 = add(x = var_759_cast_fp16, y = var_760_cast_fp16)[name = string("op_761_cast_fp16")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7_cast_fp16 = concat(axis = var_80, interleave = rotated_7_interleave_0, values = (var_758_cast_fp16, var_761_cast_fp16))[name = string("rotated_7_cast_fp16")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_581, concat_11_values3_0))[name = string("concat_11")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_33)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; tensor coreml_update_state_34 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([17])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([18])]; int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_581, concat_15_values3_0))[name = string("concat_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_721, x = coreml_update_state_34)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; tensor coreml_update_state_35 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; tensor var_781_begin_0 = const()[name = string("op_781_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_781_end_0 = const()[name = string("op_781_end_0"), val = tensor([2, 8, 1024, 64])]; tensor var_781_end_mask_0 = const()[name = string("op_781_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_781_cast_fp16 = slice_by_index(begin = var_781_begin_0, end = var_781_end_0, end_mask = var_781_end_mask_0, x = coreml_update_state_35)[name = string("op_781_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_781_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_783_begin_0 = const()[name = string("op_783_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_783_end_0 = const()[name = string("op_783_end_0"), val = tensor([18, 8, 1024, 64])]; tensor var_783_end_mask_0 = const()[name = string("op_783_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_783_cast_fp16 = slice_by_index(begin = var_783_begin_0, end = var_783_end_0, end_mask = var_783_end_mask_0, x = coreml_update_state_35)[name = string("op_783_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_783_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; tensor var_792 = const()[name = string("op_792"), val = tensor([1, 4, 1, 1])]; tensor x_41_cast_fp16 = tile(reps = var_792, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_796 = const()[name = string("op_796"), val = tensor([1, -1, 1024, 64])]; tensor key_states_7_cast_fp16 = reshape(shape = var_796, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_799 = const()[name = string("op_799"), val = tensor([1, 4, 1, 1])]; tensor x_47_cast_fp16 = tile(reps = var_799, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_803 = const()[name = string("op_803"), val = tensor([1, -1, 1024, 64])]; tensor value_states_7_cast_fp16 = reshape(shape = var_803, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; bool var_806_transpose_x_1 = const()[name = string("op_806_transpose_x_1"), val = bool(false)]; bool var_806_transpose_y_1 = const()[name = string("op_806_transpose_y_1"), val = bool(true)]; tensor var_806_cast_fp16 = matmul(transpose_x = var_806_transpose_x_1, transpose_y = var_806_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_806_cast_fp16")]; fp16 var_807_to_fp16 = const()[name = string("op_807_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_5_cast_fp16 = mul(x = var_806_cast_fp16, y = var_807_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; tensor var_818_axes_0 = const()[name = string("op_818_axes_0"), val = tensor([-1])]; bool var_818_keep_dims_0 = const()[name = string("op_818_keep_dims_0"), val = bool(true)]; tensor var_818_cast_fp16 = reduce_sum(axes = var_818_axes_0, keep_dims = var_818_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_818_cast_fp16")]; tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_818_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; tensor var_821_perm_0 = const()[name = string("op_821_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_823 = const()[name = string("op_823"), val = tensor([1, 1, 2048])]; tensor var_821_cast_fp16 = transpose(perm = var_821_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_58")]; tensor input_19_cast_fp16 = reshape(shape = var_823, x = var_821_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490050112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492147328))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; tensor var_834_axes_0 = const()[name = string("op_834_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492155584)))]; tensor var_834_cast_fp16 = layer_norm(axes = var_834_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_834_cast_fp16")]; tensor var_841 = const()[name = string("op_841"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_842 = transpose(perm = var_841, x = var_834_cast_fp16)[name = string("transpose_57")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_842)[name = string("input_23")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; tensor var_864_axes_0 = const()[name = string("op_864_axes_0"), val = tensor([2])]; tensor var_864 = squeeze(axes = var_864_axes_0, x = hidden_states_15)[name = string("op_864")]; tensor var_865 = const()[name = string("op_865"), val = tensor([0, 2, 1])]; tensor var_866 = transpose(perm = var_865, x = var_864)[name = string("transpose_56")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_866)[name = string("hidden_states_17_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_874_axes_0 = const()[name = string("op_874_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492159744)))]; tensor var_874_cast_fp16 = layer_norm(axes = var_874_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_874_cast_fp16")]; tensor var_877 = const()[name = string("op_877"), val = tensor([0, 2, 1])]; tensor var_879_axes_0 = const()[name = string("op_879_axes_0"), val = tensor([2])]; tensor var_878 = transpose(perm = var_877, x = var_874_cast_fp16)[name = string("transpose_55")]; tensor var_879 = expand_dims(axes = var_879_axes_0, x = var_878)[name = string("op_879")]; string var_886_pad_type_0 = const()[name = string("op_886_pad_type_0"), val = string("valid")]; tensor var_886_strides_0 = const()[name = string("op_886_strides_0"), val = tensor([1, 1])]; tensor var_886_pad_0 = const()[name = string("op_886_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_886_dilations_0 = const()[name = string("op_886_dilations_0"), val = tensor([1, 1])]; int32 var_886_groups_0 = const()[name = string("op_886_groups_0"), val = int32(1)]; tensor var_886 = conv(dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_879)[name = string("op_886")]; tensor var_887 = const()[name = string("op_887"), val = tensor([1, 32, 1, 64])]; tensor var_888 = reshape(shape = var_887, x = var_886)[name = string("op_888")]; string var_895_pad_type_0 = const()[name = string("op_895_pad_type_0"), val = string("valid")]; tensor var_895_strides_0 = const()[name = string("op_895_strides_0"), val = tensor([1, 1])]; tensor var_895_pad_0 = const()[name = string("op_895_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_895_dilations_0 = const()[name = string("op_895_dilations_0"), val = tensor([1, 1])]; int32 var_895_groups_0 = const()[name = string("op_895_groups_0"), val = int32(1)]; tensor var_895 = conv(dilations = var_895_dilations_0, groups = var_895_groups_0, pad = var_895_pad_0, pad_type = var_895_pad_type_0, strides = var_895_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_879)[name = string("op_895")]; tensor var_896 = const()[name = string("op_896"), val = tensor([1, 8, 1, 64])]; tensor var_897 = reshape(shape = var_896, x = var_895)[name = string("op_897")]; string var_904_pad_type_0 = const()[name = string("op_904_pad_type_0"), val = string("valid")]; tensor var_904_strides_0 = const()[name = string("op_904_strides_0"), val = tensor([1, 1])]; tensor var_904_pad_0 = const()[name = string("op_904_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_904_dilations_0 = const()[name = string("op_904_dilations_0"), val = tensor([1, 1])]; int32 var_904_groups_0 = const()[name = string("op_904_groups_0"), val = int32(1)]; tensor var_904 = conv(dilations = var_904_dilations_0, groups = var_904_groups_0, pad = var_904_pad_0, pad_type = var_904_pad_type_0, strides = var_904_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_879)[name = string("op_904")]; tensor var_905 = const()[name = string("op_905"), val = tensor([1, 8, 1, 64])]; tensor var_906 = reshape(shape = var_905, x = var_904)[name = string("op_906")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_888)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_888)[name = string("x2_9")]; tensor var_920_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_920_cast_fp16")]; tensor var_921_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_921_cast_fp16")]; tensor var_922_cast_fp16 = sub(x = var_920_cast_fp16, y = var_921_cast_fp16)[name = string("op_922_cast_fp16")]; tensor var_923_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_923_cast_fp16")]; tensor var_924_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_924_cast_fp16")]; tensor var_925_cast_fp16 = add(x = var_923_cast_fp16, y = var_924_cast_fp16)[name = string("op_925_cast_fp16")]; bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; tensor rotated_9_cast_fp16 = concat(axis = var_80, interleave = rotated_9_interleave_0, values = (var_922_cast_fp16, var_925_cast_fp16))[name = string("rotated_9_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_897)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_897)[name = string("x2_11")]; tensor var_941_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_941_cast_fp16")]; tensor var_942_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_942_cast_fp16")]; tensor var_943_cast_fp16 = sub(x = var_941_cast_fp16, y = var_942_cast_fp16)[name = string("op_943_cast_fp16")]; tensor var_944_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_944_cast_fp16")]; tensor var_945_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_945_cast_fp16")]; tensor var_946_cast_fp16 = add(x = var_944_cast_fp16, y = var_945_cast_fp16)[name = string("op_946_cast_fp16")]; bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; tensor rotated_11_cast_fp16 = concat(axis = var_80, interleave = rotated_11_interleave_0, values = (var_943_cast_fp16, var_946_cast_fp16))[name = string("rotated_11_cast_fp16")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_581, concat_19_values3_0))[name = string("concat_19")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_35)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; tensor coreml_update_state_36 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([18])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([19])]; int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_581, concat_23_values3_0))[name = string("concat_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_906, x = coreml_update_state_36)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; tensor coreml_update_state_37 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; tensor var_966_begin_0 = const()[name = string("op_966_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_966_end_0 = const()[name = string("op_966_end_0"), val = tensor([3, 8, 1024, 64])]; tensor var_966_end_mask_0 = const()[name = string("op_966_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_966_cast_fp16 = slice_by_index(begin = var_966_begin_0, end = var_966_end_0, end_mask = var_966_end_mask_0, x = coreml_update_state_37)[name = string("op_966_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_966_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_968_begin_0 = const()[name = string("op_968_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_968_end_0 = const()[name = string("op_968_end_0"), val = tensor([19, 8, 1024, 64])]; tensor var_968_end_mask_0 = const()[name = string("op_968_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = coreml_update_state_37)[name = string("op_968_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_968_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_977 = const()[name = string("op_977"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_977, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_981 = const()[name = string("op_981"), val = tensor([1, -1, 1024, 64])]; tensor key_states_11_cast_fp16 = reshape(shape = var_981, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_984 = const()[name = string("op_984"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_984, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; tensor var_988 = const()[name = string("op_988"), val = tensor([1, -1, 1024, 64])]; tensor value_states_11_cast_fp16 = reshape(shape = var_988, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; bool var_991_transpose_x_1 = const()[name = string("op_991_transpose_x_1"), val = bool(false)]; bool var_991_transpose_y_1 = const()[name = string("op_991_transpose_y_1"), val = bool(true)]; tensor var_991_cast_fp16 = matmul(transpose_x = var_991_transpose_x_1, transpose_y = var_991_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_991_cast_fp16")]; fp16 var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_9_cast_fp16 = mul(x = var_991_cast_fp16, y = var_992_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; tensor var_1003_axes_0 = const()[name = string("op_1003_axes_0"), val = tensor([-1])]; bool var_1003_keep_dims_0 = const()[name = string("op_1003_keep_dims_0"), val = bool(true)]; tensor var_1003_cast_fp16 = reduce_sum(axes = var_1003_axes_0, keep_dims = var_1003_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_1003_cast_fp16")]; tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_1003_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; tensor var_1006_perm_0 = const()[name = string("op_1006_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1008 = const()[name = string("op_1008"), val = tensor([1, 1, 2048])]; tensor var_1006_cast_fp16 = transpose(perm = var_1006_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_54")]; tensor input_33_cast_fp16 = reshape(shape = var_1008, x = var_1006_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492163904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494261120))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; tensor var_1019_axes_0 = const()[name = string("op_1019_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494269376)))]; tensor var_1019_cast_fp16 = layer_norm(axes = var_1019_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_1019_cast_fp16")]; tensor var_1026 = const()[name = string("op_1026"), val = tensor([0, 2, 1])]; tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; tensor var_1027 = transpose(perm = var_1026, x = var_1019_cast_fp16)[name = string("transpose_53")]; tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_1027)[name = string("input_37")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; tensor var_1049_axes_0 = const()[name = string("op_1049_axes_0"), val = tensor([2])]; tensor var_1049 = squeeze(axes = var_1049_axes_0, x = hidden_states_23)[name = string("op_1049")]; tensor var_1050 = const()[name = string("op_1050"), val = tensor([0, 2, 1])]; tensor var_1051 = transpose(perm = var_1050, x = var_1049)[name = string("transpose_52")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_1051)[name = string("hidden_states_25_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; tensor var_1059_axes_0 = const()[name = string("op_1059_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494273536)))]; tensor var_1059_cast_fp16 = layer_norm(axes = var_1059_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_1059_cast_fp16")]; tensor var_1062 = const()[name = string("op_1062"), val = tensor([0, 2, 1])]; tensor var_1064_axes_0 = const()[name = string("op_1064_axes_0"), val = tensor([2])]; tensor var_1063 = transpose(perm = var_1062, x = var_1059_cast_fp16)[name = string("transpose_51")]; tensor var_1064 = expand_dims(axes = var_1064_axes_0, x = var_1063)[name = string("op_1064")]; string var_1071_pad_type_0 = const()[name = string("op_1071_pad_type_0"), val = string("valid")]; tensor var_1071_strides_0 = const()[name = string("op_1071_strides_0"), val = tensor([1, 1])]; tensor var_1071_pad_0 = const()[name = string("op_1071_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1071_dilations_0 = const()[name = string("op_1071_dilations_0"), val = tensor([1, 1])]; int32 var_1071_groups_0 = const()[name = string("op_1071_groups_0"), val = int32(1)]; tensor var_1071 = conv(dilations = var_1071_dilations_0, groups = var_1071_groups_0, pad = var_1071_pad_0, pad_type = var_1071_pad_type_0, strides = var_1071_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_1064)[name = string("op_1071")]; tensor var_1072 = const()[name = string("op_1072"), val = tensor([1, 32, 1, 64])]; tensor var_1073 = reshape(shape = var_1072, x = var_1071)[name = string("op_1073")]; string var_1080_pad_type_0 = const()[name = string("op_1080_pad_type_0"), val = string("valid")]; tensor var_1080_strides_0 = const()[name = string("op_1080_strides_0"), val = tensor([1, 1])]; tensor var_1080_pad_0 = const()[name = string("op_1080_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1080_dilations_0 = const()[name = string("op_1080_dilations_0"), val = tensor([1, 1])]; int32 var_1080_groups_0 = const()[name = string("op_1080_groups_0"), val = int32(1)]; tensor var_1080 = conv(dilations = var_1080_dilations_0, groups = var_1080_groups_0, pad = var_1080_pad_0, pad_type = var_1080_pad_type_0, strides = var_1080_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_1064)[name = string("op_1080")]; tensor var_1081 = const()[name = string("op_1081"), val = tensor([1, 8, 1, 64])]; tensor var_1082 = reshape(shape = var_1081, x = var_1080)[name = string("op_1082")]; string var_1089_pad_type_0 = const()[name = string("op_1089_pad_type_0"), val = string("valid")]; tensor var_1089_strides_0 = const()[name = string("op_1089_strides_0"), val = tensor([1, 1])]; tensor var_1089_pad_0 = const()[name = string("op_1089_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1089_dilations_0 = const()[name = string("op_1089_dilations_0"), val = tensor([1, 1])]; int32 var_1089_groups_0 = const()[name = string("op_1089_groups_0"), val = int32(1)]; tensor var_1089 = conv(dilations = var_1089_dilations_0, groups = var_1089_groups_0, pad = var_1089_pad_0, pad_type = var_1089_pad_type_0, strides = var_1089_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_1064)[name = string("op_1089")]; tensor var_1090 = const()[name = string("op_1090"), val = tensor([1, 8, 1, 64])]; tensor var_1091 = reshape(shape = var_1090, x = var_1089)[name = string("op_1091")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_1073)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_1073)[name = string("x2_13")]; tensor var_1105_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_1105_cast_fp16")]; tensor var_1106_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_1106_cast_fp16")]; tensor var_1107_cast_fp16 = sub(x = var_1105_cast_fp16, y = var_1106_cast_fp16)[name = string("op_1107_cast_fp16")]; tensor var_1108_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_1108_cast_fp16")]; tensor var_1109_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_1109_cast_fp16")]; tensor var_1110_cast_fp16 = add(x = var_1108_cast_fp16, y = var_1109_cast_fp16)[name = string("op_1110_cast_fp16")]; bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; tensor rotated_13_cast_fp16 = concat(axis = var_80, interleave = rotated_13_interleave_0, values = (var_1107_cast_fp16, var_1110_cast_fp16))[name = string("rotated_13_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_1082)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_1082)[name = string("x2_15")]; tensor var_1126_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_1126_cast_fp16")]; tensor var_1127_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_1127_cast_fp16")]; tensor var_1128_cast_fp16 = sub(x = var_1126_cast_fp16, y = var_1127_cast_fp16)[name = string("op_1128_cast_fp16")]; tensor var_1129_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_1129_cast_fp16")]; tensor var_1130_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_1130_cast_fp16")]; tensor var_1131_cast_fp16 = add(x = var_1129_cast_fp16, y = var_1130_cast_fp16)[name = string("op_1131_cast_fp16")]; bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; tensor rotated_15_cast_fp16 = concat(axis = var_80, interleave = rotated_15_interleave_0, values = (var_1128_cast_fp16, var_1131_cast_fp16))[name = string("rotated_15_cast_fp16")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_581, concat_27_values3_0))[name = string("concat_27")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_37)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; tensor coreml_update_state_38 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([19])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([20])]; int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_581, concat_31_values3_0))[name = string("concat_31")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_1091, x = coreml_update_state_38)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; tensor coreml_update_state_39 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; tensor var_1151_begin_0 = const()[name = string("op_1151_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1151_end_0 = const()[name = string("op_1151_end_0"), val = tensor([4, 8, 1024, 64])]; tensor var_1151_end_mask_0 = const()[name = string("op_1151_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1151_cast_fp16 = slice_by_index(begin = var_1151_begin_0, end = var_1151_end_0, end_mask = var_1151_end_mask_0, x = coreml_update_state_39)[name = string("op_1151_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_1151_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_1153_begin_0 = const()[name = string("op_1153_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_1153_end_0 = const()[name = string("op_1153_end_0"), val = tensor([20, 8, 1024, 64])]; tensor var_1153_end_mask_0 = const()[name = string("op_1153_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1153_cast_fp16 = slice_by_index(begin = var_1153_begin_0, end = var_1153_end_0, end_mask = var_1153_end_mask_0, x = coreml_update_state_39)[name = string("op_1153_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_1153_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_1162 = const()[name = string("op_1162"), val = tensor([1, 4, 1, 1])]; tensor x_97_cast_fp16 = tile(reps = var_1162, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_1166 = const()[name = string("op_1166"), val = tensor([1, -1, 1024, 64])]; tensor key_states_15_cast_fp16 = reshape(shape = var_1166, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; tensor var_1169 = const()[name = string("op_1169"), val = tensor([1, 4, 1, 1])]; tensor x_103_cast_fp16 = tile(reps = var_1169, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; tensor var_1173 = const()[name = string("op_1173"), val = tensor([1, -1, 1024, 64])]; tensor value_states_15_cast_fp16 = reshape(shape = var_1173, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")]; bool var_1176_transpose_x_1 = const()[name = string("op_1176_transpose_x_1"), val = bool(false)]; bool var_1176_transpose_y_1 = const()[name = string("op_1176_transpose_y_1"), val = bool(true)]; tensor var_1176_cast_fp16 = matmul(transpose_x = var_1176_transpose_x_1, transpose_y = var_1176_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_1176_cast_fp16")]; fp16 var_1177_to_fp16 = const()[name = string("op_1177_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_13_cast_fp16 = mul(x = var_1176_cast_fp16, y = var_1177_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; tensor var_1188_axes_0 = const()[name = string("op_1188_axes_0"), val = tensor([-1])]; bool var_1188_keep_dims_0 = const()[name = string("op_1188_keep_dims_0"), val = bool(true)]; tensor var_1188_cast_fp16 = reduce_sum(axes = var_1188_axes_0, keep_dims = var_1188_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_1188_cast_fp16")]; tensor attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_1188_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")]; tensor var_1191_perm_0 = const()[name = string("op_1191_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1193 = const()[name = string("op_1193"), val = tensor([1, 1, 2048])]; tensor var_1191_cast_fp16 = transpose(perm = var_1191_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_50")]; tensor input_47_cast_fp16 = reshape(shape = var_1193, x = var_1191_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494277696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496374912))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; tensor var_1204_axes_0 = const()[name = string("op_1204_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496383168)))]; tensor var_1204_cast_fp16 = layer_norm(axes = var_1204_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1204_cast_fp16")]; tensor var_1211 = const()[name = string("op_1211"), val = tensor([0, 2, 1])]; tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; tensor var_1212 = transpose(perm = var_1211, x = var_1204_cast_fp16)[name = string("transpose_49")]; tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1212)[name = string("input_51")]; string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; tensor var_1234_axes_0 = const()[name = string("op_1234_axes_0"), val = tensor([2])]; tensor var_1234 = squeeze(axes = var_1234_axes_0, x = hidden_states_31)[name = string("op_1234")]; tensor var_1235 = const()[name = string("op_1235"), val = tensor([0, 2, 1])]; tensor var_1236 = transpose(perm = var_1235, x = var_1234)[name = string("transpose_48")]; tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1236)[name = string("hidden_states_33_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; tensor var_1244_axes_0 = const()[name = string("op_1244_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496387328)))]; tensor var_1244_cast_fp16 = layer_norm(axes = var_1244_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1244_cast_fp16")]; tensor var_1247 = const()[name = string("op_1247"), val = tensor([0, 2, 1])]; tensor var_1249_axes_0 = const()[name = string("op_1249_axes_0"), val = tensor([2])]; tensor var_1248 = transpose(perm = var_1247, x = var_1244_cast_fp16)[name = string("transpose_47")]; tensor var_1249 = expand_dims(axes = var_1249_axes_0, x = var_1248)[name = string("op_1249")]; string var_1256_pad_type_0 = const()[name = string("op_1256_pad_type_0"), val = string("valid")]; tensor var_1256_strides_0 = const()[name = string("op_1256_strides_0"), val = tensor([1, 1])]; tensor var_1256_pad_0 = const()[name = string("op_1256_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1256_dilations_0 = const()[name = string("op_1256_dilations_0"), val = tensor([1, 1])]; int32 var_1256_groups_0 = const()[name = string("op_1256_groups_0"), val = int32(1)]; tensor var_1256 = conv(dilations = var_1256_dilations_0, groups = var_1256_groups_0, pad = var_1256_pad_0, pad_type = var_1256_pad_type_0, strides = var_1256_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1249)[name = string("op_1256")]; tensor var_1257 = const()[name = string("op_1257"), val = tensor([1, 32, 1, 64])]; tensor var_1258 = reshape(shape = var_1257, x = var_1256)[name = string("op_1258")]; string var_1265_pad_type_0 = const()[name = string("op_1265_pad_type_0"), val = string("valid")]; tensor var_1265_strides_0 = const()[name = string("op_1265_strides_0"), val = tensor([1, 1])]; tensor var_1265_pad_0 = const()[name = string("op_1265_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1265_dilations_0 = const()[name = string("op_1265_dilations_0"), val = tensor([1, 1])]; int32 var_1265_groups_0 = const()[name = string("op_1265_groups_0"), val = int32(1)]; tensor var_1265 = conv(dilations = var_1265_dilations_0, groups = var_1265_groups_0, pad = var_1265_pad_0, pad_type = var_1265_pad_type_0, strides = var_1265_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1249)[name = string("op_1265")]; tensor var_1266 = const()[name = string("op_1266"), val = tensor([1, 8, 1, 64])]; tensor var_1267 = reshape(shape = var_1266, x = var_1265)[name = string("op_1267")]; string var_1274_pad_type_0 = const()[name = string("op_1274_pad_type_0"), val = string("valid")]; tensor var_1274_strides_0 = const()[name = string("op_1274_strides_0"), val = tensor([1, 1])]; tensor var_1274_pad_0 = const()[name = string("op_1274_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1274_dilations_0 = const()[name = string("op_1274_dilations_0"), val = tensor([1, 1])]; int32 var_1274_groups_0 = const()[name = string("op_1274_groups_0"), val = int32(1)]; tensor var_1274 = conv(dilations = var_1274_dilations_0, groups = var_1274_groups_0, pad = var_1274_pad_0, pad_type = var_1274_pad_type_0, strides = var_1274_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1249)[name = string("op_1274")]; tensor var_1275 = const()[name = string("op_1275"), val = tensor([1, 8, 1, 64])]; tensor var_1276 = reshape(shape = var_1275, x = var_1274)[name = string("op_1276")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1258)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1258)[name = string("x2_17")]; tensor var_1290_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1290_cast_fp16")]; tensor var_1291_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor var_1292_cast_fp16 = sub(x = var_1290_cast_fp16, y = var_1291_cast_fp16)[name = string("op_1292_cast_fp16")]; tensor var_1293_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1293_cast_fp16")]; tensor var_1294_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1294_cast_fp16")]; tensor var_1295_cast_fp16 = add(x = var_1293_cast_fp16, y = var_1294_cast_fp16)[name = string("op_1295_cast_fp16")]; bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; tensor rotated_17_cast_fp16 = concat(axis = var_80, interleave = rotated_17_interleave_0, values = (var_1292_cast_fp16, var_1295_cast_fp16))[name = string("rotated_17_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1267)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1267)[name = string("x2_19")]; tensor var_1311_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1311_cast_fp16")]; tensor var_1312_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1312_cast_fp16")]; tensor var_1313_cast_fp16 = sub(x = var_1311_cast_fp16, y = var_1312_cast_fp16)[name = string("op_1313_cast_fp16")]; tensor var_1314_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1314_cast_fp16")]; tensor var_1315_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1315_cast_fp16")]; tensor var_1316_cast_fp16 = add(x = var_1314_cast_fp16, y = var_1315_cast_fp16)[name = string("op_1316_cast_fp16")]; bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; tensor rotated_19_cast_fp16 = concat(axis = var_80, interleave = rotated_19_interleave_0, values = (var_1313_cast_fp16, var_1316_cast_fp16))[name = string("rotated_19_cast_fp16")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_581, concat_35_values3_0))[name = string("concat_35")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_39)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; tensor coreml_update_state_40 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([20])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([21])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_581, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1276, x = coreml_update_state_40)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; tensor coreml_update_state_41 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; tensor var_1336_begin_0 = const()[name = string("op_1336_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1336_end_0 = const()[name = string("op_1336_end_0"), val = tensor([5, 8, 1024, 64])]; tensor var_1336_end_mask_0 = const()[name = string("op_1336_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = coreml_update_state_41)[name = string("op_1336_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1336_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_1338_begin_0 = const()[name = string("op_1338_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_1338_end_0 = const()[name = string("op_1338_end_0"), val = tensor([21, 8, 1024, 64])]; tensor var_1338_end_mask_0 = const()[name = string("op_1338_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1338_cast_fp16 = slice_by_index(begin = var_1338_begin_0, end = var_1338_end_0, end_mask = var_1338_end_mask_0, x = coreml_update_state_41)[name = string("op_1338_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1338_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1347 = const()[name = string("op_1347"), val = tensor([1, 4, 1, 1])]; tensor x_125_cast_fp16 = tile(reps = var_1347, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; tensor var_1351 = const()[name = string("op_1351"), val = tensor([1, -1, 1024, 64])]; tensor key_states_19_cast_fp16 = reshape(shape = var_1351, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_1354 = const()[name = string("op_1354"), val = tensor([1, 4, 1, 1])]; tensor x_131_cast_fp16 = tile(reps = var_1354, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; tensor var_1358 = const()[name = string("op_1358"), val = tensor([1, -1, 1024, 64])]; tensor value_states_19_cast_fp16 = reshape(shape = var_1358, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")]; bool var_1361_transpose_x_1 = const()[name = string("op_1361_transpose_x_1"), val = bool(false)]; bool var_1361_transpose_y_1 = const()[name = string("op_1361_transpose_y_1"), val = bool(true)]; tensor var_1361_cast_fp16 = matmul(transpose_x = var_1361_transpose_x_1, transpose_y = var_1361_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1361_cast_fp16")]; fp16 var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_17_cast_fp16 = mul(x = var_1361_cast_fp16, y = var_1362_to_fp16)[name = string("attn_weights_17_cast_fp16")]; tensor x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; tensor var_1373_axes_0 = const()[name = string("op_1373_axes_0"), val = tensor([-1])]; bool var_1373_keep_dims_0 = const()[name = string("op_1373_keep_dims_0"), val = bool(true)]; tensor var_1373_cast_fp16 = reduce_sum(axes = var_1373_axes_0, keep_dims = var_1373_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1373_cast_fp16")]; tensor attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1373_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_1376_perm_0 = const()[name = string("op_1376_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1378 = const()[name = string("op_1378"), val = tensor([1, 1, 2048])]; tensor var_1376_cast_fp16 = transpose(perm = var_1376_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_46")]; tensor input_61_cast_fp16 = reshape(shape = var_1378, x = var_1376_cast_fp16)[name = string("input_61_cast_fp16")]; tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496391488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498488704))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; tensor var_1389_axes_0 = const()[name = string("op_1389_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498496960)))]; tensor var_1389_cast_fp16 = layer_norm(axes = var_1389_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1389_cast_fp16")]; tensor var_1396 = const()[name = string("op_1396"), val = tensor([0, 2, 1])]; tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; tensor var_1397 = transpose(perm = var_1396, x = var_1389_cast_fp16)[name = string("transpose_45")]; tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1397)[name = string("input_65")]; string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; tensor var_1419_axes_0 = const()[name = string("op_1419_axes_0"), val = tensor([2])]; tensor var_1419 = squeeze(axes = var_1419_axes_0, x = hidden_states_39)[name = string("op_1419")]; tensor var_1420 = const()[name = string("op_1420"), val = tensor([0, 2, 1])]; tensor var_1421 = transpose(perm = var_1420, x = var_1419)[name = string("transpose_44")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1421)[name = string("hidden_states_41_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; tensor var_1429_axes_0 = const()[name = string("op_1429_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498501120)))]; tensor var_1429_cast_fp16 = layer_norm(axes = var_1429_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1429_cast_fp16")]; tensor var_1432 = const()[name = string("op_1432"), val = tensor([0, 2, 1])]; tensor var_1434_axes_0 = const()[name = string("op_1434_axes_0"), val = tensor([2])]; tensor var_1433 = transpose(perm = var_1432, x = var_1429_cast_fp16)[name = string("transpose_43")]; tensor var_1434 = expand_dims(axes = var_1434_axes_0, x = var_1433)[name = string("op_1434")]; string var_1441_pad_type_0 = const()[name = string("op_1441_pad_type_0"), val = string("valid")]; tensor var_1441_strides_0 = const()[name = string("op_1441_strides_0"), val = tensor([1, 1])]; tensor var_1441_pad_0 = const()[name = string("op_1441_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1441_dilations_0 = const()[name = string("op_1441_dilations_0"), val = tensor([1, 1])]; int32 var_1441_groups_0 = const()[name = string("op_1441_groups_0"), val = int32(1)]; tensor var_1441 = conv(dilations = var_1441_dilations_0, groups = var_1441_groups_0, pad = var_1441_pad_0, pad_type = var_1441_pad_type_0, strides = var_1441_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1434)[name = string("op_1441")]; tensor var_1442 = const()[name = string("op_1442"), val = tensor([1, 32, 1, 64])]; tensor var_1443 = reshape(shape = var_1442, x = var_1441)[name = string("op_1443")]; string var_1450_pad_type_0 = const()[name = string("op_1450_pad_type_0"), val = string("valid")]; tensor var_1450_strides_0 = const()[name = string("op_1450_strides_0"), val = tensor([1, 1])]; tensor var_1450_pad_0 = const()[name = string("op_1450_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1450_dilations_0 = const()[name = string("op_1450_dilations_0"), val = tensor([1, 1])]; int32 var_1450_groups_0 = const()[name = string("op_1450_groups_0"), val = int32(1)]; tensor var_1450 = conv(dilations = var_1450_dilations_0, groups = var_1450_groups_0, pad = var_1450_pad_0, pad_type = var_1450_pad_type_0, strides = var_1450_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1434)[name = string("op_1450")]; tensor var_1451 = const()[name = string("op_1451"), val = tensor([1, 8, 1, 64])]; tensor var_1452 = reshape(shape = var_1451, x = var_1450)[name = string("op_1452")]; string var_1459_pad_type_0 = const()[name = string("op_1459_pad_type_0"), val = string("valid")]; tensor var_1459_strides_0 = const()[name = string("op_1459_strides_0"), val = tensor([1, 1])]; tensor var_1459_pad_0 = const()[name = string("op_1459_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1459_dilations_0 = const()[name = string("op_1459_dilations_0"), val = tensor([1, 1])]; int32 var_1459_groups_0 = const()[name = string("op_1459_groups_0"), val = int32(1)]; tensor var_1459 = conv(dilations = var_1459_dilations_0, groups = var_1459_groups_0, pad = var_1459_pad_0, pad_type = var_1459_pad_type_0, strides = var_1459_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1434)[name = string("op_1459")]; tensor var_1460 = const()[name = string("op_1460"), val = tensor([1, 8, 1, 64])]; tensor var_1461 = reshape(shape = var_1460, x = var_1459)[name = string("op_1461")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1443)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1443)[name = string("x2_21")]; tensor var_1475_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1475_cast_fp16")]; tensor var_1476_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1476_cast_fp16")]; tensor var_1477_cast_fp16 = sub(x = var_1475_cast_fp16, y = var_1476_cast_fp16)[name = string("op_1477_cast_fp16")]; tensor var_1478_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1478_cast_fp16")]; tensor var_1479_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1479_cast_fp16")]; tensor var_1480_cast_fp16 = add(x = var_1478_cast_fp16, y = var_1479_cast_fp16)[name = string("op_1480_cast_fp16")]; bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; tensor rotated_21_cast_fp16 = concat(axis = var_80, interleave = rotated_21_interleave_0, values = (var_1477_cast_fp16, var_1480_cast_fp16))[name = string("rotated_21_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1452)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1452)[name = string("x2_23")]; tensor var_1496_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1496_cast_fp16")]; tensor var_1497_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1497_cast_fp16")]; tensor var_1498_cast_fp16 = sub(x = var_1496_cast_fp16, y = var_1497_cast_fp16)[name = string("op_1498_cast_fp16")]; tensor var_1499_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1499_cast_fp16")]; tensor var_1500_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1500_cast_fp16")]; tensor var_1501_cast_fp16 = add(x = var_1499_cast_fp16, y = var_1500_cast_fp16)[name = string("op_1501_cast_fp16")]; bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; tensor rotated_23_cast_fp16 = concat(axis = var_80, interleave = rotated_23_interleave_0, values = (var_1498_cast_fp16, var_1501_cast_fp16))[name = string("rotated_23_cast_fp16")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_581, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_41)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; tensor coreml_update_state_42 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([21])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([22])]; int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_581, concat_47_values3_0))[name = string("concat_47")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1461, x = coreml_update_state_42)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; tensor coreml_update_state_43 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; tensor var_1521_begin_0 = const()[name = string("op_1521_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1521_end_0 = const()[name = string("op_1521_end_0"), val = tensor([6, 8, 1024, 64])]; tensor var_1521_end_mask_0 = const()[name = string("op_1521_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1521_cast_fp16 = slice_by_index(begin = var_1521_begin_0, end = var_1521_end_0, end_mask = var_1521_end_mask_0, x = coreml_update_state_43)[name = string("op_1521_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1521_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_1523_begin_0 = const()[name = string("op_1523_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_1523_end_0 = const()[name = string("op_1523_end_0"), val = tensor([22, 8, 1024, 64])]; tensor var_1523_end_mask_0 = const()[name = string("op_1523_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1523_cast_fp16 = slice_by_index(begin = var_1523_begin_0, end = var_1523_end_0, end_mask = var_1523_end_mask_0, x = coreml_update_state_43)[name = string("op_1523_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1523_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1532 = const()[name = string("op_1532"), val = tensor([1, 4, 1, 1])]; tensor x_153_cast_fp16 = tile(reps = var_1532, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1536 = const()[name = string("op_1536"), val = tensor([1, -1, 1024, 64])]; tensor key_states_23_cast_fp16 = reshape(shape = var_1536, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_1539 = const()[name = string("op_1539"), val = tensor([1, 4, 1, 1])]; tensor x_159_cast_fp16 = tile(reps = var_1539, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; tensor var_1543 = const()[name = string("op_1543"), val = tensor([1, -1, 1024, 64])]; tensor value_states_23_cast_fp16 = reshape(shape = var_1543, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")]; bool var_1546_transpose_x_1 = const()[name = string("op_1546_transpose_x_1"), val = bool(false)]; bool var_1546_transpose_y_1 = const()[name = string("op_1546_transpose_y_1"), val = bool(true)]; tensor var_1546_cast_fp16 = matmul(transpose_x = var_1546_transpose_x_1, transpose_y = var_1546_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1546_cast_fp16")]; fp16 var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_21_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; tensor var_1558_axes_0 = const()[name = string("op_1558_axes_0"), val = tensor([-1])]; bool var_1558_keep_dims_0 = const()[name = string("op_1558_keep_dims_0"), val = bool(true)]; tensor var_1558_cast_fp16 = reduce_sum(axes = var_1558_axes_0, keep_dims = var_1558_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1558_cast_fp16")]; tensor attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1558_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")]; tensor var_1561_perm_0 = const()[name = string("op_1561_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1563 = const()[name = string("op_1563"), val = tensor([1, 1, 2048])]; tensor var_1561_cast_fp16 = transpose(perm = var_1561_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_42")]; tensor input_75_cast_fp16 = reshape(shape = var_1563, x = var_1561_cast_fp16)[name = string("input_75_cast_fp16")]; tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498505280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500602496))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; tensor var_1574_axes_0 = const()[name = string("op_1574_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500610752)))]; tensor var_1574_cast_fp16 = layer_norm(axes = var_1574_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1574_cast_fp16")]; tensor var_1581 = const()[name = string("op_1581"), val = tensor([0, 2, 1])]; tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; tensor var_1582 = transpose(perm = var_1581, x = var_1574_cast_fp16)[name = string("transpose_41")]; tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1582)[name = string("input_79")]; string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; tensor var_1604_axes_0 = const()[name = string("op_1604_axes_0"), val = tensor([2])]; tensor var_1604 = squeeze(axes = var_1604_axes_0, x = hidden_states_47)[name = string("op_1604")]; tensor var_1605 = const()[name = string("op_1605"), val = tensor([0, 2, 1])]; tensor var_1606 = transpose(perm = var_1605, x = var_1604)[name = string("transpose_40")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1606)[name = string("hidden_states_49_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; tensor var_1614_axes_0 = const()[name = string("op_1614_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500614912)))]; tensor var_1614_cast_fp16 = layer_norm(axes = var_1614_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1614_cast_fp16")]; tensor var_1617 = const()[name = string("op_1617"), val = tensor([0, 2, 1])]; tensor var_1619_axes_0 = const()[name = string("op_1619_axes_0"), val = tensor([2])]; tensor var_1618 = transpose(perm = var_1617, x = var_1614_cast_fp16)[name = string("transpose_39")]; tensor var_1619 = expand_dims(axes = var_1619_axes_0, x = var_1618)[name = string("op_1619")]; string var_1626_pad_type_0 = const()[name = string("op_1626_pad_type_0"), val = string("valid")]; tensor var_1626_strides_0 = const()[name = string("op_1626_strides_0"), val = tensor([1, 1])]; tensor var_1626_pad_0 = const()[name = string("op_1626_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1626_dilations_0 = const()[name = string("op_1626_dilations_0"), val = tensor([1, 1])]; int32 var_1626_groups_0 = const()[name = string("op_1626_groups_0"), val = int32(1)]; tensor var_1626 = conv(dilations = var_1626_dilations_0, groups = var_1626_groups_0, pad = var_1626_pad_0, pad_type = var_1626_pad_type_0, strides = var_1626_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1619)[name = string("op_1626")]; tensor var_1627 = const()[name = string("op_1627"), val = tensor([1, 32, 1, 64])]; tensor var_1628 = reshape(shape = var_1627, x = var_1626)[name = string("op_1628")]; string var_1635_pad_type_0 = const()[name = string("op_1635_pad_type_0"), val = string("valid")]; tensor var_1635_strides_0 = const()[name = string("op_1635_strides_0"), val = tensor([1, 1])]; tensor var_1635_pad_0 = const()[name = string("op_1635_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1635_dilations_0 = const()[name = string("op_1635_dilations_0"), val = tensor([1, 1])]; int32 var_1635_groups_0 = const()[name = string("op_1635_groups_0"), val = int32(1)]; tensor var_1635 = conv(dilations = var_1635_dilations_0, groups = var_1635_groups_0, pad = var_1635_pad_0, pad_type = var_1635_pad_type_0, strides = var_1635_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1619)[name = string("op_1635")]; tensor var_1636 = const()[name = string("op_1636"), val = tensor([1, 8, 1, 64])]; tensor var_1637 = reshape(shape = var_1636, x = var_1635)[name = string("op_1637")]; string var_1644_pad_type_0 = const()[name = string("op_1644_pad_type_0"), val = string("valid")]; tensor var_1644_strides_0 = const()[name = string("op_1644_strides_0"), val = tensor([1, 1])]; tensor var_1644_pad_0 = const()[name = string("op_1644_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1644_dilations_0 = const()[name = string("op_1644_dilations_0"), val = tensor([1, 1])]; int32 var_1644_groups_0 = const()[name = string("op_1644_groups_0"), val = int32(1)]; tensor var_1644 = conv(dilations = var_1644_dilations_0, groups = var_1644_groups_0, pad = var_1644_pad_0, pad_type = var_1644_pad_type_0, strides = var_1644_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1619)[name = string("op_1644")]; tensor var_1645 = const()[name = string("op_1645"), val = tensor([1, 8, 1, 64])]; tensor var_1646 = reshape(shape = var_1645, x = var_1644)[name = string("op_1646")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1628)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1628)[name = string("x2_25")]; tensor var_1660_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1660_cast_fp16")]; tensor var_1661_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1661_cast_fp16")]; tensor var_1662_cast_fp16 = sub(x = var_1660_cast_fp16, y = var_1661_cast_fp16)[name = string("op_1662_cast_fp16")]; tensor var_1663_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1663_cast_fp16")]; tensor var_1664_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1664_cast_fp16")]; tensor var_1665_cast_fp16 = add(x = var_1663_cast_fp16, y = var_1664_cast_fp16)[name = string("op_1665_cast_fp16")]; bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; tensor rotated_25_cast_fp16 = concat(axis = var_80, interleave = rotated_25_interleave_0, values = (var_1662_cast_fp16, var_1665_cast_fp16))[name = string("rotated_25_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1637)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1637)[name = string("x2_27")]; tensor var_1681_cast_fp16 = mul(x = x1_27, y = cos_3_cast_fp16)[name = string("op_1681_cast_fp16")]; tensor var_1682_cast_fp16 = mul(x = x2_27, y = sin_3_cast_fp16)[name = string("op_1682_cast_fp16")]; tensor var_1683_cast_fp16 = sub(x = var_1681_cast_fp16, y = var_1682_cast_fp16)[name = string("op_1683_cast_fp16")]; tensor var_1684_cast_fp16 = mul(x = x2_27, y = cos_3_cast_fp16)[name = string("op_1684_cast_fp16")]; tensor var_1685_cast_fp16 = mul(x = x1_27, y = sin_3_cast_fp16)[name = string("op_1685_cast_fp16")]; tensor var_1686_cast_fp16 = add(x = var_1684_cast_fp16, y = var_1685_cast_fp16)[name = string("op_1686_cast_fp16")]; bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; tensor rotated_27_cast_fp16 = concat(axis = var_80, interleave = rotated_27_interleave_0, values = (var_1683_cast_fp16, var_1686_cast_fp16))[name = string("rotated_27_cast_fp16")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_581, concat_51_values3_0))[name = string("concat_51")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27_cast_fp16, x = coreml_update_state_43)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_44 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([22])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([23])]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_581, concat_55_values3_0))[name = string("concat_55")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1646, x = coreml_update_state_44)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_45 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; tensor var_1706_begin_0 = const()[name = string("op_1706_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_1706_end_0 = const()[name = string("op_1706_end_0"), val = tensor([7, 8, 1024, 64])]; tensor var_1706_end_mask_0 = const()[name = string("op_1706_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1706_cast_fp16 = slice_by_index(begin = var_1706_begin_0, end = var_1706_end_0, end_mask = var_1706_end_mask_0, x = coreml_update_state_45)[name = string("op_1706_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1706_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_1708_begin_0 = const()[name = string("op_1708_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_1708_end_0 = const()[name = string("op_1708_end_0"), val = tensor([23, 8, 1024, 64])]; tensor var_1708_end_mask_0 = const()[name = string("op_1708_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1708_cast_fp16 = slice_by_index(begin = var_1708_begin_0, end = var_1708_end_0, end_mask = var_1708_end_mask_0, x = coreml_update_state_45)[name = string("op_1708_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1708_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; tensor var_1717 = const()[name = string("op_1717"), val = tensor([1, 4, 1, 1])]; tensor x_181_cast_fp16 = tile(reps = var_1717, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_1721 = const()[name = string("op_1721"), val = tensor([1, -1, 1024, 64])]; tensor key_states_27_cast_fp16 = reshape(shape = var_1721, x = x_181_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; tensor var_1724 = const()[name = string("op_1724"), val = tensor([1, 4, 1, 1])]; tensor x_187_cast_fp16 = tile(reps = var_1724, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_1728 = const()[name = string("op_1728"), val = tensor([1, -1, 1024, 64])]; tensor value_states_27_cast_fp16 = reshape(shape = var_1728, x = x_187_cast_fp16)[name = string("value_states_27_cast_fp16")]; bool var_1731_transpose_x_1 = const()[name = string("op_1731_transpose_x_1"), val = bool(false)]; bool var_1731_transpose_y_1 = const()[name = string("op_1731_transpose_y_1"), val = bool(true)]; tensor var_1731_cast_fp16 = matmul(transpose_x = var_1731_transpose_x_1, transpose_y = var_1731_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_27_cast_fp16)[name = string("op_1731_cast_fp16")]; fp16 var_1732_to_fp16 = const()[name = string("op_1732_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_25_cast_fp16 = mul(x = var_1731_cast_fp16, y = var_1732_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; tensor var_1743_axes_0 = const()[name = string("op_1743_axes_0"), val = tensor([-1])]; bool var_1743_keep_dims_0 = const()[name = string("op_1743_keep_dims_0"), val = bool(true)]; tensor var_1743_cast_fp16 = reduce_sum(axes = var_1743_axes_0, keep_dims = var_1743_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1743_cast_fp16")]; tensor attn_weights_27_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1743_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_27_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_37_cast_fp16")]; tensor var_1746_perm_0 = const()[name = string("op_1746_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1748 = const()[name = string("op_1748"), val = tensor([1, 1, 2048])]; tensor var_1746_cast_fp16 = transpose(perm = var_1746_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_38")]; tensor input_89_cast_fp16 = reshape(shape = var_1748, x = var_1746_cast_fp16)[name = string("input_89_cast_fp16")]; tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500619072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502716288))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_1759_axes_0 = const()[name = string("op_1759_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502724544)))]; tensor var_1759_cast_fp16 = layer_norm(axes = var_1759_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1759_cast_fp16")]; tensor var_1766 = const()[name = string("op_1766"), val = tensor([0, 2, 1])]; tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; tensor var_1767 = transpose(perm = var_1766, x = var_1759_cast_fp16)[name = string("transpose_37")]; tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1767)[name = string("input_93")]; string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; tensor var_1789_axes_0 = const()[name = string("op_1789_axes_0"), val = tensor([2])]; tensor var_1789 = squeeze(axes = var_1789_axes_0, x = hidden_states_55)[name = string("op_1789")]; tensor var_1790 = const()[name = string("op_1790"), val = tensor([0, 2, 1])]; tensor var_1791 = transpose(perm = var_1790, x = var_1789)[name = string("transpose_36")]; tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1791)[name = string("hidden_states_57_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; tensor var_1799_axes_0 = const()[name = string("op_1799_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502728704)))]; tensor var_1799_cast_fp16 = layer_norm(axes = var_1799_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1799_cast_fp16")]; tensor var_1802 = const()[name = string("op_1802"), val = tensor([0, 2, 1])]; tensor var_1804_axes_0 = const()[name = string("op_1804_axes_0"), val = tensor([2])]; tensor var_1803 = transpose(perm = var_1802, x = var_1799_cast_fp16)[name = string("transpose_35")]; tensor var_1804 = expand_dims(axes = var_1804_axes_0, x = var_1803)[name = string("op_1804")]; string var_1811_pad_type_0 = const()[name = string("op_1811_pad_type_0"), val = string("valid")]; tensor var_1811_strides_0 = const()[name = string("op_1811_strides_0"), val = tensor([1, 1])]; tensor var_1811_pad_0 = const()[name = string("op_1811_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1811_dilations_0 = const()[name = string("op_1811_dilations_0"), val = tensor([1, 1])]; int32 var_1811_groups_0 = const()[name = string("op_1811_groups_0"), val = int32(1)]; tensor var_1811 = conv(dilations = var_1811_dilations_0, groups = var_1811_groups_0, pad = var_1811_pad_0, pad_type = var_1811_pad_type_0, strides = var_1811_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_1804)[name = string("op_1811")]; tensor var_1812 = const()[name = string("op_1812"), val = tensor([1, 32, 1, 64])]; tensor var_1813 = reshape(shape = var_1812, x = var_1811)[name = string("op_1813")]; string var_1820_pad_type_0 = const()[name = string("op_1820_pad_type_0"), val = string("valid")]; tensor var_1820_strides_0 = const()[name = string("op_1820_strides_0"), val = tensor([1, 1])]; tensor var_1820_pad_0 = const()[name = string("op_1820_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1820_dilations_0 = const()[name = string("op_1820_dilations_0"), val = tensor([1, 1])]; int32 var_1820_groups_0 = const()[name = string("op_1820_groups_0"), val = int32(1)]; tensor var_1820 = conv(dilations = var_1820_dilations_0, groups = var_1820_groups_0, pad = var_1820_pad_0, pad_type = var_1820_pad_type_0, strides = var_1820_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_1804)[name = string("op_1820")]; tensor var_1821 = const()[name = string("op_1821"), val = tensor([1, 8, 1, 64])]; tensor var_1822 = reshape(shape = var_1821, x = var_1820)[name = string("op_1822")]; string var_1829_pad_type_0 = const()[name = string("op_1829_pad_type_0"), val = string("valid")]; tensor var_1829_strides_0 = const()[name = string("op_1829_strides_0"), val = tensor([1, 1])]; tensor var_1829_pad_0 = const()[name = string("op_1829_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1829_dilations_0 = const()[name = string("op_1829_dilations_0"), val = tensor([1, 1])]; int32 var_1829_groups_0 = const()[name = string("op_1829_groups_0"), val = int32(1)]; tensor var_1829 = conv(dilations = var_1829_dilations_0, groups = var_1829_groups_0, pad = var_1829_pad_0, pad_type = var_1829_pad_type_0, strides = var_1829_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_1804)[name = string("op_1829")]; tensor var_1830 = const()[name = string("op_1830"), val = tensor([1, 8, 1, 64])]; tensor var_1831 = reshape(shape = var_1830, x = var_1829)[name = string("op_1831")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1813)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1813)[name = string("x2_29")]; tensor var_1845_cast_fp16 = mul(x = x1_29, y = cos_3_cast_fp16)[name = string("op_1845_cast_fp16")]; tensor var_1846_cast_fp16 = mul(x = x2_29, y = sin_3_cast_fp16)[name = string("op_1846_cast_fp16")]; tensor var_1847_cast_fp16 = sub(x = var_1845_cast_fp16, y = var_1846_cast_fp16)[name = string("op_1847_cast_fp16")]; tensor var_1848_cast_fp16 = mul(x = x2_29, y = cos_3_cast_fp16)[name = string("op_1848_cast_fp16")]; tensor var_1849_cast_fp16 = mul(x = x1_29, y = sin_3_cast_fp16)[name = string("op_1849_cast_fp16")]; tensor var_1850_cast_fp16 = add(x = var_1848_cast_fp16, y = var_1849_cast_fp16)[name = string("op_1850_cast_fp16")]; bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; tensor rotated_29_cast_fp16 = concat(axis = var_80, interleave = rotated_29_interleave_0, values = (var_1847_cast_fp16, var_1850_cast_fp16))[name = string("rotated_29_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_1822)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_1822)[name = string("x2_31")]; tensor var_1866_cast_fp16 = mul(x = x1_31, y = cos_3_cast_fp16)[name = string("op_1866_cast_fp16")]; tensor var_1867_cast_fp16 = mul(x = x2_31, y = sin_3_cast_fp16)[name = string("op_1867_cast_fp16")]; tensor var_1868_cast_fp16 = sub(x = var_1866_cast_fp16, y = var_1867_cast_fp16)[name = string("op_1868_cast_fp16")]; tensor var_1869_cast_fp16 = mul(x = x2_31, y = cos_3_cast_fp16)[name = string("op_1869_cast_fp16")]; tensor var_1870_cast_fp16 = mul(x = x1_31, y = sin_3_cast_fp16)[name = string("op_1870_cast_fp16")]; tensor var_1871_cast_fp16 = add(x = var_1869_cast_fp16, y = var_1870_cast_fp16)[name = string("op_1871_cast_fp16")]; bool rotated_31_interleave_0 = const()[name = string("rotated_31_interleave_0"), val = bool(false)]; tensor rotated_31_cast_fp16 = concat(axis = var_80, interleave = rotated_31_interleave_0, values = (var_1868_cast_fp16, var_1871_cast_fp16))[name = string("rotated_31_cast_fp16")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_581, concat_59_values3_0))[name = string("concat_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_31_cast_fp16, x = coreml_update_state_45)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_46 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([23])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([24])]; int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_581, concat_63_values3_0))[name = string("concat_63")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_1831, x = coreml_update_state_46)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_47 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; tensor var_1891_begin_0 = const()[name = string("op_1891_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_1891_end_0 = const()[name = string("op_1891_end_0"), val = tensor([8, 8, 1024, 64])]; tensor var_1891_end_mask_0 = const()[name = string("op_1891_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1891_cast_fp16 = slice_by_index(begin = var_1891_begin_0, end = var_1891_end_0, end_mask = var_1891_end_mask_0, x = coreml_update_state_47)[name = string("op_1891_cast_fp16")]; tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_1891_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; tensor var_1893_begin_0 = const()[name = string("op_1893_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_1893_end_0 = const()[name = string("op_1893_end_0"), val = tensor([24, 8, 1024, 64])]; tensor var_1893_end_mask_0 = const()[name = string("op_1893_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1893_cast_fp16 = slice_by_index(begin = var_1893_begin_0, end = var_1893_end_0, end_mask = var_1893_end_mask_0, x = coreml_update_state_47)[name = string("op_1893_cast_fp16")]; tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_1893_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_1902 = const()[name = string("op_1902"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_1902, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_1906 = const()[name = string("op_1906"), val = tensor([1, -1, 1024, 64])]; tensor key_states_31_cast_fp16 = reshape(shape = var_1906, x = x_209_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_1909 = const()[name = string("op_1909"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_1909, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; tensor var_1913 = const()[name = string("op_1913"), val = tensor([1, -1, 1024, 64])]; tensor value_states_31_cast_fp16 = reshape(shape = var_1913, x = x_215_cast_fp16)[name = string("value_states_31_cast_fp16")]; bool var_1916_transpose_x_1 = const()[name = string("op_1916_transpose_x_1"), val = bool(false)]; bool var_1916_transpose_y_1 = const()[name = string("op_1916_transpose_y_1"), val = bool(true)]; tensor var_1916_cast_fp16 = matmul(transpose_x = var_1916_transpose_x_1, transpose_y = var_1916_transpose_y_1, x = rotated_29_cast_fp16, y = key_states_31_cast_fp16)[name = string("op_1916_cast_fp16")]; fp16 var_1917_to_fp16 = const()[name = string("op_1917_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_29_cast_fp16 = mul(x = var_1916_cast_fp16, y = var_1917_to_fp16)[name = string("attn_weights_29_cast_fp16")]; tensor x_217_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; tensor exp_x_15_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_15_cast_fp16")]; tensor var_1928_axes_0 = const()[name = string("op_1928_axes_0"), val = tensor([-1])]; bool var_1928_keep_dims_0 = const()[name = string("op_1928_keep_dims_0"), val = bool(true)]; tensor var_1928_cast_fp16 = reduce_sum(axes = var_1928_axes_0, keep_dims = var_1928_keep_dims_0, x = exp_x_15_cast_fp16)[name = string("op_1928_cast_fp16")]; tensor attn_weights_31_cast_fp16 = real_div(x = exp_x_15_cast_fp16, y = var_1928_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = attn_weights_31_cast_fp16, y = value_states_31_cast_fp16)[name = string("attn_output_43_cast_fp16")]; tensor var_1931_perm_0 = const()[name = string("op_1931_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1933 = const()[name = string("op_1933"), val = tensor([1, 1, 2048])]; tensor var_1931_cast_fp16 = transpose(perm = var_1931_perm_0, x = attn_output_43_cast_fp16)[name = string("transpose_34")]; tensor input_103_cast_fp16 = reshape(shape = var_1933, x = var_1931_cast_fp16)[name = string("input_103_cast_fp16")]; tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502732864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504830080))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; tensor var_1944_axes_0 = const()[name = string("op_1944_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504838336)))]; tensor var_1944_cast_fp16 = layer_norm(axes = var_1944_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1944_cast_fp16")]; tensor var_1951 = const()[name = string("op_1951"), val = tensor([0, 2, 1])]; tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; tensor var_1952 = transpose(perm = var_1951, x = var_1944_cast_fp16)[name = string("transpose_33")]; tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1952)[name = string("input_107")]; string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; string up_states_15_pad_type_0 = const()[name = string("up_states_15_pad_type_0"), val = string("valid")]; tensor up_states_15_strides_0 = const()[name = string("up_states_15_strides_0"), val = tensor([1, 1])]; tensor up_states_15_pad_0 = const()[name = string("up_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_15_dilations_0 = const()[name = string("up_states_15_dilations_0"), val = tensor([1, 1])]; int32 up_states_15_groups_0 = const()[name = string("up_states_15_groups_0"), val = int32(1)]; tensor up_states_15 = conv(dilations = up_states_15_dilations_0, groups = up_states_15_groups_0, pad = up_states_15_pad_0, pad_type = up_states_15_pad_type_0, strides = up_states_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states_15")]; tensor gate_states_15 = silu(x = input_109)[name = string("gate_states_15")]; tensor input_111 = mul(x = gate_states_15, y = up_states_15)[name = string("input_111")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; tensor var_1974_axes_0 = const()[name = string("op_1974_axes_0"), val = tensor([2])]; tensor var_1974 = squeeze(axes = var_1974_axes_0, x = hidden_states_63)[name = string("op_1974")]; tensor var_1975 = const()[name = string("op_1975"), val = tensor([0, 2, 1])]; tensor var_1976 = transpose(perm = var_1975, x = var_1974)[name = string("transpose_32")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_1976)[name = string("hidden_states_65_cast_fp16")]; tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_33_cast_fp16")]; tensor input_113_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_33_cast_fp16)[name = string("input_113_cast_fp16")]; tensor var_1984_axes_0 = const()[name = string("op_1984_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504842496)))]; tensor var_1984_cast_fp16 = layer_norm(axes = var_1984_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_113_cast_fp16)[name = string("op_1984_cast_fp16")]; tensor var_1987 = const()[name = string("op_1987"), val = tensor([0, 2, 1])]; tensor var_1989_axes_0 = const()[name = string("op_1989_axes_0"), val = tensor([2])]; tensor var_1988 = transpose(perm = var_1987, x = var_1984_cast_fp16)[name = string("transpose_31")]; tensor var_1989 = expand_dims(axes = var_1989_axes_0, x = var_1988)[name = string("op_1989")]; string var_1996_pad_type_0 = const()[name = string("op_1996_pad_type_0"), val = string("valid")]; tensor var_1996_strides_0 = const()[name = string("op_1996_strides_0"), val = tensor([1, 1])]; tensor var_1996_pad_0 = const()[name = string("op_1996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1996_dilations_0 = const()[name = string("op_1996_dilations_0"), val = tensor([1, 1])]; int32 var_1996_groups_0 = const()[name = string("op_1996_groups_0"), val = int32(1)]; tensor var_1996 = conv(dilations = var_1996_dilations_0, groups = var_1996_groups_0, pad = var_1996_pad_0, pad_type = var_1996_pad_type_0, strides = var_1996_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_1989)[name = string("op_1996")]; tensor var_1997 = const()[name = string("op_1997"), val = tensor([1, 32, 1, 64])]; tensor var_1998 = reshape(shape = var_1997, x = var_1996)[name = string("op_1998")]; string var_2005_pad_type_0 = const()[name = string("op_2005_pad_type_0"), val = string("valid")]; tensor var_2005_strides_0 = const()[name = string("op_2005_strides_0"), val = tensor([1, 1])]; tensor var_2005_pad_0 = const()[name = string("op_2005_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2005_dilations_0 = const()[name = string("op_2005_dilations_0"), val = tensor([1, 1])]; int32 var_2005_groups_0 = const()[name = string("op_2005_groups_0"), val = int32(1)]; tensor var_2005 = conv(dilations = var_2005_dilations_0, groups = var_2005_groups_0, pad = var_2005_pad_0, pad_type = var_2005_pad_type_0, strides = var_2005_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_1989)[name = string("op_2005")]; tensor var_2006 = const()[name = string("op_2006"), val = tensor([1, 8, 1, 64])]; tensor var_2007 = reshape(shape = var_2006, x = var_2005)[name = string("op_2007")]; string var_2014_pad_type_0 = const()[name = string("op_2014_pad_type_0"), val = string("valid")]; tensor var_2014_strides_0 = const()[name = string("op_2014_strides_0"), val = tensor([1, 1])]; tensor var_2014_pad_0 = const()[name = string("op_2014_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2014_dilations_0 = const()[name = string("op_2014_dilations_0"), val = tensor([1, 1])]; int32 var_2014_groups_0 = const()[name = string("op_2014_groups_0"), val = int32(1)]; tensor var_2014 = conv(dilations = var_2014_dilations_0, groups = var_2014_groups_0, pad = var_2014_pad_0, pad_type = var_2014_pad_type_0, strides = var_2014_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_1989)[name = string("op_2014")]; tensor var_2015 = const()[name = string("op_2015"), val = tensor([1, 8, 1, 64])]; tensor var_2016 = reshape(shape = var_2015, x = var_2014)[name = string("op_2016")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_1998)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_1998)[name = string("x2_33")]; tensor var_2030_cast_fp16 = mul(x = x1_33, y = cos_3_cast_fp16)[name = string("op_2030_cast_fp16")]; tensor var_2031_cast_fp16 = mul(x = x2_33, y = sin_3_cast_fp16)[name = string("op_2031_cast_fp16")]; tensor var_2032_cast_fp16 = sub(x = var_2030_cast_fp16, y = var_2031_cast_fp16)[name = string("op_2032_cast_fp16")]; tensor var_2033_cast_fp16 = mul(x = x2_33, y = cos_3_cast_fp16)[name = string("op_2033_cast_fp16")]; tensor var_2034_cast_fp16 = mul(x = x1_33, y = sin_3_cast_fp16)[name = string("op_2034_cast_fp16")]; tensor var_2035_cast_fp16 = add(x = var_2033_cast_fp16, y = var_2034_cast_fp16)[name = string("op_2035_cast_fp16")]; bool rotated_33_interleave_0 = const()[name = string("rotated_33_interleave_0"), val = bool(false)]; tensor rotated_33_cast_fp16 = concat(axis = var_80, interleave = rotated_33_interleave_0, values = (var_2032_cast_fp16, var_2035_cast_fp16))[name = string("rotated_33_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_2007)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_2007)[name = string("x2_35")]; tensor var_2051_cast_fp16 = mul(x = x1_35, y = cos_3_cast_fp16)[name = string("op_2051_cast_fp16")]; tensor var_2052_cast_fp16 = mul(x = x2_35, y = sin_3_cast_fp16)[name = string("op_2052_cast_fp16")]; tensor var_2053_cast_fp16 = sub(x = var_2051_cast_fp16, y = var_2052_cast_fp16)[name = string("op_2053_cast_fp16")]; tensor var_2054_cast_fp16 = mul(x = x2_35, y = cos_3_cast_fp16)[name = string("op_2054_cast_fp16")]; tensor var_2055_cast_fp16 = mul(x = x1_35, y = sin_3_cast_fp16)[name = string("op_2055_cast_fp16")]; tensor var_2056_cast_fp16 = add(x = var_2054_cast_fp16, y = var_2055_cast_fp16)[name = string("op_2056_cast_fp16")]; bool rotated_35_interleave_0 = const()[name = string("rotated_35_interleave_0"), val = bool(false)]; tensor rotated_35_cast_fp16 = concat(axis = var_80, interleave = rotated_35_interleave_0, values = (var_2053_cast_fp16, var_2056_cast_fp16))[name = string("rotated_35_cast_fp16")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_581, concat_67_values3_0))[name = string("concat_67")]; tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = rotated_35_cast_fp16, x = coreml_update_state_47)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_48 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([24])]; tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([25])]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_581, concat_71_values3_0))[name = string("concat_71")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = var_2016, x = coreml_update_state_48)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_49 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; tensor var_2076_begin_0 = const()[name = string("op_2076_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2076_end_0 = const()[name = string("op_2076_end_0"), val = tensor([9, 8, 1024, 64])]; tensor var_2076_end_mask_0 = const()[name = string("op_2076_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2076_cast_fp16 = slice_by_index(begin = var_2076_begin_0, end = var_2076_end_0, end_mask = var_2076_end_mask_0, x = coreml_update_state_49)[name = string("op_2076_cast_fp16")]; tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_2076_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; tensor var_2078_begin_0 = const()[name = string("op_2078_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_2078_end_0 = const()[name = string("op_2078_end_0"), val = tensor([25, 8, 1024, 64])]; tensor var_2078_end_mask_0 = const()[name = string("op_2078_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = coreml_update_state_49)[name = string("op_2078_cast_fp16")]; tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_2078_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; tensor x_235_axes_0 = const()[name = string("x_235_axes_0"), val = tensor([1])]; tensor x_235_cast_fp16 = expand_dims(axes = x_235_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_235_cast_fp16")]; tensor var_2087 = const()[name = string("op_2087"), val = tensor([1, 4, 1, 1])]; tensor x_237_cast_fp16 = tile(reps = var_2087, x = x_235_cast_fp16)[name = string("x_237_cast_fp16")]; tensor var_2091 = const()[name = string("op_2091"), val = tensor([1, -1, 1024, 64])]; tensor key_states_35_cast_fp16 = reshape(shape = var_2091, x = x_237_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor x_241_axes_0 = const()[name = string("x_241_axes_0"), val = tensor([1])]; tensor x_241_cast_fp16 = expand_dims(axes = x_241_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_241_cast_fp16")]; tensor var_2094 = const()[name = string("op_2094"), val = tensor([1, 4, 1, 1])]; tensor x_243_cast_fp16 = tile(reps = var_2094, x = x_241_cast_fp16)[name = string("x_243_cast_fp16")]; tensor var_2098 = const()[name = string("op_2098"), val = tensor([1, -1, 1024, 64])]; tensor value_states_35_cast_fp16 = reshape(shape = var_2098, x = x_243_cast_fp16)[name = string("value_states_35_cast_fp16")]; bool var_2101_transpose_x_1 = const()[name = string("op_2101_transpose_x_1"), val = bool(false)]; bool var_2101_transpose_y_1 = const()[name = string("op_2101_transpose_y_1"), val = bool(true)]; tensor var_2101_cast_fp16 = matmul(transpose_x = var_2101_transpose_x_1, transpose_y = var_2101_transpose_y_1, x = rotated_33_cast_fp16, y = key_states_35_cast_fp16)[name = string("op_2101_cast_fp16")]; fp16 var_2102_to_fp16 = const()[name = string("op_2102_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_33_cast_fp16 = mul(x = var_2101_cast_fp16, y = var_2102_to_fp16)[name = string("attn_weights_33_cast_fp16")]; tensor x_245_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("x_245_cast_fp16")]; tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_245_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; tensor x_247_cast_fp16 = sub(x = x_245_cast_fp16, y = reduce_max_8_cast_fp16)[name = string("x_247_cast_fp16")]; tensor exp_x_17_cast_fp16 = exp(x = x_247_cast_fp16)[name = string("exp_x_17_cast_fp16")]; tensor var_2113_axes_0 = const()[name = string("op_2113_axes_0"), val = tensor([-1])]; bool var_2113_keep_dims_0 = const()[name = string("op_2113_keep_dims_0"), val = bool(true)]; tensor var_2113_cast_fp16 = reduce_sum(axes = var_2113_axes_0, keep_dims = var_2113_keep_dims_0, x = exp_x_17_cast_fp16)[name = string("op_2113_cast_fp16")]; tensor attn_weights_35_cast_fp16 = real_div(x = exp_x_17_cast_fp16, y = var_2113_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = attn_weights_35_cast_fp16, y = value_states_35_cast_fp16)[name = string("attn_output_49_cast_fp16")]; tensor var_2116_perm_0 = const()[name = string("op_2116_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2118 = const()[name = string("op_2118"), val = tensor([1, 1, 2048])]; tensor var_2116_cast_fp16 = transpose(perm = var_2116_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_30")]; tensor input_117_cast_fp16 = reshape(shape = var_2118, x = var_2116_cast_fp16)[name = string("input_117_cast_fp16")]; tensor model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504846656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506943872))))[name = string("model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor hidden_states_69_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = linear_8_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor mean_35_axes_0 = const()[name = string("mean_35_axes_0"), val = tensor([-1])]; bool mean_35_keep_dims_0 = const()[name = string("mean_35_keep_dims_0"), val = bool(true)]; tensor mean_35_cast_fp16 = reduce_mean(axes = mean_35_axes_0, keep_dims = mean_35_keep_dims_0, x = hidden_states_69_cast_fp16)[name = string("mean_35_cast_fp16")]; tensor input_119_cast_fp16 = sub(x = hidden_states_69_cast_fp16, y = mean_35_cast_fp16)[name = string("input_119_cast_fp16")]; tensor var_2129_axes_0 = const()[name = string("op_2129_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506952128)))]; tensor var_2129_cast_fp16 = layer_norm(axes = var_2129_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_2129_cast_fp16")]; tensor var_2136 = const()[name = string("op_2136"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_2137 = transpose(perm = var_2136, x = var_2129_cast_fp16)[name = string("transpose_29")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_2137)[name = string("input_121")]; string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; string up_states_17_pad_type_0 = const()[name = string("up_states_17_pad_type_0"), val = string("valid")]; tensor up_states_17_strides_0 = const()[name = string("up_states_17_strides_0"), val = tensor([1, 1])]; tensor up_states_17_pad_0 = const()[name = string("up_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_17_dilations_0 = const()[name = string("up_states_17_dilations_0"), val = tensor([1, 1])]; int32 up_states_17_groups_0 = const()[name = string("up_states_17_groups_0"), val = int32(1)]; tensor up_states_17 = conv(dilations = up_states_17_dilations_0, groups = up_states_17_groups_0, pad = up_states_17_pad_0, pad_type = up_states_17_pad_type_0, strides = up_states_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_states_17")]; tensor gate_states_17 = silu(x = input_123)[name = string("gate_states_17")]; tensor input_125 = mul(x = gate_states_17, y = up_states_17)[name = string("input_125")]; string hidden_states_71_pad_type_0 = const()[name = string("hidden_states_71_pad_type_0"), val = string("valid")]; tensor hidden_states_71_strides_0 = const()[name = string("hidden_states_71_strides_0"), val = tensor([1, 1])]; tensor hidden_states_71_pad_0 = const()[name = string("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_71_dilations_0 = const()[name = string("hidden_states_71_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_71_groups_0 = const()[name = string("hidden_states_71_groups_0"), val = int32(1)]; tensor hidden_states_71 = conv(dilations = hidden_states_71_dilations_0, groups = hidden_states_71_groups_0, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = hidden_states_71_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_125)[name = string("hidden_states_71")]; tensor var_2159_axes_0 = const()[name = string("op_2159_axes_0"), val = tensor([2])]; tensor var_2159 = squeeze(axes = var_2159_axes_0, x = hidden_states_71)[name = string("op_2159")]; tensor var_2160 = const()[name = string("op_2160"), val = tensor([0, 2, 1])]; tensor var_2161 = transpose(perm = var_2160, x = var_2159)[name = string("transpose_28")]; tensor hidden_states_73_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = var_2161)[name = string("hidden_states_73_cast_fp16")]; tensor mean_37_axes_0 = const()[name = string("mean_37_axes_0"), val = tensor([-1])]; bool mean_37_keep_dims_0 = const()[name = string("mean_37_keep_dims_0"), val = bool(true)]; tensor mean_37_cast_fp16 = reduce_mean(axes = mean_37_axes_0, keep_dims = mean_37_keep_dims_0, x = hidden_states_73_cast_fp16)[name = string("mean_37_cast_fp16")]; tensor input_127_cast_fp16 = sub(x = hidden_states_73_cast_fp16, y = mean_37_cast_fp16)[name = string("input_127_cast_fp16")]; tensor var_2169_axes_0 = const()[name = string("op_2169_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506956288)))]; tensor var_2169_cast_fp16 = layer_norm(axes = var_2169_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_127_cast_fp16)[name = string("op_2169_cast_fp16")]; tensor var_2172 = const()[name = string("op_2172"), val = tensor([0, 2, 1])]; tensor var_2174_axes_0 = const()[name = string("op_2174_axes_0"), val = tensor([2])]; tensor var_2173 = transpose(perm = var_2172, x = var_2169_cast_fp16)[name = string("transpose_27")]; tensor var_2174 = expand_dims(axes = var_2174_axes_0, x = var_2173)[name = string("op_2174")]; string var_2181_pad_type_0 = const()[name = string("op_2181_pad_type_0"), val = string("valid")]; tensor var_2181_strides_0 = const()[name = string("op_2181_strides_0"), val = tensor([1, 1])]; tensor var_2181_pad_0 = const()[name = string("op_2181_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2181_dilations_0 = const()[name = string("op_2181_dilations_0"), val = tensor([1, 1])]; int32 var_2181_groups_0 = const()[name = string("op_2181_groups_0"), val = int32(1)]; tensor var_2181 = conv(dilations = var_2181_dilations_0, groups = var_2181_groups_0, pad = var_2181_pad_0, pad_type = var_2181_pad_type_0, strides = var_2181_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_2174)[name = string("op_2181")]; tensor var_2182 = const()[name = string("op_2182"), val = tensor([1, 32, 1, 64])]; tensor var_2183 = reshape(shape = var_2182, x = var_2181)[name = string("op_2183")]; string var_2190_pad_type_0 = const()[name = string("op_2190_pad_type_0"), val = string("valid")]; tensor var_2190_strides_0 = const()[name = string("op_2190_strides_0"), val = tensor([1, 1])]; tensor var_2190_pad_0 = const()[name = string("op_2190_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2190_dilations_0 = const()[name = string("op_2190_dilations_0"), val = tensor([1, 1])]; int32 var_2190_groups_0 = const()[name = string("op_2190_groups_0"), val = int32(1)]; tensor var_2190 = conv(dilations = var_2190_dilations_0, groups = var_2190_groups_0, pad = var_2190_pad_0, pad_type = var_2190_pad_type_0, strides = var_2190_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_2174)[name = string("op_2190")]; tensor var_2191 = const()[name = string("op_2191"), val = tensor([1, 8, 1, 64])]; tensor var_2192 = reshape(shape = var_2191, x = var_2190)[name = string("op_2192")]; string var_2199_pad_type_0 = const()[name = string("op_2199_pad_type_0"), val = string("valid")]; tensor var_2199_strides_0 = const()[name = string("op_2199_strides_0"), val = tensor([1, 1])]; tensor var_2199_pad_0 = const()[name = string("op_2199_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2199_dilations_0 = const()[name = string("op_2199_dilations_0"), val = tensor([1, 1])]; int32 var_2199_groups_0 = const()[name = string("op_2199_groups_0"), val = int32(1)]; tensor var_2199 = conv(dilations = var_2199_dilations_0, groups = var_2199_groups_0, pad = var_2199_pad_0, pad_type = var_2199_pad_type_0, strides = var_2199_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_2174)[name = string("op_2199")]; tensor var_2200 = const()[name = string("op_2200"), val = tensor([1, 8, 1, 64])]; tensor var_2201 = reshape(shape = var_2200, x = var_2199)[name = string("op_2201")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_2183)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_2183)[name = string("x2_37")]; tensor var_2215_cast_fp16 = mul(x = x1_37, y = cos_3_cast_fp16)[name = string("op_2215_cast_fp16")]; tensor var_2216_cast_fp16 = mul(x = x2_37, y = sin_3_cast_fp16)[name = string("op_2216_cast_fp16")]; tensor var_2217_cast_fp16 = sub(x = var_2215_cast_fp16, y = var_2216_cast_fp16)[name = string("op_2217_cast_fp16")]; tensor var_2218_cast_fp16 = mul(x = x2_37, y = cos_3_cast_fp16)[name = string("op_2218_cast_fp16")]; tensor var_2219_cast_fp16 = mul(x = x1_37, y = sin_3_cast_fp16)[name = string("op_2219_cast_fp16")]; tensor var_2220_cast_fp16 = add(x = var_2218_cast_fp16, y = var_2219_cast_fp16)[name = string("op_2220_cast_fp16")]; bool rotated_37_interleave_0 = const()[name = string("rotated_37_interleave_0"), val = bool(false)]; tensor rotated_37_cast_fp16 = concat(axis = var_80, interleave = rotated_37_interleave_0, values = (var_2217_cast_fp16, var_2220_cast_fp16))[name = string("rotated_37_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_2192)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_2192)[name = string("x2_39")]; tensor var_2236_cast_fp16 = mul(x = x1_39, y = cos_3_cast_fp16)[name = string("op_2236_cast_fp16")]; tensor var_2237_cast_fp16 = mul(x = x2_39, y = sin_3_cast_fp16)[name = string("op_2237_cast_fp16")]; tensor var_2238_cast_fp16 = sub(x = var_2236_cast_fp16, y = var_2237_cast_fp16)[name = string("op_2238_cast_fp16")]; tensor var_2239_cast_fp16 = mul(x = x2_39, y = cos_3_cast_fp16)[name = string("op_2239_cast_fp16")]; tensor var_2240_cast_fp16 = mul(x = x1_39, y = sin_3_cast_fp16)[name = string("op_2240_cast_fp16")]; tensor var_2241_cast_fp16 = add(x = var_2239_cast_fp16, y = var_2240_cast_fp16)[name = string("op_2241_cast_fp16")]; bool rotated_39_interleave_0 = const()[name = string("rotated_39_interleave_0"), val = bool(false)]; tensor rotated_39_cast_fp16 = concat(axis = var_80, interleave = rotated_39_interleave_0, values = (var_2238_cast_fp16, var_2241_cast_fp16))[name = string("rotated_39_cast_fp16")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([9])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([10])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_112, concat_75_values1_0, var_581, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = rotated_39_cast_fp16, x = coreml_update_state_49)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_50 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([25])]; tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([26])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_118, concat_79_values1_0, var_581, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = var_2201, x = coreml_update_state_50)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_51 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; tensor var_2261_begin_0 = const()[name = string("op_2261_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2261_end_0 = const()[name = string("op_2261_end_0"), val = tensor([10, 8, 1024, 64])]; tensor var_2261_end_mask_0 = const()[name = string("op_2261_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2261_cast_fp16 = slice_by_index(begin = var_2261_begin_0, end = var_2261_end_0, end_mask = var_2261_end_mask_0, x = coreml_update_state_51)[name = string("op_2261_cast_fp16")]; tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_2261_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; tensor var_2263_begin_0 = const()[name = string("op_2263_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_2263_end_0 = const()[name = string("op_2263_end_0"), val = tensor([26, 8, 1024, 64])]; tensor var_2263_end_mask_0 = const()[name = string("op_2263_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2263_cast_fp16 = slice_by_index(begin = var_2263_begin_0, end = var_2263_end_0, end_mask = var_2263_end_mask_0, x = coreml_update_state_51)[name = string("op_2263_cast_fp16")]; tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_2263_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; tensor x_263_axes_0 = const()[name = string("x_263_axes_0"), val = tensor([1])]; tensor x_263_cast_fp16 = expand_dims(axes = x_263_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_263_cast_fp16")]; tensor var_2272 = const()[name = string("op_2272"), val = tensor([1, 4, 1, 1])]; tensor x_265_cast_fp16 = tile(reps = var_2272, x = x_263_cast_fp16)[name = string("x_265_cast_fp16")]; tensor var_2276 = const()[name = string("op_2276"), val = tensor([1, -1, 1024, 64])]; tensor key_states_39_cast_fp16 = reshape(shape = var_2276, x = x_265_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor x_269_axes_0 = const()[name = string("x_269_axes_0"), val = tensor([1])]; tensor x_269_cast_fp16 = expand_dims(axes = x_269_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_269_cast_fp16")]; tensor var_2279 = const()[name = string("op_2279"), val = tensor([1, 4, 1, 1])]; tensor x_271_cast_fp16 = tile(reps = var_2279, x = x_269_cast_fp16)[name = string("x_271_cast_fp16")]; tensor var_2283 = const()[name = string("op_2283"), val = tensor([1, -1, 1024, 64])]; tensor value_states_39_cast_fp16 = reshape(shape = var_2283, x = x_271_cast_fp16)[name = string("value_states_39_cast_fp16")]; bool var_2286_transpose_x_1 = const()[name = string("op_2286_transpose_x_1"), val = bool(false)]; bool var_2286_transpose_y_1 = const()[name = string("op_2286_transpose_y_1"), val = bool(true)]; tensor var_2286_cast_fp16 = matmul(transpose_x = var_2286_transpose_x_1, transpose_y = var_2286_transpose_y_1, x = rotated_37_cast_fp16, y = key_states_39_cast_fp16)[name = string("op_2286_cast_fp16")]; fp16 var_2287_to_fp16 = const()[name = string("op_2287_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_37_cast_fp16 = mul(x = var_2286_cast_fp16, y = var_2287_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor x_273_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("x_273_cast_fp16")]; tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; tensor reduce_max_9_cast_fp16 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_273_cast_fp16)[name = string("reduce_max_9_cast_fp16")]; tensor x_275_cast_fp16 = sub(x = x_273_cast_fp16, y = reduce_max_9_cast_fp16)[name = string("x_275_cast_fp16")]; tensor exp_x_19_cast_fp16 = exp(x = x_275_cast_fp16)[name = string("exp_x_19_cast_fp16")]; tensor var_2298_axes_0 = const()[name = string("op_2298_axes_0"), val = tensor([-1])]; bool var_2298_keep_dims_0 = const()[name = string("op_2298_keep_dims_0"), val = bool(true)]; tensor var_2298_cast_fp16 = reduce_sum(axes = var_2298_axes_0, keep_dims = var_2298_keep_dims_0, x = exp_x_19_cast_fp16)[name = string("op_2298_cast_fp16")]; tensor attn_weights_39_cast_fp16 = real_div(x = exp_x_19_cast_fp16, y = var_2298_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; bool attn_output_55_transpose_x_0 = const()[name = string("attn_output_55_transpose_x_0"), val = bool(false)]; bool attn_output_55_transpose_y_0 = const()[name = string("attn_output_55_transpose_y_0"), val = bool(false)]; tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_0, transpose_y = attn_output_55_transpose_y_0, x = attn_weights_39_cast_fp16, y = value_states_39_cast_fp16)[name = string("attn_output_55_cast_fp16")]; tensor var_2301_perm_0 = const()[name = string("op_2301_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2303 = const()[name = string("op_2303"), val = tensor([1, 1, 2048])]; tensor var_2301_cast_fp16 = transpose(perm = var_2301_perm_0, x = attn_output_55_cast_fp16)[name = string("transpose_26")]; tensor input_131_cast_fp16 = reshape(shape = var_2303, x = var_2301_cast_fp16)[name = string("input_131_cast_fp16")]; tensor model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506960448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509057664))))[name = string("model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_9_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = linear_9_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor mean_39_axes_0 = const()[name = string("mean_39_axes_0"), val = tensor([-1])]; bool mean_39_keep_dims_0 = const()[name = string("mean_39_keep_dims_0"), val = bool(true)]; tensor mean_39_cast_fp16 = reduce_mean(axes = mean_39_axes_0, keep_dims = mean_39_keep_dims_0, x = hidden_states_77_cast_fp16)[name = string("mean_39_cast_fp16")]; tensor input_133_cast_fp16 = sub(x = hidden_states_77_cast_fp16, y = mean_39_cast_fp16)[name = string("input_133_cast_fp16")]; tensor var_2314_axes_0 = const()[name = string("op_2314_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509065920)))]; tensor var_2314_cast_fp16 = layer_norm(axes = var_2314_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_133_cast_fp16)[name = string("op_2314_cast_fp16")]; tensor var_2321 = const()[name = string("op_2321"), val = tensor([0, 2, 1])]; tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; tensor var_2322 = transpose(perm = var_2321, x = var_2314_cast_fp16)[name = string("transpose_25")]; tensor input_135 = expand_dims(axes = input_135_axes_0, x = var_2322)[name = string("input_135")]; string input_137_pad_type_0 = const()[name = string("input_137_pad_type_0"), val = string("valid")]; tensor input_137_strides_0 = const()[name = string("input_137_strides_0"), val = tensor([1, 1])]; tensor input_137_pad_0 = const()[name = string("input_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_137_dilations_0 = const()[name = string("input_137_dilations_0"), val = tensor([1, 1])]; int32 input_137_groups_0 = const()[name = string("input_137_groups_0"), val = int32(1)]; tensor input_137 = conv(dilations = input_137_dilations_0, groups = input_137_groups_0, pad = input_137_pad_0, pad_type = input_137_pad_type_0, strides = input_137_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_135)[name = string("input_137")]; string up_states_19_pad_type_0 = const()[name = string("up_states_19_pad_type_0"), val = string("valid")]; tensor up_states_19_strides_0 = const()[name = string("up_states_19_strides_0"), val = tensor([1, 1])]; tensor up_states_19_pad_0 = const()[name = string("up_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_19_dilations_0 = const()[name = string("up_states_19_dilations_0"), val = tensor([1, 1])]; int32 up_states_19_groups_0 = const()[name = string("up_states_19_groups_0"), val = int32(1)]; tensor up_states_19 = conv(dilations = up_states_19_dilations_0, groups = up_states_19_groups_0, pad = up_states_19_pad_0, pad_type = up_states_19_pad_type_0, strides = up_states_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_135)[name = string("up_states_19")]; tensor gate_states_19 = silu(x = input_137)[name = string("gate_states_19")]; tensor input_139 = mul(x = gate_states_19, y = up_states_19)[name = string("input_139")]; string hidden_states_79_pad_type_0 = const()[name = string("hidden_states_79_pad_type_0"), val = string("valid")]; tensor hidden_states_79_strides_0 = const()[name = string("hidden_states_79_strides_0"), val = tensor([1, 1])]; tensor hidden_states_79_pad_0 = const()[name = string("hidden_states_79_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_79_dilations_0 = const()[name = string("hidden_states_79_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_79_groups_0 = const()[name = string("hidden_states_79_groups_0"), val = int32(1)]; tensor hidden_states_79 = conv(dilations = hidden_states_79_dilations_0, groups = hidden_states_79_groups_0, pad = hidden_states_79_pad_0, pad_type = hidden_states_79_pad_type_0, strides = hidden_states_79_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_139)[name = string("hidden_states_79")]; tensor var_2344_axes_0 = const()[name = string("op_2344_axes_0"), val = tensor([2])]; tensor var_2344 = squeeze(axes = var_2344_axes_0, x = hidden_states_79)[name = string("op_2344")]; tensor var_2345 = const()[name = string("op_2345"), val = tensor([0, 2, 1])]; tensor var_2346 = transpose(perm = var_2345, x = var_2344)[name = string("transpose_24")]; tensor hidden_states_81_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = var_2346)[name = string("hidden_states_81_cast_fp16")]; tensor mean_41_axes_0 = const()[name = string("mean_41_axes_0"), val = tensor([-1])]; bool mean_41_keep_dims_0 = const()[name = string("mean_41_keep_dims_0"), val = bool(true)]; tensor mean_41_cast_fp16 = reduce_mean(axes = mean_41_axes_0, keep_dims = mean_41_keep_dims_0, x = hidden_states_81_cast_fp16)[name = string("mean_41_cast_fp16")]; tensor input_141_cast_fp16 = sub(x = hidden_states_81_cast_fp16, y = mean_41_cast_fp16)[name = string("input_141_cast_fp16")]; tensor var_2354_axes_0 = const()[name = string("op_2354_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509070080)))]; tensor var_2354_cast_fp16 = layer_norm(axes = var_2354_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_141_cast_fp16)[name = string("op_2354_cast_fp16")]; tensor var_2357 = const()[name = string("op_2357"), val = tensor([0, 2, 1])]; tensor var_2359_axes_0 = const()[name = string("op_2359_axes_0"), val = tensor([2])]; tensor var_2358 = transpose(perm = var_2357, x = var_2354_cast_fp16)[name = string("transpose_23")]; tensor var_2359 = expand_dims(axes = var_2359_axes_0, x = var_2358)[name = string("op_2359")]; string var_2366_pad_type_0 = const()[name = string("op_2366_pad_type_0"), val = string("valid")]; tensor var_2366_strides_0 = const()[name = string("op_2366_strides_0"), val = tensor([1, 1])]; tensor var_2366_pad_0 = const()[name = string("op_2366_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2366_dilations_0 = const()[name = string("op_2366_dilations_0"), val = tensor([1, 1])]; int32 var_2366_groups_0 = const()[name = string("op_2366_groups_0"), val = int32(1)]; tensor var_2366 = conv(dilations = var_2366_dilations_0, groups = var_2366_groups_0, pad = var_2366_pad_0, pad_type = var_2366_pad_type_0, strides = var_2366_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_2359)[name = string("op_2366")]; tensor var_2367 = const()[name = string("op_2367"), val = tensor([1, 32, 1, 64])]; tensor var_2368 = reshape(shape = var_2367, x = var_2366)[name = string("op_2368")]; string var_2375_pad_type_0 = const()[name = string("op_2375_pad_type_0"), val = string("valid")]; tensor var_2375_strides_0 = const()[name = string("op_2375_strides_0"), val = tensor([1, 1])]; tensor var_2375_pad_0 = const()[name = string("op_2375_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2375_dilations_0 = const()[name = string("op_2375_dilations_0"), val = tensor([1, 1])]; int32 var_2375_groups_0 = const()[name = string("op_2375_groups_0"), val = int32(1)]; tensor var_2375 = conv(dilations = var_2375_dilations_0, groups = var_2375_groups_0, pad = var_2375_pad_0, pad_type = var_2375_pad_type_0, strides = var_2375_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_2359)[name = string("op_2375")]; tensor var_2376 = const()[name = string("op_2376"), val = tensor([1, 8, 1, 64])]; tensor var_2377 = reshape(shape = var_2376, x = var_2375)[name = string("op_2377")]; string var_2384_pad_type_0 = const()[name = string("op_2384_pad_type_0"), val = string("valid")]; tensor var_2384_strides_0 = const()[name = string("op_2384_strides_0"), val = tensor([1, 1])]; tensor var_2384_pad_0 = const()[name = string("op_2384_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2384_dilations_0 = const()[name = string("op_2384_dilations_0"), val = tensor([1, 1])]; int32 var_2384_groups_0 = const()[name = string("op_2384_groups_0"), val = int32(1)]; tensor var_2384 = conv(dilations = var_2384_dilations_0, groups = var_2384_groups_0, pad = var_2384_pad_0, pad_type = var_2384_pad_type_0, strides = var_2384_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_2359)[name = string("op_2384")]; tensor var_2385 = const()[name = string("op_2385"), val = tensor([1, 8, 1, 64])]; tensor var_2386 = reshape(shape = var_2385, x = var_2384)[name = string("op_2386")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_2368)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_2368)[name = string("x2_41")]; tensor var_2400_cast_fp16 = mul(x = x1_41, y = cos_3_cast_fp16)[name = string("op_2400_cast_fp16")]; tensor var_2401_cast_fp16 = mul(x = x2_41, y = sin_3_cast_fp16)[name = string("op_2401_cast_fp16")]; tensor var_2402_cast_fp16 = sub(x = var_2400_cast_fp16, y = var_2401_cast_fp16)[name = string("op_2402_cast_fp16")]; tensor var_2403_cast_fp16 = mul(x = x2_41, y = cos_3_cast_fp16)[name = string("op_2403_cast_fp16")]; tensor var_2404_cast_fp16 = mul(x = x1_41, y = sin_3_cast_fp16)[name = string("op_2404_cast_fp16")]; tensor var_2405_cast_fp16 = add(x = var_2403_cast_fp16, y = var_2404_cast_fp16)[name = string("op_2405_cast_fp16")]; bool rotated_41_interleave_0 = const()[name = string("rotated_41_interleave_0"), val = bool(false)]; tensor rotated_41_cast_fp16 = concat(axis = var_80, interleave = rotated_41_interleave_0, values = (var_2402_cast_fp16, var_2405_cast_fp16))[name = string("rotated_41_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_2377)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_2377)[name = string("x2_43")]; tensor var_2421_cast_fp16 = mul(x = x1_43, y = cos_3_cast_fp16)[name = string("op_2421_cast_fp16")]; tensor var_2422_cast_fp16 = mul(x = x2_43, y = sin_3_cast_fp16)[name = string("op_2422_cast_fp16")]; tensor var_2423_cast_fp16 = sub(x = var_2421_cast_fp16, y = var_2422_cast_fp16)[name = string("op_2423_cast_fp16")]; tensor var_2424_cast_fp16 = mul(x = x2_43, y = cos_3_cast_fp16)[name = string("op_2424_cast_fp16")]; tensor var_2425_cast_fp16 = mul(x = x1_43, y = sin_3_cast_fp16)[name = string("op_2425_cast_fp16")]; tensor var_2426_cast_fp16 = add(x = var_2424_cast_fp16, y = var_2425_cast_fp16)[name = string("op_2426_cast_fp16")]; bool rotated_43_interleave_0 = const()[name = string("rotated_43_interleave_0"), val = bool(false)]; tensor rotated_43_cast_fp16 = concat(axis = var_80, interleave = rotated_43_interleave_0, values = (var_2423_cast_fp16, var_2426_cast_fp16))[name = string("rotated_43_cast_fp16")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([10])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([11])]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_82")]; tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_124, concat_83_values1_0, var_581, concat_83_values3_0))[name = string("concat_83")]; tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = rotated_43_cast_fp16, x = coreml_update_state_51)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([26])]; tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([27])]; int32 concat_86_axis_0 = const()[name = string("concat_86_axis_0"), val = int32(0)]; bool concat_86_interleave_0 = const()[name = string("concat_86_interleave_0"), val = bool(false)]; tensor concat_86 = concat(axis = concat_86_axis_0, interleave = concat_86_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_86")]; tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (expand_dims_130, concat_87_values1_0, var_581, concat_87_values3_0))[name = string("concat_87")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = var_2386, x = coreml_update_state_52)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; tensor var_2446_begin_0 = const()[name = string("op_2446_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_2446_end_0 = const()[name = string("op_2446_end_0"), val = tensor([11, 8, 1024, 64])]; tensor var_2446_end_mask_0 = const()[name = string("op_2446_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2446_cast_fp16 = slice_by_index(begin = var_2446_begin_0, end = var_2446_end_0, end_mask = var_2446_end_mask_0, x = coreml_update_state_53)[name = string("op_2446_cast_fp16")]; tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_2446_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; tensor var_2448_begin_0 = const()[name = string("op_2448_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_2448_end_0 = const()[name = string("op_2448_end_0"), val = tensor([27, 8, 1024, 64])]; tensor var_2448_end_mask_0 = const()[name = string("op_2448_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2448_cast_fp16 = slice_by_index(begin = var_2448_begin_0, end = var_2448_end_0, end_mask = var_2448_end_mask_0, x = coreml_update_state_53)[name = string("op_2448_cast_fp16")]; tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_2448_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; tensor x_291_axes_0 = const()[name = string("x_291_axes_0"), val = tensor([1])]; tensor x_291_cast_fp16 = expand_dims(axes = x_291_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_291_cast_fp16")]; tensor var_2457 = const()[name = string("op_2457"), val = tensor([1, 4, 1, 1])]; tensor x_293_cast_fp16 = tile(reps = var_2457, x = x_291_cast_fp16)[name = string("x_293_cast_fp16")]; tensor var_2461 = const()[name = string("op_2461"), val = tensor([1, -1, 1024, 64])]; tensor key_states_43_cast_fp16 = reshape(shape = var_2461, x = x_293_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor x_297_axes_0 = const()[name = string("x_297_axes_0"), val = tensor([1])]; tensor x_297_cast_fp16 = expand_dims(axes = x_297_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_297_cast_fp16")]; tensor var_2464 = const()[name = string("op_2464"), val = tensor([1, 4, 1, 1])]; tensor x_299_cast_fp16 = tile(reps = var_2464, x = x_297_cast_fp16)[name = string("x_299_cast_fp16")]; tensor var_2468 = const()[name = string("op_2468"), val = tensor([1, -1, 1024, 64])]; tensor value_states_43_cast_fp16 = reshape(shape = var_2468, x = x_299_cast_fp16)[name = string("value_states_43_cast_fp16")]; bool var_2471_transpose_x_1 = const()[name = string("op_2471_transpose_x_1"), val = bool(false)]; bool var_2471_transpose_y_1 = const()[name = string("op_2471_transpose_y_1"), val = bool(true)]; tensor var_2471_cast_fp16 = matmul(transpose_x = var_2471_transpose_x_1, transpose_y = var_2471_transpose_y_1, x = rotated_41_cast_fp16, y = key_states_43_cast_fp16)[name = string("op_2471_cast_fp16")]; fp16 var_2472_to_fp16 = const()[name = string("op_2472_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_41_cast_fp16 = mul(x = var_2471_cast_fp16, y = var_2472_to_fp16)[name = string("attn_weights_41_cast_fp16")]; tensor x_301_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask)[name = string("x_301_cast_fp16")]; tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; tensor reduce_max_10_cast_fp16 = reduce_max(axes = reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = x_301_cast_fp16)[name = string("reduce_max_10_cast_fp16")]; tensor x_303_cast_fp16 = sub(x = x_301_cast_fp16, y = reduce_max_10_cast_fp16)[name = string("x_303_cast_fp16")]; tensor exp_x_21_cast_fp16 = exp(x = x_303_cast_fp16)[name = string("exp_x_21_cast_fp16")]; tensor var_2483_axes_0 = const()[name = string("op_2483_axes_0"), val = tensor([-1])]; bool var_2483_keep_dims_0 = const()[name = string("op_2483_keep_dims_0"), val = bool(true)]; tensor var_2483_cast_fp16 = reduce_sum(axes = var_2483_axes_0, keep_dims = var_2483_keep_dims_0, x = exp_x_21_cast_fp16)[name = string("op_2483_cast_fp16")]; tensor attn_weights_43_cast_fp16 = real_div(x = exp_x_21_cast_fp16, y = var_2483_cast_fp16)[name = string("attn_weights_43_cast_fp16")]; bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_43_cast_fp16, y = value_states_43_cast_fp16)[name = string("attn_output_61_cast_fp16")]; tensor var_2486_perm_0 = const()[name = string("op_2486_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2488 = const()[name = string("op_2488"), val = tensor([1, 1, 2048])]; tensor var_2486_cast_fp16 = transpose(perm = var_2486_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_22")]; tensor input_145_cast_fp16 = reshape(shape = var_2488, x = var_2486_cast_fp16)[name = string("input_145_cast_fp16")]; tensor model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509074240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511171456))))[name = string("model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_145_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor hidden_states_85_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; tensor mean_43_axes_0 = const()[name = string("mean_43_axes_0"), val = tensor([-1])]; bool mean_43_keep_dims_0 = const()[name = string("mean_43_keep_dims_0"), val = bool(true)]; tensor mean_43_cast_fp16 = reduce_mean(axes = mean_43_axes_0, keep_dims = mean_43_keep_dims_0, x = hidden_states_85_cast_fp16)[name = string("mean_43_cast_fp16")]; tensor input_147_cast_fp16 = sub(x = hidden_states_85_cast_fp16, y = mean_43_cast_fp16)[name = string("input_147_cast_fp16")]; tensor var_2499_axes_0 = const()[name = string("op_2499_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511179712)))]; tensor var_2499_cast_fp16 = layer_norm(axes = var_2499_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_147_cast_fp16)[name = string("op_2499_cast_fp16")]; tensor var_2506 = const()[name = string("op_2506"), val = tensor([0, 2, 1])]; tensor input_149_axes_0 = const()[name = string("input_149_axes_0"), val = tensor([2])]; tensor var_2507 = transpose(perm = var_2506, x = var_2499_cast_fp16)[name = string("transpose_21")]; tensor input_149 = expand_dims(axes = input_149_axes_0, x = var_2507)[name = string("input_149")]; string input_151_pad_type_0 = const()[name = string("input_151_pad_type_0"), val = string("valid")]; tensor input_151_strides_0 = const()[name = string("input_151_strides_0"), val = tensor([1, 1])]; tensor input_151_pad_0 = const()[name = string("input_151_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_151_dilations_0 = const()[name = string("input_151_dilations_0"), val = tensor([1, 1])]; int32 input_151_groups_0 = const()[name = string("input_151_groups_0"), val = int32(1)]; tensor input_151 = conv(dilations = input_151_dilations_0, groups = input_151_groups_0, pad = input_151_pad_0, pad_type = input_151_pad_type_0, strides = input_151_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_149)[name = string("input_151")]; string up_states_21_pad_type_0 = const()[name = string("up_states_21_pad_type_0"), val = string("valid")]; tensor up_states_21_strides_0 = const()[name = string("up_states_21_strides_0"), val = tensor([1, 1])]; tensor up_states_21_pad_0 = const()[name = string("up_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_21_dilations_0 = const()[name = string("up_states_21_dilations_0"), val = tensor([1, 1])]; int32 up_states_21_groups_0 = const()[name = string("up_states_21_groups_0"), val = int32(1)]; tensor up_states_21 = conv(dilations = up_states_21_dilations_0, groups = up_states_21_groups_0, pad = up_states_21_pad_0, pad_type = up_states_21_pad_type_0, strides = up_states_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_149)[name = string("up_states_21")]; tensor gate_states_21 = silu(x = input_151)[name = string("gate_states_21")]; tensor input_153 = mul(x = gate_states_21, y = up_states_21)[name = string("input_153")]; string hidden_states_87_pad_type_0 = const()[name = string("hidden_states_87_pad_type_0"), val = string("valid")]; tensor hidden_states_87_strides_0 = const()[name = string("hidden_states_87_strides_0"), val = tensor([1, 1])]; tensor hidden_states_87_pad_0 = const()[name = string("hidden_states_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_87_dilations_0 = const()[name = string("hidden_states_87_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_87_groups_0 = const()[name = string("hidden_states_87_groups_0"), val = int32(1)]; tensor hidden_states_87 = conv(dilations = hidden_states_87_dilations_0, groups = hidden_states_87_groups_0, pad = hidden_states_87_pad_0, pad_type = hidden_states_87_pad_type_0, strides = hidden_states_87_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_153)[name = string("hidden_states_87")]; tensor var_2529_axes_0 = const()[name = string("op_2529_axes_0"), val = tensor([2])]; tensor var_2529 = squeeze(axes = var_2529_axes_0, x = hidden_states_87)[name = string("op_2529")]; tensor var_2530 = const()[name = string("op_2530"), val = tensor([0, 2, 1])]; tensor var_2531 = transpose(perm = var_2530, x = var_2529)[name = string("transpose_20")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = var_2531)[name = string("hidden_states_89_cast_fp16")]; tensor mean_45_axes_0 = const()[name = string("mean_45_axes_0"), val = tensor([-1])]; bool mean_45_keep_dims_0 = const()[name = string("mean_45_keep_dims_0"), val = bool(true)]; tensor mean_45_cast_fp16 = reduce_mean(axes = mean_45_axes_0, keep_dims = mean_45_keep_dims_0, x = hidden_states_89_cast_fp16)[name = string("mean_45_cast_fp16")]; tensor input_155_cast_fp16 = sub(x = hidden_states_89_cast_fp16, y = mean_45_cast_fp16)[name = string("input_155_cast_fp16")]; tensor var_2539_axes_0 = const()[name = string("op_2539_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511183872)))]; tensor var_2539_cast_fp16 = layer_norm(axes = var_2539_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_155_cast_fp16)[name = string("op_2539_cast_fp16")]; tensor var_2542 = const()[name = string("op_2542"), val = tensor([0, 2, 1])]; tensor var_2544_axes_0 = const()[name = string("op_2544_axes_0"), val = tensor([2])]; tensor var_2543 = transpose(perm = var_2542, x = var_2539_cast_fp16)[name = string("transpose_19")]; tensor var_2544 = expand_dims(axes = var_2544_axes_0, x = var_2543)[name = string("op_2544")]; string var_2551_pad_type_0 = const()[name = string("op_2551_pad_type_0"), val = string("valid")]; tensor var_2551_strides_0 = const()[name = string("op_2551_strides_0"), val = tensor([1, 1])]; tensor var_2551_pad_0 = const()[name = string("op_2551_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2551_dilations_0 = const()[name = string("op_2551_dilations_0"), val = tensor([1, 1])]; int32 var_2551_groups_0 = const()[name = string("op_2551_groups_0"), val = int32(1)]; tensor var_2551 = conv(dilations = var_2551_dilations_0, groups = var_2551_groups_0, pad = var_2551_pad_0, pad_type = var_2551_pad_type_0, strides = var_2551_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_2544)[name = string("op_2551")]; tensor var_2552 = const()[name = string("op_2552"), val = tensor([1, 32, 1, 64])]; tensor var_2553 = reshape(shape = var_2552, x = var_2551)[name = string("op_2553")]; string var_2560_pad_type_0 = const()[name = string("op_2560_pad_type_0"), val = string("valid")]; tensor var_2560_strides_0 = const()[name = string("op_2560_strides_0"), val = tensor([1, 1])]; tensor var_2560_pad_0 = const()[name = string("op_2560_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2560_dilations_0 = const()[name = string("op_2560_dilations_0"), val = tensor([1, 1])]; int32 var_2560_groups_0 = const()[name = string("op_2560_groups_0"), val = int32(1)]; tensor var_2560 = conv(dilations = var_2560_dilations_0, groups = var_2560_groups_0, pad = var_2560_pad_0, pad_type = var_2560_pad_type_0, strides = var_2560_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_2544)[name = string("op_2560")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 8, 1, 64])]; tensor var_2562 = reshape(shape = var_2561, x = var_2560)[name = string("op_2562")]; string var_2569_pad_type_0 = const()[name = string("op_2569_pad_type_0"), val = string("valid")]; tensor var_2569_strides_0 = const()[name = string("op_2569_strides_0"), val = tensor([1, 1])]; tensor var_2569_pad_0 = const()[name = string("op_2569_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2569_dilations_0 = const()[name = string("op_2569_dilations_0"), val = tensor([1, 1])]; int32 var_2569_groups_0 = const()[name = string("op_2569_groups_0"), val = int32(1)]; tensor var_2569 = conv(dilations = var_2569_dilations_0, groups = var_2569_groups_0, pad = var_2569_pad_0, pad_type = var_2569_pad_type_0, strides = var_2569_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_2544)[name = string("op_2569")]; tensor var_2570 = const()[name = string("op_2570"), val = tensor([1, 8, 1, 64])]; tensor var_2571 = reshape(shape = var_2570, x = var_2569)[name = string("op_2571")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_2553)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_2553)[name = string("x2_45")]; tensor var_2585_cast_fp16 = mul(x = x1_45, y = cos_3_cast_fp16)[name = string("op_2585_cast_fp16")]; tensor var_2586_cast_fp16 = mul(x = x2_45, y = sin_3_cast_fp16)[name = string("op_2586_cast_fp16")]; tensor var_2587_cast_fp16 = sub(x = var_2585_cast_fp16, y = var_2586_cast_fp16)[name = string("op_2587_cast_fp16")]; tensor var_2588_cast_fp16 = mul(x = x2_45, y = cos_3_cast_fp16)[name = string("op_2588_cast_fp16")]; tensor var_2589_cast_fp16 = mul(x = x1_45, y = sin_3_cast_fp16)[name = string("op_2589_cast_fp16")]; tensor var_2590_cast_fp16 = add(x = var_2588_cast_fp16, y = var_2589_cast_fp16)[name = string("op_2590_cast_fp16")]; bool rotated_45_interleave_0 = const()[name = string("rotated_45_interleave_0"), val = bool(false)]; tensor rotated_45_cast_fp16 = concat(axis = var_80, interleave = rotated_45_interleave_0, values = (var_2587_cast_fp16, var_2590_cast_fp16))[name = string("rotated_45_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_2562)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_2562)[name = string("x2_47")]; tensor var_2606_cast_fp16 = mul(x = x1_47, y = cos_3_cast_fp16)[name = string("op_2606_cast_fp16")]; tensor var_2607_cast_fp16 = mul(x = x2_47, y = sin_3_cast_fp16)[name = string("op_2607_cast_fp16")]; tensor var_2608_cast_fp16 = sub(x = var_2606_cast_fp16, y = var_2607_cast_fp16)[name = string("op_2608_cast_fp16")]; tensor var_2609_cast_fp16 = mul(x = x2_47, y = cos_3_cast_fp16)[name = string("op_2609_cast_fp16")]; tensor var_2610_cast_fp16 = mul(x = x1_47, y = sin_3_cast_fp16)[name = string("op_2610_cast_fp16")]; tensor var_2611_cast_fp16 = add(x = var_2609_cast_fp16, y = var_2610_cast_fp16)[name = string("op_2611_cast_fp16")]; bool rotated_47_interleave_0 = const()[name = string("rotated_47_interleave_0"), val = bool(false)]; tensor rotated_47_cast_fp16 = concat(axis = var_80, interleave = rotated_47_interleave_0, values = (var_2608_cast_fp16, var_2611_cast_fp16))[name = string("rotated_47_cast_fp16")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([11])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([12])]; int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_90")]; tensor concat_91_values1_0 = const()[name = string("concat_91_values1_0"), val = tensor([0])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_136, concat_91_values1_0, var_581, concat_91_values3_0))[name = string("concat_91")]; tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_90, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_91, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = rotated_47_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([27])]; tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([28])]; int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_94")]; tensor concat_95_values1_0 = const()[name = string("concat_95_values1_0"), val = tensor([0])]; tensor concat_95_values3_0 = const()[name = string("concat_95_values3_0"), val = tensor([0])]; int32 concat_95_axis_0 = const()[name = string("concat_95_axis_0"), val = int32(0)]; bool concat_95_interleave_0 = const()[name = string("concat_95_interleave_0"), val = bool(false)]; tensor concat_95 = concat(axis = concat_95_axis_0, interleave = concat_95_interleave_0, values = (expand_dims_142, concat_95_values1_0, var_581, concat_95_values3_0))[name = string("concat_95")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_94, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_95, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = var_2571, x = coreml_update_state_54)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; tensor var_2631_begin_0 = const()[name = string("op_2631_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_2631_end_0 = const()[name = string("op_2631_end_0"), val = tensor([12, 8, 1024, 64])]; tensor var_2631_end_mask_0 = const()[name = string("op_2631_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2631_cast_fp16 = slice_by_index(begin = var_2631_begin_0, end = var_2631_end_0, end_mask = var_2631_end_mask_0, x = coreml_update_state_55)[name = string("op_2631_cast_fp16")]; tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_2631_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; tensor var_2633_begin_0 = const()[name = string("op_2633_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_2633_end_0 = const()[name = string("op_2633_end_0"), val = tensor([28, 8, 1024, 64])]; tensor var_2633_end_mask_0 = const()[name = string("op_2633_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2633_cast_fp16 = slice_by_index(begin = var_2633_begin_0, end = var_2633_end_0, end_mask = var_2633_end_mask_0, x = coreml_update_state_55)[name = string("op_2633_cast_fp16")]; tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_2633_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; tensor x_319_axes_0 = const()[name = string("x_319_axes_0"), val = tensor([1])]; tensor x_319_cast_fp16 = expand_dims(axes = x_319_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_319_cast_fp16")]; tensor var_2642 = const()[name = string("op_2642"), val = tensor([1, 4, 1, 1])]; tensor x_321_cast_fp16 = tile(reps = var_2642, x = x_319_cast_fp16)[name = string("x_321_cast_fp16")]; tensor var_2646 = const()[name = string("op_2646"), val = tensor([1, -1, 1024, 64])]; tensor key_states_47_cast_fp16 = reshape(shape = var_2646, x = x_321_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor x_325_axes_0 = const()[name = string("x_325_axes_0"), val = tensor([1])]; tensor x_325_cast_fp16 = expand_dims(axes = x_325_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_325_cast_fp16")]; tensor var_2649 = const()[name = string("op_2649"), val = tensor([1, 4, 1, 1])]; tensor x_327_cast_fp16 = tile(reps = var_2649, x = x_325_cast_fp16)[name = string("x_327_cast_fp16")]; tensor var_2653 = const()[name = string("op_2653"), val = tensor([1, -1, 1024, 64])]; tensor value_states_47_cast_fp16 = reshape(shape = var_2653, x = x_327_cast_fp16)[name = string("value_states_47_cast_fp16")]; bool var_2656_transpose_x_1 = const()[name = string("op_2656_transpose_x_1"), val = bool(false)]; bool var_2656_transpose_y_1 = const()[name = string("op_2656_transpose_y_1"), val = bool(true)]; tensor var_2656_cast_fp16 = matmul(transpose_x = var_2656_transpose_x_1, transpose_y = var_2656_transpose_y_1, x = rotated_45_cast_fp16, y = key_states_47_cast_fp16)[name = string("op_2656_cast_fp16")]; fp16 var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_45_cast_fp16 = mul(x = var_2656_cast_fp16, y = var_2657_to_fp16)[name = string("attn_weights_45_cast_fp16")]; tensor x_329_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("x_329_cast_fp16")]; tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; tensor reduce_max_11_cast_fp16 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = x_329_cast_fp16)[name = string("reduce_max_11_cast_fp16")]; tensor x_331_cast_fp16 = sub(x = x_329_cast_fp16, y = reduce_max_11_cast_fp16)[name = string("x_331_cast_fp16")]; tensor exp_x_23_cast_fp16 = exp(x = x_331_cast_fp16)[name = string("exp_x_23_cast_fp16")]; tensor var_2668_axes_0 = const()[name = string("op_2668_axes_0"), val = tensor([-1])]; bool var_2668_keep_dims_0 = const()[name = string("op_2668_keep_dims_0"), val = bool(true)]; tensor var_2668_cast_fp16 = reduce_sum(axes = var_2668_axes_0, keep_dims = var_2668_keep_dims_0, x = exp_x_23_cast_fp16)[name = string("op_2668_cast_fp16")]; tensor attn_weights_47_cast_fp16 = real_div(x = exp_x_23_cast_fp16, y = var_2668_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_67_transpose_x_0 = const()[name = string("attn_output_67_transpose_x_0"), val = bool(false)]; bool attn_output_67_transpose_y_0 = const()[name = string("attn_output_67_transpose_y_0"), val = bool(false)]; tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_0, transpose_y = attn_output_67_transpose_y_0, x = attn_weights_47_cast_fp16, y = value_states_47_cast_fp16)[name = string("attn_output_67_cast_fp16")]; tensor var_2671_perm_0 = const()[name = string("op_2671_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2673 = const()[name = string("op_2673"), val = tensor([1, 1, 2048])]; tensor var_2671_cast_fp16 = transpose(perm = var_2671_perm_0, x = attn_output_67_cast_fp16)[name = string("transpose_18")]; tensor input_159_cast_fp16 = reshape(shape = var_2673, x = var_2671_cast_fp16)[name = string("input_159_cast_fp16")]; tensor model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511188032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513285248))))[name = string("model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_11_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_159_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor hidden_states_93_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = linear_11_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor mean_47_axes_0 = const()[name = string("mean_47_axes_0"), val = tensor([-1])]; bool mean_47_keep_dims_0 = const()[name = string("mean_47_keep_dims_0"), val = bool(true)]; tensor mean_47_cast_fp16 = reduce_mean(axes = mean_47_axes_0, keep_dims = mean_47_keep_dims_0, x = hidden_states_93_cast_fp16)[name = string("mean_47_cast_fp16")]; tensor input_161_cast_fp16 = sub(x = hidden_states_93_cast_fp16, y = mean_47_cast_fp16)[name = string("input_161_cast_fp16")]; tensor var_2684_axes_0 = const()[name = string("op_2684_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513293504)))]; tensor var_2684_cast_fp16 = layer_norm(axes = var_2684_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_161_cast_fp16)[name = string("op_2684_cast_fp16")]; tensor var_2691 = const()[name = string("op_2691"), val = tensor([0, 2, 1])]; tensor input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor([2])]; tensor var_2692 = transpose(perm = var_2691, x = var_2684_cast_fp16)[name = string("transpose_17")]; tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_2692)[name = string("input_163")]; string input_165_pad_type_0 = const()[name = string("input_165_pad_type_0"), val = string("valid")]; tensor input_165_strides_0 = const()[name = string("input_165_strides_0"), val = tensor([1, 1])]; tensor input_165_pad_0 = const()[name = string("input_165_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_165_dilations_0 = const()[name = string("input_165_dilations_0"), val = tensor([1, 1])]; int32 input_165_groups_0 = const()[name = string("input_165_groups_0"), val = int32(1)]; tensor input_165 = conv(dilations = input_165_dilations_0, groups = input_165_groups_0, pad = input_165_pad_0, pad_type = input_165_pad_type_0, strides = input_165_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_163)[name = string("input_165")]; string up_states_23_pad_type_0 = const()[name = string("up_states_23_pad_type_0"), val = string("valid")]; tensor up_states_23_strides_0 = const()[name = string("up_states_23_strides_0"), val = tensor([1, 1])]; tensor up_states_23_pad_0 = const()[name = string("up_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_23_dilations_0 = const()[name = string("up_states_23_dilations_0"), val = tensor([1, 1])]; int32 up_states_23_groups_0 = const()[name = string("up_states_23_groups_0"), val = int32(1)]; tensor up_states_23 = conv(dilations = up_states_23_dilations_0, groups = up_states_23_groups_0, pad = up_states_23_pad_0, pad_type = up_states_23_pad_type_0, strides = up_states_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_163)[name = string("up_states_23")]; tensor gate_states_23 = silu(x = input_165)[name = string("gate_states_23")]; tensor input_167 = mul(x = gate_states_23, y = up_states_23)[name = string("input_167")]; string hidden_states_95_pad_type_0 = const()[name = string("hidden_states_95_pad_type_0"), val = string("valid")]; tensor hidden_states_95_strides_0 = const()[name = string("hidden_states_95_strides_0"), val = tensor([1, 1])]; tensor hidden_states_95_pad_0 = const()[name = string("hidden_states_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_95_dilations_0 = const()[name = string("hidden_states_95_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_95_groups_0 = const()[name = string("hidden_states_95_groups_0"), val = int32(1)]; tensor hidden_states_95 = conv(dilations = hidden_states_95_dilations_0, groups = hidden_states_95_groups_0, pad = hidden_states_95_pad_0, pad_type = hidden_states_95_pad_type_0, strides = hidden_states_95_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_167)[name = string("hidden_states_95")]; tensor var_2714_axes_0 = const()[name = string("op_2714_axes_0"), val = tensor([2])]; tensor var_2714 = squeeze(axes = var_2714_axes_0, x = hidden_states_95)[name = string("op_2714")]; tensor var_2715 = const()[name = string("op_2715"), val = tensor([0, 2, 1])]; tensor var_2716 = transpose(perm = var_2715, x = var_2714)[name = string("transpose_16")]; tensor hidden_states_97_cast_fp16 = add(x = hidden_states_93_cast_fp16, y = var_2716)[name = string("hidden_states_97_cast_fp16")]; tensor mean_49_axes_0 = const()[name = string("mean_49_axes_0"), val = tensor([-1])]; bool mean_49_keep_dims_0 = const()[name = string("mean_49_keep_dims_0"), val = bool(true)]; tensor mean_49_cast_fp16 = reduce_mean(axes = mean_49_axes_0, keep_dims = mean_49_keep_dims_0, x = hidden_states_97_cast_fp16)[name = string("mean_49_cast_fp16")]; tensor input_169_cast_fp16 = sub(x = hidden_states_97_cast_fp16, y = mean_49_cast_fp16)[name = string("input_169_cast_fp16")]; tensor var_2724_axes_0 = const()[name = string("op_2724_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513297664)))]; tensor var_2724_cast_fp16 = layer_norm(axes = var_2724_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_169_cast_fp16)[name = string("op_2724_cast_fp16")]; tensor var_2727 = const()[name = string("op_2727"), val = tensor([0, 2, 1])]; tensor var_2729_axes_0 = const()[name = string("op_2729_axes_0"), val = tensor([2])]; tensor var_2728 = transpose(perm = var_2727, x = var_2724_cast_fp16)[name = string("transpose_15")]; tensor var_2729 = expand_dims(axes = var_2729_axes_0, x = var_2728)[name = string("op_2729")]; string var_2736_pad_type_0 = const()[name = string("op_2736_pad_type_0"), val = string("valid")]; tensor var_2736_strides_0 = const()[name = string("op_2736_strides_0"), val = tensor([1, 1])]; tensor var_2736_pad_0 = const()[name = string("op_2736_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2736_dilations_0 = const()[name = string("op_2736_dilations_0"), val = tensor([1, 1])]; int32 var_2736_groups_0 = const()[name = string("op_2736_groups_0"), val = int32(1)]; tensor var_2736 = conv(dilations = var_2736_dilations_0, groups = var_2736_groups_0, pad = var_2736_pad_0, pad_type = var_2736_pad_type_0, strides = var_2736_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_2729)[name = string("op_2736")]; tensor var_2737 = const()[name = string("op_2737"), val = tensor([1, 32, 1, 64])]; tensor var_2738 = reshape(shape = var_2737, x = var_2736)[name = string("op_2738")]; string var_2745_pad_type_0 = const()[name = string("op_2745_pad_type_0"), val = string("valid")]; tensor var_2745_strides_0 = const()[name = string("op_2745_strides_0"), val = tensor([1, 1])]; tensor var_2745_pad_0 = const()[name = string("op_2745_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2745_dilations_0 = const()[name = string("op_2745_dilations_0"), val = tensor([1, 1])]; int32 var_2745_groups_0 = const()[name = string("op_2745_groups_0"), val = int32(1)]; tensor var_2745 = conv(dilations = var_2745_dilations_0, groups = var_2745_groups_0, pad = var_2745_pad_0, pad_type = var_2745_pad_type_0, strides = var_2745_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_2729)[name = string("op_2745")]; tensor var_2746 = const()[name = string("op_2746"), val = tensor([1, 8, 1, 64])]; tensor var_2747 = reshape(shape = var_2746, x = var_2745)[name = string("op_2747")]; string var_2754_pad_type_0 = const()[name = string("op_2754_pad_type_0"), val = string("valid")]; tensor var_2754_strides_0 = const()[name = string("op_2754_strides_0"), val = tensor([1, 1])]; tensor var_2754_pad_0 = const()[name = string("op_2754_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2754_dilations_0 = const()[name = string("op_2754_dilations_0"), val = tensor([1, 1])]; int32 var_2754_groups_0 = const()[name = string("op_2754_groups_0"), val = int32(1)]; tensor var_2754 = conv(dilations = var_2754_dilations_0, groups = var_2754_groups_0, pad = var_2754_pad_0, pad_type = var_2754_pad_type_0, strides = var_2754_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_2729)[name = string("op_2754")]; tensor var_2755 = const()[name = string("op_2755"), val = tensor([1, 8, 1, 64])]; tensor var_2756 = reshape(shape = var_2755, x = var_2754)[name = string("op_2756")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_2738)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_2738)[name = string("x2_49")]; tensor var_2770_cast_fp16 = mul(x = x1_49, y = cos_3_cast_fp16)[name = string("op_2770_cast_fp16")]; tensor var_2771_cast_fp16 = mul(x = x2_49, y = sin_3_cast_fp16)[name = string("op_2771_cast_fp16")]; tensor var_2772_cast_fp16 = sub(x = var_2770_cast_fp16, y = var_2771_cast_fp16)[name = string("op_2772_cast_fp16")]; tensor var_2773_cast_fp16 = mul(x = x2_49, y = cos_3_cast_fp16)[name = string("op_2773_cast_fp16")]; tensor var_2774_cast_fp16 = mul(x = x1_49, y = sin_3_cast_fp16)[name = string("op_2774_cast_fp16")]; tensor var_2775_cast_fp16 = add(x = var_2773_cast_fp16, y = var_2774_cast_fp16)[name = string("op_2775_cast_fp16")]; bool rotated_49_interleave_0 = const()[name = string("rotated_49_interleave_0"), val = bool(false)]; tensor rotated_49_cast_fp16 = concat(axis = var_80, interleave = rotated_49_interleave_0, values = (var_2772_cast_fp16, var_2775_cast_fp16))[name = string("rotated_49_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_2747)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_2747)[name = string("x2_51")]; tensor var_2791_cast_fp16 = mul(x = x1_51, y = cos_3_cast_fp16)[name = string("op_2791_cast_fp16")]; tensor var_2792_cast_fp16 = mul(x = x2_51, y = sin_3_cast_fp16)[name = string("op_2792_cast_fp16")]; tensor var_2793_cast_fp16 = sub(x = var_2791_cast_fp16, y = var_2792_cast_fp16)[name = string("op_2793_cast_fp16")]; tensor var_2794_cast_fp16 = mul(x = x2_51, y = cos_3_cast_fp16)[name = string("op_2794_cast_fp16")]; tensor var_2795_cast_fp16 = mul(x = x1_51, y = sin_3_cast_fp16)[name = string("op_2795_cast_fp16")]; tensor var_2796_cast_fp16 = add(x = var_2794_cast_fp16, y = var_2795_cast_fp16)[name = string("op_2796_cast_fp16")]; bool rotated_51_interleave_0 = const()[name = string("rotated_51_interleave_0"), val = bool(false)]; tensor rotated_51_cast_fp16 = concat(axis = var_80, interleave = rotated_51_interleave_0, values = (var_2793_cast_fp16, var_2796_cast_fp16))[name = string("rotated_51_cast_fp16")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([12])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([13])]; int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_98")]; tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_148, concat_99_values1_0, var_581, concat_99_values3_0))[name = string("concat_99")]; tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = rotated_51_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([28])]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([29])]; int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_102")]; tensor concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = tensor([0])]; tensor concat_103_values3_0 = const()[name = string("concat_103_values3_0"), val = tensor([0])]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (expand_dims_154, concat_103_values1_0, var_581, concat_103_values3_0))[name = string("concat_103")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = var_2756, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; tensor var_2816_begin_0 = const()[name = string("op_2816_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_2816_end_0 = const()[name = string("op_2816_end_0"), val = tensor([13, 8, 1024, 64])]; tensor var_2816_end_mask_0 = const()[name = string("op_2816_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2816_cast_fp16 = slice_by_index(begin = var_2816_begin_0, end = var_2816_end_0, end_mask = var_2816_end_mask_0, x = coreml_update_state_57)[name = string("op_2816_cast_fp16")]; tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_2816_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; tensor var_2818_begin_0 = const()[name = string("op_2818_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_2818_end_0 = const()[name = string("op_2818_end_0"), val = tensor([29, 8, 1024, 64])]; tensor var_2818_end_mask_0 = const()[name = string("op_2818_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2818_cast_fp16 = slice_by_index(begin = var_2818_begin_0, end = var_2818_end_0, end_mask = var_2818_end_mask_0, x = coreml_update_state_57)[name = string("op_2818_cast_fp16")]; tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_2818_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; tensor x_347_axes_0 = const()[name = string("x_347_axes_0"), val = tensor([1])]; tensor x_347_cast_fp16 = expand_dims(axes = x_347_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_347_cast_fp16")]; tensor var_2827 = const()[name = string("op_2827"), val = tensor([1, 4, 1, 1])]; tensor x_349_cast_fp16 = tile(reps = var_2827, x = x_347_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_2831 = const()[name = string("op_2831"), val = tensor([1, -1, 1024, 64])]; tensor key_states_51_cast_fp16 = reshape(shape = var_2831, x = x_349_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor x_353_axes_0 = const()[name = string("x_353_axes_0"), val = tensor([1])]; tensor x_353_cast_fp16 = expand_dims(axes = x_353_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_353_cast_fp16")]; tensor var_2834 = const()[name = string("op_2834"), val = tensor([1, 4, 1, 1])]; tensor x_355_cast_fp16 = tile(reps = var_2834, x = x_353_cast_fp16)[name = string("x_355_cast_fp16")]; tensor var_2838 = const()[name = string("op_2838"), val = tensor([1, -1, 1024, 64])]; tensor value_states_51_cast_fp16 = reshape(shape = var_2838, x = x_355_cast_fp16)[name = string("value_states_51_cast_fp16")]; bool var_2841_transpose_x_1 = const()[name = string("op_2841_transpose_x_1"), val = bool(false)]; bool var_2841_transpose_y_1 = const()[name = string("op_2841_transpose_y_1"), val = bool(true)]; tensor var_2841_cast_fp16 = matmul(transpose_x = var_2841_transpose_x_1, transpose_y = var_2841_transpose_y_1, x = rotated_49_cast_fp16, y = key_states_51_cast_fp16)[name = string("op_2841_cast_fp16")]; fp16 var_2842_to_fp16 = const()[name = string("op_2842_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_49_cast_fp16 = mul(x = var_2841_cast_fp16, y = var_2842_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor x_357_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("x_357_cast_fp16")]; tensor reduce_max_12_axes_0 = const()[name = string("reduce_max_12_axes_0"), val = tensor([-1])]; bool reduce_max_12_keep_dims_0 = const()[name = string("reduce_max_12_keep_dims_0"), val = bool(true)]; tensor reduce_max_12_cast_fp16 = reduce_max(axes = reduce_max_12_axes_0, keep_dims = reduce_max_12_keep_dims_0, x = x_357_cast_fp16)[name = string("reduce_max_12_cast_fp16")]; tensor x_359_cast_fp16 = sub(x = x_357_cast_fp16, y = reduce_max_12_cast_fp16)[name = string("x_359_cast_fp16")]; tensor exp_x_25_cast_fp16 = exp(x = x_359_cast_fp16)[name = string("exp_x_25_cast_fp16")]; tensor var_2853_axes_0 = const()[name = string("op_2853_axes_0"), val = tensor([-1])]; bool var_2853_keep_dims_0 = const()[name = string("op_2853_keep_dims_0"), val = bool(true)]; tensor var_2853_cast_fp16 = reduce_sum(axes = var_2853_axes_0, keep_dims = var_2853_keep_dims_0, x = exp_x_25_cast_fp16)[name = string("op_2853_cast_fp16")]; tensor attn_weights_51_cast_fp16 = real_div(x = exp_x_25_cast_fp16, y = var_2853_cast_fp16)[name = string("attn_weights_51_cast_fp16")]; bool attn_output_73_transpose_x_0 = const()[name = string("attn_output_73_transpose_x_0"), val = bool(false)]; bool attn_output_73_transpose_y_0 = const()[name = string("attn_output_73_transpose_y_0"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_0, transpose_y = attn_output_73_transpose_y_0, x = attn_weights_51_cast_fp16, y = value_states_51_cast_fp16)[name = string("attn_output_73_cast_fp16")]; tensor var_2856_perm_0 = const()[name = string("op_2856_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2858 = const()[name = string("op_2858"), val = tensor([1, 1, 2048])]; tensor var_2856_cast_fp16 = transpose(perm = var_2856_perm_0, x = attn_output_73_cast_fp16)[name = string("transpose_14")]; tensor input_173_cast_fp16 = reshape(shape = var_2858, x = var_2856_cast_fp16)[name = string("input_173_cast_fp16")]; tensor model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513301824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515399040))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_173_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = linear_12_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor mean_51_axes_0 = const()[name = string("mean_51_axes_0"), val = tensor([-1])]; bool mean_51_keep_dims_0 = const()[name = string("mean_51_keep_dims_0"), val = bool(true)]; tensor mean_51_cast_fp16 = reduce_mean(axes = mean_51_axes_0, keep_dims = mean_51_keep_dims_0, x = hidden_states_101_cast_fp16)[name = string("mean_51_cast_fp16")]; tensor input_175_cast_fp16 = sub(x = hidden_states_101_cast_fp16, y = mean_51_cast_fp16)[name = string("input_175_cast_fp16")]; tensor var_2869_axes_0 = const()[name = string("op_2869_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515407296)))]; tensor var_2869_cast_fp16 = layer_norm(axes = var_2869_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_175_cast_fp16)[name = string("op_2869_cast_fp16")]; tensor var_2876 = const()[name = string("op_2876"), val = tensor([0, 2, 1])]; tensor input_177_axes_0 = const()[name = string("input_177_axes_0"), val = tensor([2])]; tensor var_2877 = transpose(perm = var_2876, x = var_2869_cast_fp16)[name = string("transpose_13")]; tensor input_177 = expand_dims(axes = input_177_axes_0, x = var_2877)[name = string("input_177")]; string input_179_pad_type_0 = const()[name = string("input_179_pad_type_0"), val = string("valid")]; tensor input_179_strides_0 = const()[name = string("input_179_strides_0"), val = tensor([1, 1])]; tensor input_179_pad_0 = const()[name = string("input_179_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_179_dilations_0 = const()[name = string("input_179_dilations_0"), val = tensor([1, 1])]; int32 input_179_groups_0 = const()[name = string("input_179_groups_0"), val = int32(1)]; tensor input_179 = conv(dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_177)[name = string("input_179")]; string up_states_25_pad_type_0 = const()[name = string("up_states_25_pad_type_0"), val = string("valid")]; tensor up_states_25_strides_0 = const()[name = string("up_states_25_strides_0"), val = tensor([1, 1])]; tensor up_states_25_pad_0 = const()[name = string("up_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_25_dilations_0 = const()[name = string("up_states_25_dilations_0"), val = tensor([1, 1])]; int32 up_states_25_groups_0 = const()[name = string("up_states_25_groups_0"), val = int32(1)]; tensor up_states_25 = conv(dilations = up_states_25_dilations_0, groups = up_states_25_groups_0, pad = up_states_25_pad_0, pad_type = up_states_25_pad_type_0, strides = up_states_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_177)[name = string("up_states_25")]; tensor gate_states_25 = silu(x = input_179)[name = string("gate_states_25")]; tensor input_181 = mul(x = gate_states_25, y = up_states_25)[name = string("input_181")]; string hidden_states_103_pad_type_0 = const()[name = string("hidden_states_103_pad_type_0"), val = string("valid")]; tensor hidden_states_103_strides_0 = const()[name = string("hidden_states_103_strides_0"), val = tensor([1, 1])]; tensor hidden_states_103_pad_0 = const()[name = string("hidden_states_103_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_103_dilations_0 = const()[name = string("hidden_states_103_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_103_groups_0 = const()[name = string("hidden_states_103_groups_0"), val = int32(1)]; tensor hidden_states_103 = conv(dilations = hidden_states_103_dilations_0, groups = hidden_states_103_groups_0, pad = hidden_states_103_pad_0, pad_type = hidden_states_103_pad_type_0, strides = hidden_states_103_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_181)[name = string("hidden_states_103")]; tensor var_2899_axes_0 = const()[name = string("op_2899_axes_0"), val = tensor([2])]; tensor var_2899 = squeeze(axes = var_2899_axes_0, x = hidden_states_103)[name = string("op_2899")]; tensor var_2900 = const()[name = string("op_2900"), val = tensor([0, 2, 1])]; tensor var_2901 = transpose(perm = var_2900, x = var_2899)[name = string("transpose_12")]; tensor hidden_states_105_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = var_2901)[name = string("hidden_states_105_cast_fp16")]; tensor mean_53_axes_0 = const()[name = string("mean_53_axes_0"), val = tensor([-1])]; bool mean_53_keep_dims_0 = const()[name = string("mean_53_keep_dims_0"), val = bool(true)]; tensor mean_53_cast_fp16 = reduce_mean(axes = mean_53_axes_0, keep_dims = mean_53_keep_dims_0, x = hidden_states_105_cast_fp16)[name = string("mean_53_cast_fp16")]; tensor input_183_cast_fp16 = sub(x = hidden_states_105_cast_fp16, y = mean_53_cast_fp16)[name = string("input_183_cast_fp16")]; tensor var_2909_axes_0 = const()[name = string("op_2909_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515411456)))]; tensor var_2909_cast_fp16 = layer_norm(axes = var_2909_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_183_cast_fp16)[name = string("op_2909_cast_fp16")]; tensor var_2912 = const()[name = string("op_2912"), val = tensor([0, 2, 1])]; tensor var_2914_axes_0 = const()[name = string("op_2914_axes_0"), val = tensor([2])]; tensor var_2913 = transpose(perm = var_2912, x = var_2909_cast_fp16)[name = string("transpose_11")]; tensor var_2914 = expand_dims(axes = var_2914_axes_0, x = var_2913)[name = string("op_2914")]; string var_2921_pad_type_0 = const()[name = string("op_2921_pad_type_0"), val = string("valid")]; tensor var_2921_strides_0 = const()[name = string("op_2921_strides_0"), val = tensor([1, 1])]; tensor var_2921_pad_0 = const()[name = string("op_2921_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2921_dilations_0 = const()[name = string("op_2921_dilations_0"), val = tensor([1, 1])]; int32 var_2921_groups_0 = const()[name = string("op_2921_groups_0"), val = int32(1)]; tensor var_2921 = conv(dilations = var_2921_dilations_0, groups = var_2921_groups_0, pad = var_2921_pad_0, pad_type = var_2921_pad_type_0, strides = var_2921_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_2914)[name = string("op_2921")]; tensor var_2922 = const()[name = string("op_2922"), val = tensor([1, 32, 1, 64])]; tensor var_2923 = reshape(shape = var_2922, x = var_2921)[name = string("op_2923")]; string var_2930_pad_type_0 = const()[name = string("op_2930_pad_type_0"), val = string("valid")]; tensor var_2930_strides_0 = const()[name = string("op_2930_strides_0"), val = tensor([1, 1])]; tensor var_2930_pad_0 = const()[name = string("op_2930_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2930_dilations_0 = const()[name = string("op_2930_dilations_0"), val = tensor([1, 1])]; int32 var_2930_groups_0 = const()[name = string("op_2930_groups_0"), val = int32(1)]; tensor var_2930 = conv(dilations = var_2930_dilations_0, groups = var_2930_groups_0, pad = var_2930_pad_0, pad_type = var_2930_pad_type_0, strides = var_2930_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_2914)[name = string("op_2930")]; tensor var_2931 = const()[name = string("op_2931"), val = tensor([1, 8, 1, 64])]; tensor var_2932 = reshape(shape = var_2931, x = var_2930)[name = string("op_2932")]; string var_2939_pad_type_0 = const()[name = string("op_2939_pad_type_0"), val = string("valid")]; tensor var_2939_strides_0 = const()[name = string("op_2939_strides_0"), val = tensor([1, 1])]; tensor var_2939_pad_0 = const()[name = string("op_2939_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2939_dilations_0 = const()[name = string("op_2939_dilations_0"), val = tensor([1, 1])]; int32 var_2939_groups_0 = const()[name = string("op_2939_groups_0"), val = int32(1)]; tensor var_2939 = conv(dilations = var_2939_dilations_0, groups = var_2939_groups_0, pad = var_2939_pad_0, pad_type = var_2939_pad_type_0, strides = var_2939_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_2914)[name = string("op_2939")]; tensor var_2940 = const()[name = string("op_2940"), val = tensor([1, 8, 1, 64])]; tensor var_2941 = reshape(shape = var_2940, x = var_2939)[name = string("op_2941")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_2923)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_2923)[name = string("x2_53")]; tensor var_2955_cast_fp16 = mul(x = x1_53, y = cos_3_cast_fp16)[name = string("op_2955_cast_fp16")]; tensor var_2956_cast_fp16 = mul(x = x2_53, y = sin_3_cast_fp16)[name = string("op_2956_cast_fp16")]; tensor var_2957_cast_fp16 = sub(x = var_2955_cast_fp16, y = var_2956_cast_fp16)[name = string("op_2957_cast_fp16")]; tensor var_2958_cast_fp16 = mul(x = x2_53, y = cos_3_cast_fp16)[name = string("op_2958_cast_fp16")]; tensor var_2959_cast_fp16 = mul(x = x1_53, y = sin_3_cast_fp16)[name = string("op_2959_cast_fp16")]; tensor var_2960_cast_fp16 = add(x = var_2958_cast_fp16, y = var_2959_cast_fp16)[name = string("op_2960_cast_fp16")]; bool rotated_53_interleave_0 = const()[name = string("rotated_53_interleave_0"), val = bool(false)]; tensor rotated_53_cast_fp16 = concat(axis = var_80, interleave = rotated_53_interleave_0, values = (var_2957_cast_fp16, var_2960_cast_fp16))[name = string("rotated_53_cast_fp16")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_2932)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_2932)[name = string("x2_55")]; tensor var_2976_cast_fp16 = mul(x = x1_55, y = cos_3_cast_fp16)[name = string("op_2976_cast_fp16")]; tensor var_2977_cast_fp16 = mul(x = x2_55, y = sin_3_cast_fp16)[name = string("op_2977_cast_fp16")]; tensor var_2978_cast_fp16 = sub(x = var_2976_cast_fp16, y = var_2977_cast_fp16)[name = string("op_2978_cast_fp16")]; tensor var_2979_cast_fp16 = mul(x = x2_55, y = cos_3_cast_fp16)[name = string("op_2979_cast_fp16")]; tensor var_2980_cast_fp16 = mul(x = x1_55, y = sin_3_cast_fp16)[name = string("op_2980_cast_fp16")]; tensor var_2981_cast_fp16 = add(x = var_2979_cast_fp16, y = var_2980_cast_fp16)[name = string("op_2981_cast_fp16")]; bool rotated_55_interleave_0 = const()[name = string("rotated_55_interleave_0"), val = bool(false)]; tensor rotated_55_cast_fp16 = concat(axis = var_80, interleave = rotated_55_interleave_0, values = (var_2978_cast_fp16, var_2981_cast_fp16))[name = string("rotated_55_cast_fp16")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([13])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([14])]; int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_106")]; tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_160, concat_107_values1_0, var_581, concat_107_values3_0))[name = string("concat_107")]; tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = rotated_55_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([29])]; tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([30])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_166, concat_111_values1_0, var_581, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = var_2941, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; tensor var_3001_begin_0 = const()[name = string("op_3001_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3001_end_0 = const()[name = string("op_3001_end_0"), val = tensor([14, 8, 1024, 64])]; tensor var_3001_end_mask_0 = const()[name = string("op_3001_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3001_cast_fp16 = slice_by_index(begin = var_3001_begin_0, end = var_3001_end_0, end_mask = var_3001_end_mask_0, x = coreml_update_state_59)[name = string("op_3001_cast_fp16")]; tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_3001_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; tensor var_3003_begin_0 = const()[name = string("op_3003_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_3003_end_0 = const()[name = string("op_3003_end_0"), val = tensor([30, 8, 1024, 64])]; tensor var_3003_end_mask_0 = const()[name = string("op_3003_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3003_cast_fp16 = slice_by_index(begin = var_3003_begin_0, end = var_3003_end_0, end_mask = var_3003_end_mask_0, x = coreml_update_state_59)[name = string("op_3003_cast_fp16")]; tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_3003_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; tensor x_375_axes_0 = const()[name = string("x_375_axes_0"), val = tensor([1])]; tensor x_375_cast_fp16 = expand_dims(axes = x_375_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_375_cast_fp16")]; tensor var_3012 = const()[name = string("op_3012"), val = tensor([1, 4, 1, 1])]; tensor x_377_cast_fp16 = tile(reps = var_3012, x = x_375_cast_fp16)[name = string("x_377_cast_fp16")]; tensor var_3016 = const()[name = string("op_3016"), val = tensor([1, -1, 1024, 64])]; tensor key_states_55_cast_fp16 = reshape(shape = var_3016, x = x_377_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor x_381_axes_0 = const()[name = string("x_381_axes_0"), val = tensor([1])]; tensor x_381_cast_fp16 = expand_dims(axes = x_381_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_381_cast_fp16")]; tensor var_3019 = const()[name = string("op_3019"), val = tensor([1, 4, 1, 1])]; tensor x_383_cast_fp16 = tile(reps = var_3019, x = x_381_cast_fp16)[name = string("x_383_cast_fp16")]; tensor var_3023 = const()[name = string("op_3023"), val = tensor([1, -1, 1024, 64])]; tensor value_states_55_cast_fp16 = reshape(shape = var_3023, x = x_383_cast_fp16)[name = string("value_states_55_cast_fp16")]; bool var_3026_transpose_x_1 = const()[name = string("op_3026_transpose_x_1"), val = bool(false)]; bool var_3026_transpose_y_1 = const()[name = string("op_3026_transpose_y_1"), val = bool(true)]; tensor var_3026_cast_fp16 = matmul(transpose_x = var_3026_transpose_x_1, transpose_y = var_3026_transpose_y_1, x = rotated_53_cast_fp16, y = key_states_55_cast_fp16)[name = string("op_3026_cast_fp16")]; fp16 var_3027_to_fp16 = const()[name = string("op_3027_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_53_cast_fp16 = mul(x = var_3026_cast_fp16, y = var_3027_to_fp16)[name = string("attn_weights_53_cast_fp16")]; tensor x_385_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = causal_mask)[name = string("x_385_cast_fp16")]; tensor reduce_max_13_axes_0 = const()[name = string("reduce_max_13_axes_0"), val = tensor([-1])]; bool reduce_max_13_keep_dims_0 = const()[name = string("reduce_max_13_keep_dims_0"), val = bool(true)]; tensor reduce_max_13_cast_fp16 = reduce_max(axes = reduce_max_13_axes_0, keep_dims = reduce_max_13_keep_dims_0, x = x_385_cast_fp16)[name = string("reduce_max_13_cast_fp16")]; tensor x_387_cast_fp16 = sub(x = x_385_cast_fp16, y = reduce_max_13_cast_fp16)[name = string("x_387_cast_fp16")]; tensor exp_x_27_cast_fp16 = exp(x = x_387_cast_fp16)[name = string("exp_x_27_cast_fp16")]; tensor var_3038_axes_0 = const()[name = string("op_3038_axes_0"), val = tensor([-1])]; bool var_3038_keep_dims_0 = const()[name = string("op_3038_keep_dims_0"), val = bool(true)]; tensor var_3038_cast_fp16 = reduce_sum(axes = var_3038_axes_0, keep_dims = var_3038_keep_dims_0, x = exp_x_27_cast_fp16)[name = string("op_3038_cast_fp16")]; tensor attn_weights_55_cast_fp16 = real_div(x = exp_x_27_cast_fp16, y = var_3038_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; bool attn_output_79_transpose_x_0 = const()[name = string("attn_output_79_transpose_x_0"), val = bool(false)]; bool attn_output_79_transpose_y_0 = const()[name = string("attn_output_79_transpose_y_0"), val = bool(false)]; tensor attn_output_79_cast_fp16 = matmul(transpose_x = attn_output_79_transpose_x_0, transpose_y = attn_output_79_transpose_y_0, x = attn_weights_55_cast_fp16, y = value_states_55_cast_fp16)[name = string("attn_output_79_cast_fp16")]; tensor var_3041_perm_0 = const()[name = string("op_3041_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3043 = const()[name = string("op_3043"), val = tensor([1, 1, 2048])]; tensor var_3041_cast_fp16 = transpose(perm = var_3041_perm_0, x = attn_output_79_cast_fp16)[name = string("transpose_10")]; tensor input_187_cast_fp16 = reshape(shape = var_3043, x = var_3041_cast_fp16)[name = string("input_187_cast_fp16")]; tensor model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515415616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517512832))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_13_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_187_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor hidden_states_109_cast_fp16 = add(x = hidden_states_105_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; tensor mean_55_axes_0 = const()[name = string("mean_55_axes_0"), val = tensor([-1])]; bool mean_55_keep_dims_0 = const()[name = string("mean_55_keep_dims_0"), val = bool(true)]; tensor mean_55_cast_fp16 = reduce_mean(axes = mean_55_axes_0, keep_dims = mean_55_keep_dims_0, x = hidden_states_109_cast_fp16)[name = string("mean_55_cast_fp16")]; tensor input_189_cast_fp16 = sub(x = hidden_states_109_cast_fp16, y = mean_55_cast_fp16)[name = string("input_189_cast_fp16")]; tensor var_3054_axes_0 = const()[name = string("op_3054_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517521088)))]; tensor var_3054_cast_fp16 = layer_norm(axes = var_3054_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_189_cast_fp16)[name = string("op_3054_cast_fp16")]; tensor var_3061 = const()[name = string("op_3061"), val = tensor([0, 2, 1])]; tensor input_191_axes_0 = const()[name = string("input_191_axes_0"), val = tensor([2])]; tensor var_3062 = transpose(perm = var_3061, x = var_3054_cast_fp16)[name = string("transpose_9")]; tensor input_191 = expand_dims(axes = input_191_axes_0, x = var_3062)[name = string("input_191")]; string input_193_pad_type_0 = const()[name = string("input_193_pad_type_0"), val = string("valid")]; tensor input_193_strides_0 = const()[name = string("input_193_strides_0"), val = tensor([1, 1])]; tensor input_193_pad_0 = const()[name = string("input_193_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_193_dilations_0 = const()[name = string("input_193_dilations_0"), val = tensor([1, 1])]; int32 input_193_groups_0 = const()[name = string("input_193_groups_0"), val = int32(1)]; tensor input_193 = conv(dilations = input_193_dilations_0, groups = input_193_groups_0, pad = input_193_pad_0, pad_type = input_193_pad_type_0, strides = input_193_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_191)[name = string("input_193")]; string up_states_27_pad_type_0 = const()[name = string("up_states_27_pad_type_0"), val = string("valid")]; tensor up_states_27_strides_0 = const()[name = string("up_states_27_strides_0"), val = tensor([1, 1])]; tensor up_states_27_pad_0 = const()[name = string("up_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_27_dilations_0 = const()[name = string("up_states_27_dilations_0"), val = tensor([1, 1])]; int32 up_states_27_groups_0 = const()[name = string("up_states_27_groups_0"), val = int32(1)]; tensor up_states_27 = conv(dilations = up_states_27_dilations_0, groups = up_states_27_groups_0, pad = up_states_27_pad_0, pad_type = up_states_27_pad_type_0, strides = up_states_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_191)[name = string("up_states_27")]; tensor gate_states_27 = silu(x = input_193)[name = string("gate_states_27")]; tensor input_195 = mul(x = gate_states_27, y = up_states_27)[name = string("input_195")]; string hidden_states_111_pad_type_0 = const()[name = string("hidden_states_111_pad_type_0"), val = string("valid")]; tensor hidden_states_111_strides_0 = const()[name = string("hidden_states_111_strides_0"), val = tensor([1, 1])]; tensor hidden_states_111_pad_0 = const()[name = string("hidden_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_111_dilations_0 = const()[name = string("hidden_states_111_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_111_groups_0 = const()[name = string("hidden_states_111_groups_0"), val = int32(1)]; tensor hidden_states_111 = conv(dilations = hidden_states_111_dilations_0, groups = hidden_states_111_groups_0, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = hidden_states_111_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_195)[name = string("hidden_states_111")]; tensor var_3084_axes_0 = const()[name = string("op_3084_axes_0"), val = tensor([2])]; tensor var_3084 = squeeze(axes = var_3084_axes_0, x = hidden_states_111)[name = string("op_3084")]; tensor var_3085 = const()[name = string("op_3085"), val = tensor([0, 2, 1])]; tensor var_3086 = transpose(perm = var_3085, x = var_3084)[name = string("transpose_8")]; tensor hidden_states_113_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = var_3086)[name = string("hidden_states_113_cast_fp16")]; tensor mean_57_axes_0 = const()[name = string("mean_57_axes_0"), val = tensor([-1])]; bool mean_57_keep_dims_0 = const()[name = string("mean_57_keep_dims_0"), val = bool(true)]; tensor mean_57_cast_fp16 = reduce_mean(axes = mean_57_axes_0, keep_dims = mean_57_keep_dims_0, x = hidden_states_113_cast_fp16)[name = string("mean_57_cast_fp16")]; tensor input_197_cast_fp16 = sub(x = hidden_states_113_cast_fp16, y = mean_57_cast_fp16)[name = string("input_197_cast_fp16")]; tensor var_3094_axes_0 = const()[name = string("op_3094_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517525248)))]; tensor var_3094_cast_fp16 = layer_norm(axes = var_3094_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_197_cast_fp16)[name = string("op_3094_cast_fp16")]; tensor var_3097 = const()[name = string("op_3097"), val = tensor([0, 2, 1])]; tensor var_3099_axes_0 = const()[name = string("op_3099_axes_0"), val = tensor([2])]; tensor var_3098 = transpose(perm = var_3097, x = var_3094_cast_fp16)[name = string("transpose_7")]; tensor var_3099 = expand_dims(axes = var_3099_axes_0, x = var_3098)[name = string("op_3099")]; string var_3106_pad_type_0 = const()[name = string("op_3106_pad_type_0"), val = string("valid")]; tensor var_3106_strides_0 = const()[name = string("op_3106_strides_0"), val = tensor([1, 1])]; tensor var_3106_pad_0 = const()[name = string("op_3106_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3106_dilations_0 = const()[name = string("op_3106_dilations_0"), val = tensor([1, 1])]; int32 var_3106_groups_0 = const()[name = string("op_3106_groups_0"), val = int32(1)]; tensor var_3106 = conv(dilations = var_3106_dilations_0, groups = var_3106_groups_0, pad = var_3106_pad_0, pad_type = var_3106_pad_type_0, strides = var_3106_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_3099)[name = string("op_3106")]; tensor var_3107 = const()[name = string("op_3107"), val = tensor([1, 32, 1, 64])]; tensor var_3108 = reshape(shape = var_3107, x = var_3106)[name = string("op_3108")]; string var_3115_pad_type_0 = const()[name = string("op_3115_pad_type_0"), val = string("valid")]; tensor var_3115_strides_0 = const()[name = string("op_3115_strides_0"), val = tensor([1, 1])]; tensor var_3115_pad_0 = const()[name = string("op_3115_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3115_dilations_0 = const()[name = string("op_3115_dilations_0"), val = tensor([1, 1])]; int32 var_3115_groups_0 = const()[name = string("op_3115_groups_0"), val = int32(1)]; tensor var_3115 = conv(dilations = var_3115_dilations_0, groups = var_3115_groups_0, pad = var_3115_pad_0, pad_type = var_3115_pad_type_0, strides = var_3115_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_3099)[name = string("op_3115")]; tensor var_3116 = const()[name = string("op_3116"), val = tensor([1, 8, 1, 64])]; tensor var_3117 = reshape(shape = var_3116, x = var_3115)[name = string("op_3117")]; string var_3124_pad_type_0 = const()[name = string("op_3124_pad_type_0"), val = string("valid")]; tensor var_3124_strides_0 = const()[name = string("op_3124_strides_0"), val = tensor([1, 1])]; tensor var_3124_pad_0 = const()[name = string("op_3124_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3124_dilations_0 = const()[name = string("op_3124_dilations_0"), val = tensor([1, 1])]; int32 var_3124_groups_0 = const()[name = string("op_3124_groups_0"), val = int32(1)]; tensor var_3124 = conv(dilations = var_3124_dilations_0, groups = var_3124_groups_0, pad = var_3124_pad_0, pad_type = var_3124_pad_type_0, strides = var_3124_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_3099)[name = string("op_3124")]; tensor var_3125 = const()[name = string("op_3125"), val = tensor([1, 8, 1, 64])]; tensor var_3126 = reshape(shape = var_3125, x = var_3124)[name = string("op_3126")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_3108)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_3108)[name = string("x2_57")]; tensor var_3140_cast_fp16 = mul(x = x1_57, y = cos_3_cast_fp16)[name = string("op_3140_cast_fp16")]; tensor var_3141_cast_fp16 = mul(x = x2_57, y = sin_3_cast_fp16)[name = string("op_3141_cast_fp16")]; tensor var_3142_cast_fp16 = sub(x = var_3140_cast_fp16, y = var_3141_cast_fp16)[name = string("op_3142_cast_fp16")]; tensor var_3143_cast_fp16 = mul(x = x2_57, y = cos_3_cast_fp16)[name = string("op_3143_cast_fp16")]; tensor var_3144_cast_fp16 = mul(x = x1_57, y = sin_3_cast_fp16)[name = string("op_3144_cast_fp16")]; tensor var_3145_cast_fp16 = add(x = var_3143_cast_fp16, y = var_3144_cast_fp16)[name = string("op_3145_cast_fp16")]; bool rotated_57_interleave_0 = const()[name = string("rotated_57_interleave_0"), val = bool(false)]; tensor rotated_57_cast_fp16 = concat(axis = var_80, interleave = rotated_57_interleave_0, values = (var_3142_cast_fp16, var_3145_cast_fp16))[name = string("rotated_57_cast_fp16")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_3117)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_3117)[name = string("x2_59")]; tensor var_3161_cast_fp16 = mul(x = x1_59, y = cos_3_cast_fp16)[name = string("op_3161_cast_fp16")]; tensor var_3162_cast_fp16 = mul(x = x2_59, y = sin_3_cast_fp16)[name = string("op_3162_cast_fp16")]; tensor var_3163_cast_fp16 = sub(x = var_3161_cast_fp16, y = var_3162_cast_fp16)[name = string("op_3163_cast_fp16")]; tensor var_3164_cast_fp16 = mul(x = x2_59, y = cos_3_cast_fp16)[name = string("op_3164_cast_fp16")]; tensor var_3165_cast_fp16 = mul(x = x1_59, y = sin_3_cast_fp16)[name = string("op_3165_cast_fp16")]; tensor var_3166_cast_fp16 = add(x = var_3164_cast_fp16, y = var_3165_cast_fp16)[name = string("op_3166_cast_fp16")]; bool rotated_59_interleave_0 = const()[name = string("rotated_59_interleave_0"), val = bool(false)]; tensor rotated_59_cast_fp16 = concat(axis = var_80, interleave = rotated_59_interleave_0, values = (var_3163_cast_fp16, var_3166_cast_fp16))[name = string("rotated_59_cast_fp16")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([14])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([15])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_172, concat_115_values1_0, var_581, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = rotated_59_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([30])]; tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([31])]; int32 concat_118_axis_0 = const()[name = string("concat_118_axis_0"), val = int32(0)]; bool concat_118_interleave_0 = const()[name = string("concat_118_interleave_0"), val = bool(false)]; tensor concat_118 = concat(axis = concat_118_axis_0, interleave = concat_118_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_118")]; tensor concat_119_values1_0 = const()[name = string("concat_119_values1_0"), val = tensor([0])]; tensor concat_119_values3_0 = const()[name = string("concat_119_values3_0"), val = tensor([0])]; int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (expand_dims_178, concat_119_values1_0, var_581, concat_119_values3_0))[name = string("concat_119")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = var_3126, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; tensor var_3186_begin_0 = const()[name = string("op_3186_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_3186_end_0 = const()[name = string("op_3186_end_0"), val = tensor([15, 8, 1024, 64])]; tensor var_3186_end_mask_0 = const()[name = string("op_3186_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3186_cast_fp16 = slice_by_index(begin = var_3186_begin_0, end = var_3186_end_0, end_mask = var_3186_end_mask_0, x = coreml_update_state_61)[name = string("op_3186_cast_fp16")]; tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_3186_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; tensor var_3188_begin_0 = const()[name = string("op_3188_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_3188_end_0 = const()[name = string("op_3188_end_0"), val = tensor([31, 8, 1024, 64])]; tensor var_3188_end_mask_0 = const()[name = string("op_3188_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3188_cast_fp16 = slice_by_index(begin = var_3188_begin_0, end = var_3188_end_0, end_mask = var_3188_end_mask_0, x = coreml_update_state_61)[name = string("op_3188_cast_fp16")]; tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_3188_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; tensor x_403_axes_0 = const()[name = string("x_403_axes_0"), val = tensor([1])]; tensor x_403_cast_fp16 = expand_dims(axes = x_403_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_403_cast_fp16")]; tensor var_3197 = const()[name = string("op_3197"), val = tensor([1, 4, 1, 1])]; tensor x_405_cast_fp16 = tile(reps = var_3197, x = x_403_cast_fp16)[name = string("x_405_cast_fp16")]; tensor var_3201 = const()[name = string("op_3201"), val = tensor([1, -1, 1024, 64])]; tensor key_states_59_cast_fp16 = reshape(shape = var_3201, x = x_405_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor x_409_axes_0 = const()[name = string("x_409_axes_0"), val = tensor([1])]; tensor x_409_cast_fp16 = expand_dims(axes = x_409_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_409_cast_fp16")]; tensor var_3204 = const()[name = string("op_3204"), val = tensor([1, 4, 1, 1])]; tensor x_411_cast_fp16 = tile(reps = var_3204, x = x_409_cast_fp16)[name = string("x_411_cast_fp16")]; tensor var_3208 = const()[name = string("op_3208"), val = tensor([1, -1, 1024, 64])]; tensor value_states_59_cast_fp16 = reshape(shape = var_3208, x = x_411_cast_fp16)[name = string("value_states_59_cast_fp16")]; bool var_3211_transpose_x_1 = const()[name = string("op_3211_transpose_x_1"), val = bool(false)]; bool var_3211_transpose_y_1 = const()[name = string("op_3211_transpose_y_1"), val = bool(true)]; tensor var_3211_cast_fp16 = matmul(transpose_x = var_3211_transpose_x_1, transpose_y = var_3211_transpose_y_1, x = rotated_57_cast_fp16, y = key_states_59_cast_fp16)[name = string("op_3211_cast_fp16")]; fp16 var_3212_to_fp16 = const()[name = string("op_3212_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_57_cast_fp16 = mul(x = var_3211_cast_fp16, y = var_3212_to_fp16)[name = string("attn_weights_57_cast_fp16")]; tensor x_413_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = causal_mask)[name = string("x_413_cast_fp16")]; tensor reduce_max_14_axes_0 = const()[name = string("reduce_max_14_axes_0"), val = tensor([-1])]; bool reduce_max_14_keep_dims_0 = const()[name = string("reduce_max_14_keep_dims_0"), val = bool(true)]; tensor reduce_max_14_cast_fp16 = reduce_max(axes = reduce_max_14_axes_0, keep_dims = reduce_max_14_keep_dims_0, x = x_413_cast_fp16)[name = string("reduce_max_14_cast_fp16")]; tensor x_415_cast_fp16 = sub(x = x_413_cast_fp16, y = reduce_max_14_cast_fp16)[name = string("x_415_cast_fp16")]; tensor exp_x_29_cast_fp16 = exp(x = x_415_cast_fp16)[name = string("exp_x_29_cast_fp16")]; tensor var_3223_axes_0 = const()[name = string("op_3223_axes_0"), val = tensor([-1])]; bool var_3223_keep_dims_0 = const()[name = string("op_3223_keep_dims_0"), val = bool(true)]; tensor var_3223_cast_fp16 = reduce_sum(axes = var_3223_axes_0, keep_dims = var_3223_keep_dims_0, x = exp_x_29_cast_fp16)[name = string("op_3223_cast_fp16")]; tensor attn_weights_59_cast_fp16 = real_div(x = exp_x_29_cast_fp16, y = var_3223_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; bool attn_output_85_transpose_x_0 = const()[name = string("attn_output_85_transpose_x_0"), val = bool(false)]; bool attn_output_85_transpose_y_0 = const()[name = string("attn_output_85_transpose_y_0"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_0, transpose_y = attn_output_85_transpose_y_0, x = attn_weights_59_cast_fp16, y = value_states_59_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_3226_perm_0 = const()[name = string("op_3226_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3228 = const()[name = string("op_3228"), val = tensor([1, 1, 2048])]; tensor var_3226_cast_fp16 = transpose(perm = var_3226_perm_0, x = attn_output_85_cast_fp16)[name = string("transpose_6")]; tensor input_201_cast_fp16 = reshape(shape = var_3228, x = var_3226_cast_fp16)[name = string("input_201_cast_fp16")]; tensor model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517529408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519626624))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_201_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor hidden_states_117_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = linear_14_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor mean_59_axes_0 = const()[name = string("mean_59_axes_0"), val = tensor([-1])]; bool mean_59_keep_dims_0 = const()[name = string("mean_59_keep_dims_0"), val = bool(true)]; tensor mean_59_cast_fp16 = reduce_mean(axes = mean_59_axes_0, keep_dims = mean_59_keep_dims_0, x = hidden_states_117_cast_fp16)[name = string("mean_59_cast_fp16")]; tensor input_203_cast_fp16 = sub(x = hidden_states_117_cast_fp16, y = mean_59_cast_fp16)[name = string("input_203_cast_fp16")]; tensor var_3239_axes_0 = const()[name = string("op_3239_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519634880)))]; tensor var_3239_cast_fp16 = layer_norm(axes = var_3239_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_203_cast_fp16)[name = string("op_3239_cast_fp16")]; tensor var_3246 = const()[name = string("op_3246"), val = tensor([0, 2, 1])]; tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; tensor var_3247 = transpose(perm = var_3246, x = var_3239_cast_fp16)[name = string("transpose_5")]; tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_3247)[name = string("input_205")]; string input_207_pad_type_0 = const()[name = string("input_207_pad_type_0"), val = string("valid")]; tensor input_207_strides_0 = const()[name = string("input_207_strides_0"), val = tensor([1, 1])]; tensor input_207_pad_0 = const()[name = string("input_207_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_207_dilations_0 = const()[name = string("input_207_dilations_0"), val = tensor([1, 1])]; int32 input_207_groups_0 = const()[name = string("input_207_groups_0"), val = int32(1)]; tensor input_207 = conv(dilations = input_207_dilations_0, groups = input_207_groups_0, pad = input_207_pad_0, pad_type = input_207_pad_type_0, strides = input_207_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_205)[name = string("input_207")]; string up_states_29_pad_type_0 = const()[name = string("up_states_29_pad_type_0"), val = string("valid")]; tensor up_states_29_strides_0 = const()[name = string("up_states_29_strides_0"), val = tensor([1, 1])]; tensor up_states_29_pad_0 = const()[name = string("up_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_29_dilations_0 = const()[name = string("up_states_29_dilations_0"), val = tensor([1, 1])]; int32 up_states_29_groups_0 = const()[name = string("up_states_29_groups_0"), val = int32(1)]; tensor up_states_29 = conv(dilations = up_states_29_dilations_0, groups = up_states_29_groups_0, pad = up_states_29_pad_0, pad_type = up_states_29_pad_type_0, strides = up_states_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_205)[name = string("up_states_29")]; tensor gate_states_29 = silu(x = input_207)[name = string("gate_states_29")]; tensor input_209 = mul(x = gate_states_29, y = up_states_29)[name = string("input_209")]; string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; tensor hidden_states_119 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_209)[name = string("hidden_states_119")]; tensor var_3269_axes_0 = const()[name = string("op_3269_axes_0"), val = tensor([2])]; tensor var_3269 = squeeze(axes = var_3269_axes_0, x = hidden_states_119)[name = string("op_3269")]; tensor var_3270 = const()[name = string("op_3270"), val = tensor([0, 2, 1])]; tensor var_3271 = transpose(perm = var_3270, x = var_3269)[name = string("transpose_4")]; tensor hidden_states_121_cast_fp16 = add(x = hidden_states_117_cast_fp16, y = var_3271)[name = string("hidden_states_121_cast_fp16")]; tensor mean_61_axes_0 = const()[name = string("mean_61_axes_0"), val = tensor([-1])]; bool mean_61_keep_dims_0 = const()[name = string("mean_61_keep_dims_0"), val = bool(true)]; tensor mean_61_cast_fp16 = reduce_mean(axes = mean_61_axes_0, keep_dims = mean_61_keep_dims_0, x = hidden_states_121_cast_fp16)[name = string("mean_61_cast_fp16")]; tensor input_211_cast_fp16 = sub(x = hidden_states_121_cast_fp16, y = mean_61_cast_fp16)[name = string("input_211_cast_fp16")]; tensor var_3279_axes_0 = const()[name = string("op_3279_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519639040)))]; tensor var_3279_cast_fp16 = layer_norm(axes = var_3279_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_211_cast_fp16)[name = string("op_3279_cast_fp16")]; tensor var_3282 = const()[name = string("op_3282"), val = tensor([0, 2, 1])]; tensor var_3284_axes_0 = const()[name = string("op_3284_axes_0"), val = tensor([2])]; tensor var_3283 = transpose(perm = var_3282, x = var_3279_cast_fp16)[name = string("transpose_3")]; tensor var_3284 = expand_dims(axes = var_3284_axes_0, x = var_3283)[name = string("op_3284")]; string var_3291_pad_type_0 = const()[name = string("op_3291_pad_type_0"), val = string("valid")]; tensor var_3291_strides_0 = const()[name = string("op_3291_strides_0"), val = tensor([1, 1])]; tensor var_3291_pad_0 = const()[name = string("op_3291_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3291_dilations_0 = const()[name = string("op_3291_dilations_0"), val = tensor([1, 1])]; int32 var_3291_groups_0 = const()[name = string("op_3291_groups_0"), val = int32(1)]; tensor var_3291 = conv(dilations = var_3291_dilations_0, groups = var_3291_groups_0, pad = var_3291_pad_0, pad_type = var_3291_pad_type_0, strides = var_3291_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_3284)[name = string("op_3291")]; tensor var_3292 = const()[name = string("op_3292"), val = tensor([1, 32, 1, 64])]; tensor var_3293 = reshape(shape = var_3292, x = var_3291)[name = string("op_3293")]; string var_3300_pad_type_0 = const()[name = string("op_3300_pad_type_0"), val = string("valid")]; tensor var_3300_strides_0 = const()[name = string("op_3300_strides_0"), val = tensor([1, 1])]; tensor var_3300_pad_0 = const()[name = string("op_3300_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3300_dilations_0 = const()[name = string("op_3300_dilations_0"), val = tensor([1, 1])]; int32 var_3300_groups_0 = const()[name = string("op_3300_groups_0"), val = int32(1)]; tensor var_3300 = conv(dilations = var_3300_dilations_0, groups = var_3300_groups_0, pad = var_3300_pad_0, pad_type = var_3300_pad_type_0, strides = var_3300_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_3284)[name = string("op_3300")]; tensor var_3301 = const()[name = string("op_3301"), val = tensor([1, 8, 1, 64])]; tensor var_3302 = reshape(shape = var_3301, x = var_3300)[name = string("op_3302")]; string var_3309_pad_type_0 = const()[name = string("op_3309_pad_type_0"), val = string("valid")]; tensor var_3309_strides_0 = const()[name = string("op_3309_strides_0"), val = tensor([1, 1])]; tensor var_3309_pad_0 = const()[name = string("op_3309_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3309_dilations_0 = const()[name = string("op_3309_dilations_0"), val = tensor([1, 1])]; int32 var_3309_groups_0 = const()[name = string("op_3309_groups_0"), val = int32(1)]; tensor var_3309 = conv(dilations = var_3309_dilations_0, groups = var_3309_groups_0, pad = var_3309_pad_0, pad_type = var_3309_pad_type_0, strides = var_3309_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_3284)[name = string("op_3309")]; tensor var_3310 = const()[name = string("op_3310"), val = tensor([1, 8, 1, 64])]; tensor var_3311 = reshape(shape = var_3310, x = var_3309)[name = string("op_3311")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_3293)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_3293)[name = string("x2_61")]; tensor var_3325_cast_fp16 = mul(x = x1_61, y = cos_3_cast_fp16)[name = string("op_3325_cast_fp16")]; tensor var_3326_cast_fp16 = mul(x = x2_61, y = sin_3_cast_fp16)[name = string("op_3326_cast_fp16")]; tensor var_3327_cast_fp16 = sub(x = var_3325_cast_fp16, y = var_3326_cast_fp16)[name = string("op_3327_cast_fp16")]; tensor var_3328_cast_fp16 = mul(x = x2_61, y = cos_3_cast_fp16)[name = string("op_3328_cast_fp16")]; tensor var_3329_cast_fp16 = mul(x = x1_61, y = sin_3_cast_fp16)[name = string("op_3329_cast_fp16")]; tensor var_3330_cast_fp16 = add(x = var_3328_cast_fp16, y = var_3329_cast_fp16)[name = string("op_3330_cast_fp16")]; bool rotated_61_interleave_0 = const()[name = string("rotated_61_interleave_0"), val = bool(false)]; tensor rotated_61_cast_fp16 = concat(axis = var_80, interleave = rotated_61_interleave_0, values = (var_3327_cast_fp16, var_3330_cast_fp16))[name = string("rotated_61_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_3302)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_3302)[name = string("x2")]; tensor var_3346_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_3346_cast_fp16")]; tensor var_3347_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_3347_cast_fp16")]; tensor var_3348_cast_fp16 = sub(x = var_3346_cast_fp16, y = var_3347_cast_fp16)[name = string("op_3348_cast_fp16")]; tensor var_3349_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_3349_cast_fp16")]; tensor var_3350_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_3350_cast_fp16")]; tensor var_3351_cast_fp16 = add(x = var_3349_cast_fp16, y = var_3350_cast_fp16)[name = string("op_3351_cast_fp16")]; bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; tensor rotated_cast_fp16 = concat(axis = var_80, interleave = rotated_interleave_0, values = (var_3348_cast_fp16, var_3351_cast_fp16))[name = string("rotated_cast_fp16")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([15])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([16])]; int32 concat_122_axis_0 = const()[name = string("concat_122_axis_0"), val = int32(0)]; bool concat_122_interleave_0 = const()[name = string("concat_122_interleave_0"), val = bool(false)]; tensor concat_122 = concat(axis = concat_122_axis_0, interleave = concat_122_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_122")]; tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_184, concat_123_values1_0, var_581, concat_123_values3_0))[name = string("concat_123")]; tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_122, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_123, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = rotated_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([31])]; tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([32])]; int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_126")]; tensor concat_127_values1_0 = const()[name = string("concat_127_values1_0"), val = tensor([0])]; tensor concat_127_values3_0 = const()[name = string("concat_127_values3_0"), val = tensor([0])]; int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (expand_dims_190, concat_127_values1_0, var_581, concat_127_values3_0))[name = string("concat_127")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_126, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_127, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = var_3311, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; tensor var_3371_begin_0 = const()[name = string("op_3371_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_3371_end_0 = const()[name = string("op_3371_end_0"), val = tensor([16, 8, 1024, 64])]; tensor var_3371_end_mask_0 = const()[name = string("op_3371_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3371_cast_fp16 = slice_by_index(begin = var_3371_begin_0, end = var_3371_end_0, end_mask = var_3371_end_mask_0, x = coreml_update_state_63)[name = string("op_3371_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_3371_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_3373_begin_0 = const()[name = string("op_3373_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_3373_end_0 = const()[name = string("op_3373_end_0"), val = tensor([1, 8, 1024, 64])]; tensor var_3373_end_mask_0 = const()[name = string("op_3373_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3373_cast_fp16 = slice_by_index(begin = var_3373_begin_0, end = var_3373_end_0, end_mask = var_3373_end_mask_0, x = coreml_update_state_63)[name = string("op_3373_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_3373_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_431_axes_0 = const()[name = string("x_431_axes_0"), val = tensor([1])]; tensor x_431_cast_fp16 = expand_dims(axes = x_431_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_431_cast_fp16")]; tensor var_3382 = const()[name = string("op_3382"), val = tensor([1, 4, 1, 1])]; tensor x_433_cast_fp16 = tile(reps = var_3382, x = x_431_cast_fp16)[name = string("x_433_cast_fp16")]; tensor var_3386 = const()[name = string("op_3386"), val = tensor([1, -1, 1024, 64])]; tensor key_states_cast_fp16 = reshape(shape = var_3386, x = x_433_cast_fp16)[name = string("key_states_cast_fp16")]; tensor x_437_axes_0 = const()[name = string("x_437_axes_0"), val = tensor([1])]; tensor x_437_cast_fp16 = expand_dims(axes = x_437_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_437_cast_fp16")]; tensor var_3389 = const()[name = string("op_3389"), val = tensor([1, 4, 1, 1])]; tensor x_439_cast_fp16 = tile(reps = var_3389, x = x_437_cast_fp16)[name = string("x_439_cast_fp16")]; tensor var_3393 = const()[name = string("op_3393"), val = tensor([1, -1, 1024, 64])]; tensor value_states_cast_fp16 = reshape(shape = var_3393, x = x_439_cast_fp16)[name = string("value_states_cast_fp16")]; bool var_3396_transpose_x_1 = const()[name = string("op_3396_transpose_x_1"), val = bool(false)]; bool var_3396_transpose_y_1 = const()[name = string("op_3396_transpose_y_1"), val = bool(true)]; tensor var_3396_cast_fp16 = matmul(transpose_x = var_3396_transpose_x_1, transpose_y = var_3396_transpose_y_1, x = rotated_61_cast_fp16, y = key_states_cast_fp16)[name = string("op_3396_cast_fp16")]; fp16 var_3397_to_fp16 = const()[name = string("op_3397_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_61_cast_fp16 = mul(x = var_3396_cast_fp16, y = var_3397_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor x_441_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("x_441_cast_fp16")]; tensor reduce_max_15_axes_0 = const()[name = string("reduce_max_15_axes_0"), val = tensor([-1])]; bool reduce_max_15_keep_dims_0 = const()[name = string("reduce_max_15_keep_dims_0"), val = bool(true)]; tensor reduce_max_15_cast_fp16 = reduce_max(axes = reduce_max_15_axes_0, keep_dims = reduce_max_15_keep_dims_0, x = x_441_cast_fp16)[name = string("reduce_max_15_cast_fp16")]; tensor x_443_cast_fp16 = sub(x = x_441_cast_fp16, y = reduce_max_15_cast_fp16)[name = string("x_443_cast_fp16")]; tensor exp_x_cast_fp16 = exp(x = x_443_cast_fp16)[name = string("exp_x_cast_fp16")]; tensor var_3408_axes_0 = const()[name = string("op_3408_axes_0"), val = tensor([-1])]; bool var_3408_keep_dims_0 = const()[name = string("op_3408_keep_dims_0"), val = bool(true)]; tensor var_3408_cast_fp16 = reduce_sum(axes = var_3408_axes_0, keep_dims = var_3408_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_3408_cast_fp16")]; tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_3408_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_91_cast_fp16")]; tensor var_3411_perm_0 = const()[name = string("op_3411_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3413 = const()[name = string("op_3413"), val = tensor([1, 1, 2048])]; tensor var_3411_cast_fp16 = transpose(perm = var_3411_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_2")]; tensor input_215_cast_fp16 = reshape(shape = var_3413, x = var_3411_cast_fp16)[name = string("input_215_cast_fp16")]; tensor model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519643200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521740416))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_15_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor hidden_states_125_cast_fp16 = add(x = hidden_states_121_cast_fp16, y = linear_15_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor mean_63_axes_0 = const()[name = string("mean_63_axes_0"), val = tensor([-1])]; bool mean_63_keep_dims_0 = const()[name = string("mean_63_keep_dims_0"), val = bool(true)]; tensor mean_63_cast_fp16 = reduce_mean(axes = mean_63_axes_0, keep_dims = mean_63_keep_dims_0, x = hidden_states_125_cast_fp16)[name = string("mean_63_cast_fp16")]; tensor input_217_cast_fp16 = sub(x = hidden_states_125_cast_fp16, y = mean_63_cast_fp16)[name = string("input_217_cast_fp16")]; tensor var_3424_axes_0 = const()[name = string("op_3424_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521748672)))]; tensor var_3424_cast_fp16 = layer_norm(axes = var_3424_axes_0, epsilon = var_75_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_217_cast_fp16)[name = string("op_3424_cast_fp16")]; tensor var_3431 = const()[name = string("op_3431"), val = tensor([0, 2, 1])]; tensor input_219_axes_0 = const()[name = string("input_219_axes_0"), val = tensor([2])]; tensor var_3432 = transpose(perm = var_3431, x = var_3424_cast_fp16)[name = string("transpose_1")]; tensor input_219 = expand_dims(axes = input_219_axes_0, x = var_3432)[name = string("input_219")]; string input_221_pad_type_0 = const()[name = string("input_221_pad_type_0"), val = string("valid")]; tensor input_221_strides_0 = const()[name = string("input_221_strides_0"), val = tensor([1, 1])]; tensor input_221_pad_0 = const()[name = string("input_221_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_221_dilations_0 = const()[name = string("input_221_dilations_0"), val = tensor([1, 1])]; int32 input_221_groups_0 = const()[name = string("input_221_groups_0"), val = int32(1)]; tensor input_221 = conv(dilations = input_221_dilations_0, groups = input_221_groups_0, pad = input_221_pad_0, pad_type = input_221_pad_type_0, strides = input_221_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_219)[name = string("input_221")]; string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_219)[name = string("up_states")]; tensor gate_states = silu(x = input_221)[name = string("gate_states")]; tensor input_223 = mul(x = gate_states, y = up_states)[name = string("input_223")]; string hidden_states_127_pad_type_0 = const()[name = string("hidden_states_127_pad_type_0"), val = string("valid")]; tensor hidden_states_127_strides_0 = const()[name = string("hidden_states_127_strides_0"), val = tensor([1, 1])]; tensor hidden_states_127_pad_0 = const()[name = string("hidden_states_127_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_127_dilations_0 = const()[name = string("hidden_states_127_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_127_groups_0 = const()[name = string("hidden_states_127_groups_0"), val = int32(1)]; tensor hidden_states_127 = conv(dilations = hidden_states_127_dilations_0, groups = hidden_states_127_groups_0, pad = hidden_states_127_pad_0, pad_type = hidden_states_127_pad_type_0, strides = hidden_states_127_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_223)[name = string("hidden_states_127")]; tensor var_3454_axes_0 = const()[name = string("op_3454_axes_0"), val = tensor([2])]; tensor var_3454 = squeeze(axes = var_3454_axes_0, x = hidden_states_127)[name = string("op_3454")]; tensor var_3455 = const()[name = string("op_3455"), val = tensor([0, 2, 1])]; tensor var_3456 = transpose(perm = var_3455, x = var_3454)[name = string("transpose_0")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_125_cast_fp16, y = var_3456)[name = string("hidden_states_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_cast_fp16 = sub(x = hidden_states_cast_fp16, y = mean_cast_fp16)[name = string("input_cast_fp16")]; tensor var_3464_axes_0 = const()[name = string("op_3464_axes_0"), val = tensor([-1])]; tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521752832)))]; tensor output_hidden_states = layer_norm(axes = var_3464_axes_0, epsilon = var_75_to_fp16, gamma = model_model_norm_weight_to_fp16, x = input_cast_fp16)[name = string("op_3464_cast_fp16")]; tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; } -> (output_hidden_states); func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521756992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524902784))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524935616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525722112))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525730368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526516864))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526525120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539108096))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539239232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551822208))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551953344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564536320))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564569152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567714944))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567747776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568534272))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568542528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569329024))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569337280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581920256))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582051392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594634368))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594765504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607348480))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607381312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610527104))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610559936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(611346432))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(611354688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612141184))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612149440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(624732416))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(624863552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(637446528))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(637577664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650160640))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650193472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653339264))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653372096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(654158592))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(654166848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(654953344))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(654961600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667544576))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667675712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(680258688))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(680389824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692972800))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693005632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(696151424))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(696184256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(696970752))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(696979008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697765504))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697773760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710356736))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710487872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723070848))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723201984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735784960))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735817792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738963584))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738996416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(739782912))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(739791168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740577664))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740585920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(753168896))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(753300032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765883008))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(766014144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778597120))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778629952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781775744))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781808576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(782595072))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(782603328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783389824))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783398080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795981056))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(796112192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(808695168))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(808826304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821409280))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821442112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(824587904))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(824620736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825407232))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825415488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826201984))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826210240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(838793216))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(838924352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851507328))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851638464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(864221440))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(864254272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(867400064))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(867432896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(868219392))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(868227648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(869014144))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(869022400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881605376))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881736512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(894319488))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(894450624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907033600))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907066432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(910212224))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(910245056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(911031552))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(911039808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(911826304))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(911834560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(924417536))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(924548672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(937131648))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(937262784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949845760))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949878592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(953024384))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(953057216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(953843712))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(953851968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(954638464))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(954646720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967229696))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967360832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(979943808))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(980074944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(992657920))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(992690752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(995836544))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(995869376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(996655872))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(996664128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997450624))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997458880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010041856))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010172992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1022755968))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1022887104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1035470080))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1035502912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1038648704))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1038681536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039468032))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039476288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1040262784))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1040271040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052854016))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052985152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1065568128))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1065699264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1078282240))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1078315072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1081460864))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1081493696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1082280192))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1082288448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1083074944))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1083083200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1095666176))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1095797312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1108380288))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1108511424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1121094400))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1121127232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1124273024))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1124305856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1125092352))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1125100608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1125887104))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1125895360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1138478336))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1138609472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151192448))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151323584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1163906560))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1163939392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1167085184))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1167118016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1167904512))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1167912768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168699264))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; int32 var_75 = const()[name = string("op_75"), val = int32(-1)]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_486_axis_0 = const()[name = string("op_486_axis_0"), val = int32(1)]; int32 var_486_batch_dims_0 = const()[name = string("op_486_batch_dims_0"), val = int32(0)]; bool var_486_validate_indices_0 = const()[name = string("op_486_validate_indices_0"), val = bool(false)]; tensor var_86_to_fp16 = const()[name = string("op_86_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471150720)))]; tensor var_486_cast_fp16 = gather(axis = var_486_axis_0, batch_dims = var_486_batch_dims_0, indices = select_0, validate_indices = var_486_validate_indices_0, x = var_86_to_fp16)[name = string("op_486_cast_fp16")]; tensor var_487 = const()[name = string("op_487"), val = tensor([1, 64, 1, 64])]; tensor cos_1_cast_fp16 = reshape(shape = var_487, x = var_486_cast_fp16)[name = string("cos_1_cast_fp16")]; int32 var_491_axis_0 = const()[name = string("op_491_axis_0"), val = int32(1)]; int32 var_491_batch_dims_0 = const()[name = string("op_491_batch_dims_0"), val = int32(0)]; bool var_491_validate_indices_0 = const()[name = string("op_491_validate_indices_0"), val = bool(false)]; tensor var_81_to_fp16 = const()[name = string("op_81_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454373440)))]; tensor var_491_cast_fp16 = gather(axis = var_491_axis_0, batch_dims = var_491_batch_dims_0, indices = select_0, validate_indices = var_491_validate_indices_0, x = var_81_to_fp16)[name = string("op_491_cast_fp16")]; tensor var_492 = const()[name = string("op_492"), val = tensor([1, 64, 1, 64])]; tensor sin_1_cast_fp16 = reshape(shape = var_492, x = var_491_cast_fp16)[name = string("sin_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_502_axes_0 = const()[name = string("op_502_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(487928000)))]; fp16 var_77_to_fp16 = const()[name = string("op_77_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_502_cast_fp16 = layer_norm(axes = var_502_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_502_cast_fp16")]; tensor var_506 = const()[name = string("op_506"), val = tensor([0, 2, 1])]; tensor var_508_axes_0 = const()[name = string("op_508_axes_0"), val = tensor([2])]; tensor var_507 = transpose(perm = var_506, x = var_502_cast_fp16)[name = string("transpose_111")]; tensor var_508 = expand_dims(axes = var_508_axes_0, x = var_507)[name = string("op_508")]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_508)[name = string("query_states_1")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_508)[name = string("key_states_1")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_508)[name = string("value_states_1")]; tensor var_528 = const()[name = string("op_528"), val = tensor([1, 32, 64, 64])]; tensor var_529 = reshape(shape = var_528, x = query_states_1)[name = string("op_529")]; tensor var_530 = const()[name = string("op_530"), val = tensor([0, 1, 3, 2])]; tensor var_532 = const()[name = string("op_532"), val = tensor([1, 8, 64, 64])]; tensor var_533 = reshape(shape = var_532, x = key_states_1)[name = string("op_533")]; tensor var_534 = const()[name = string("op_534"), val = tensor([0, 1, 3, 2])]; tensor var_536 = const()[name = string("op_536"), val = tensor([1, 8, 64, 64])]; tensor var_537 = reshape(shape = var_536, x = value_states_1)[name = string("op_537")]; tensor var_538 = const()[name = string("op_538"), val = tensor([0, 1, 3, 2])]; tensor var_540 = const()[name = string("op_540"), val = tensor([0, 2, 1, 3])]; tensor var_542 = const()[name = string("op_542"), val = tensor([0, 2, 1, 3])]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_1 = transpose(perm = var_530, x = var_529)[name = string("transpose_110")]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 64, 32])]; tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor cos_5 = transpose(perm = var_540, x = cos_1_cast_fp16)[name = string("transpose_109")]; tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 64, 32])]; tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor sin_5 = transpose(perm = var_542, x = sin_1_cast_fp16)[name = string("transpose_108")]; tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; tensor var_556 = mul(x = x1_1, y = cos_7)[name = string("op_556")]; tensor var_557 = mul(x = x2_1, y = sin_7)[name = string("op_557")]; tensor var_558 = sub(x = var_556, y = var_557)[name = string("op_558")]; tensor var_559 = mul(x = x2_1, y = cos_7)[name = string("op_559")]; tensor var_560 = mul(x = x1_1, y = sin_7)[name = string("op_560")]; tensor var_561 = add(x = var_559, y = var_560)[name = string("op_561")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1 = concat(axis = var_75, interleave = rotated_1_interleave_0, values = (var_558, var_561))[name = string("rotated_1")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_5 = transpose(perm = var_534, x = var_533)[name = string("transpose_107")]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; tensor var_577 = mul(x = x1_3, y = cos_7)[name = string("op_577")]; tensor var_578 = mul(x = x2_3, y = sin_7)[name = string("op_578")]; tensor var_579 = sub(x = var_577, y = var_578)[name = string("op_579")]; tensor var_580 = mul(x = x2_3, y = cos_7)[name = string("op_580")]; tensor var_581 = mul(x = x1_3, y = sin_7)[name = string("op_581")]; tensor var_582 = add(x = var_580, y = var_581)[name = string("op_582")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3 = concat(axis = var_75, interleave = rotated_3_interleave_0, values = (var_579, var_582))[name = string("rotated_3")]; tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; tensor var_591 = add(x = current_pos, y = seq_length_1)[name = string("op_591")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_591, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_32 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_32")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([16])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([17])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_591, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3 = transpose(perm = var_538, x = var_537)[name = string("transpose_106")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_32)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_33 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_33")]; tensor var_605_begin_0 = const()[name = string("op_605_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_605_end_0 = const()[name = string("op_605_end_0"), val = tensor([1, 8, 1024, 64])]; tensor var_605_end_mask_0 = const()[name = string("op_605_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_605_cast_fp16 = slice_by_index(begin = var_605_begin_0, end = var_605_end_0, end_mask = var_605_end_mask_0, x = coreml_update_state_33)[name = string("op_605_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_605_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_607_begin_0 = const()[name = string("op_607_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_607_end_0 = const()[name = string("op_607_end_0"), val = tensor([17, 8, 1024, 64])]; tensor var_607_end_mask_0 = const()[name = string("op_607_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_607_cast_fp16 = slice_by_index(begin = var_607_begin_0, end = var_607_end_0, end_mask = var_607_end_mask_0, x = coreml_update_state_33)[name = string("op_607_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_607_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; tensor var_616 = const()[name = string("op_616"), val = tensor([1, 4, 1, 1])]; tensor x_13_cast_fp16 = tile(reps = var_616, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_620 = const()[name = string("op_620"), val = tensor([1, -1, 1024, 64])]; tensor var_621_cast_fp16 = reshape(shape = var_620, x = x_13_cast_fp16)[name = string("op_621_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_623 = const()[name = string("op_623"), val = tensor([1, 4, 1, 1])]; tensor x_19_cast_fp16 = tile(reps = var_623, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; bool var_630_transpose_x_0 = const()[name = string("op_630_transpose_x_0"), val = bool(false)]; bool var_630_transpose_y_0 = const()[name = string("op_630_transpose_y_0"), val = bool(true)]; tensor var_630_cast_fp16 = matmul(transpose_x = var_630_transpose_x_0, transpose_y = var_630_transpose_y_0, x = rotated_1, y = var_621_cast_fp16)[name = string("op_630_cast_fp16")]; fp16 var_631_to_fp16 = const()[name = string("op_631_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_1_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; tensor var_642_axes_0 = const()[name = string("op_642_axes_0"), val = tensor([-1])]; bool var_642_keep_dims_0 = const()[name = string("op_642_keep_dims_0"), val = bool(true)]; tensor var_642_cast_fp16 = reduce_sum(axes = var_642_axes_0, keep_dims = var_642_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_642_cast_fp16")]; tensor var_643_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_642_cast_fp16)[name = string("op_643_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([32, 64, 1024])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_643_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([32, 1024, 64])]; tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 32, 64, 64])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor var_646_perm_0 = const()[name = string("op_646_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_648 = const()[name = string("op_648"), val = tensor([1, 64, 2048])]; tensor var_646_cast_fp16 = transpose(perm = var_646_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_105")]; tensor input_5_cast_fp16 = reshape(shape = var_648, x = var_646_cast_fp16)[name = string("input_5_cast_fp16")]; tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168707520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1171853312))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490037632)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_659_axes_0 = const()[name = string("op_659_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490041792)))]; tensor var_659_cast_fp16 = layer_norm(axes = var_659_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_659_cast_fp16")]; tensor var_666 = const()[name = string("op_666"), val = tensor([0, 2, 1])]; tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; tensor var_667 = transpose(perm = var_666, x = var_659_cast_fp16)[name = string("transpose_104")]; tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_667)[name = string("input_9")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; tensor var_689_axes_0 = const()[name = string("op_689_axes_0"), val = tensor([2])]; tensor var_689 = squeeze(axes = var_689_axes_0, x = hidden_states_7)[name = string("op_689")]; tensor var_690 = const()[name = string("op_690"), val = tensor([0, 2, 1])]; tensor var_691 = transpose(perm = var_690, x = var_689)[name = string("transpose_103")]; tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_691)[name = string("hidden_states_9_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_699_axes_0 = const()[name = string("op_699_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490045952)))]; tensor var_699_cast_fp16 = layer_norm(axes = var_699_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_699_cast_fp16")]; tensor var_703 = const()[name = string("op_703"), val = tensor([0, 2, 1])]; tensor var_705_axes_0 = const()[name = string("op_705_axes_0"), val = tensor([2])]; tensor var_704 = transpose(perm = var_703, x = var_699_cast_fp16)[name = string("transpose_102")]; tensor var_705 = expand_dims(axes = var_705_axes_0, x = var_704)[name = string("op_705")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_705)[name = string("query_states_5")]; string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_705)[name = string("key_states_7")]; string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_705)[name = string("value_states_7")]; tensor var_725 = const()[name = string("op_725"), val = tensor([1, 32, 64, 64])]; tensor var_726 = reshape(shape = var_725, x = query_states_5)[name = string("op_726")]; tensor var_727 = const()[name = string("op_727"), val = tensor([0, 1, 3, 2])]; tensor var_729 = const()[name = string("op_729"), val = tensor([1, 8, 64, 64])]; tensor var_730 = reshape(shape = var_729, x = key_states_7)[name = string("op_730")]; tensor var_731 = const()[name = string("op_731"), val = tensor([0, 1, 3, 2])]; tensor var_733 = const()[name = string("op_733"), val = tensor([1, 8, 64, 64])]; tensor var_734 = reshape(shape = var_733, x = value_states_7)[name = string("op_734")]; tensor var_735 = const()[name = string("op_735"), val = tensor([0, 1, 3, 2])]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_29 = transpose(perm = var_727, x = var_726)[name = string("transpose_101")]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; tensor var_753 = mul(x = x1_5, y = cos_7)[name = string("op_753")]; tensor var_754 = mul(x = x2_5, y = sin_7)[name = string("op_754")]; tensor var_755 = sub(x = var_753, y = var_754)[name = string("op_755")]; tensor var_756 = mul(x = x2_5, y = cos_7)[name = string("op_756")]; tensor var_757 = mul(x = x1_5, y = sin_7)[name = string("op_757")]; tensor var_758 = add(x = var_756, y = var_757)[name = string("op_758")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5 = concat(axis = var_75, interleave = rotated_5_interleave_0, values = (var_755, var_758))[name = string("rotated_5")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_33 = transpose(perm = var_731, x = var_730)[name = string("transpose_100")]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; tensor var_774 = mul(x = x1_7, y = cos_7)[name = string("op_774")]; tensor var_775 = mul(x = x2_7, y = sin_7)[name = string("op_775")]; tensor var_776 = sub(x = var_774, y = var_775)[name = string("op_776")]; tensor var_777 = mul(x = x2_7, y = cos_7)[name = string("op_777")]; tensor var_778 = mul(x = x1_7, y = sin_7)[name = string("op_778")]; tensor var_779 = add(x = var_777, y = var_778)[name = string("op_779")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7 = concat(axis = var_75, interleave = rotated_7_interleave_0, values = (var_776, var_779))[name = string("rotated_7")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_591, concat_21_values3_0))[name = string("concat_21")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_33)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_34 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_34")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([17])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([18])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_591, concat_25_values3_0))[name = string("concat_25")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_9 = transpose(perm = var_735, x = var_734)[name = string("transpose_99")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_34)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_35 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_35")]; tensor var_802_begin_0 = const()[name = string("op_802_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_802_end_0 = const()[name = string("op_802_end_0"), val = tensor([2, 8, 1024, 64])]; tensor var_802_end_mask_0 = const()[name = string("op_802_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_802_cast_fp16 = slice_by_index(begin = var_802_begin_0, end = var_802_end_0, end_mask = var_802_end_mask_0, x = coreml_update_state_35)[name = string("op_802_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_802_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_804_begin_0 = const()[name = string("op_804_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_804_end_0 = const()[name = string("op_804_end_0"), val = tensor([18, 8, 1024, 64])]; tensor var_804_end_mask_0 = const()[name = string("op_804_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_804_cast_fp16 = slice_by_index(begin = var_804_begin_0, end = var_804_end_0, end_mask = var_804_end_mask_0, x = coreml_update_state_35)[name = string("op_804_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_804_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; tensor var_813 = const()[name = string("op_813"), val = tensor([1, 4, 1, 1])]; tensor x_41_cast_fp16 = tile(reps = var_813, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_817 = const()[name = string("op_817"), val = tensor([1, -1, 1024, 64])]; tensor var_818_cast_fp16 = reshape(shape = var_817, x = x_41_cast_fp16)[name = string("op_818_cast_fp16")]; tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_820 = const()[name = string("op_820"), val = tensor([1, 4, 1, 1])]; tensor x_47_cast_fp16 = tile(reps = var_820, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; bool var_827_transpose_x_0 = const()[name = string("op_827_transpose_x_0"), val = bool(false)]; bool var_827_transpose_y_0 = const()[name = string("op_827_transpose_y_0"), val = bool(true)]; tensor var_827_cast_fp16 = matmul(transpose_x = var_827_transpose_x_0, transpose_y = var_827_transpose_y_0, x = rotated_5, y = var_818_cast_fp16)[name = string("op_827_cast_fp16")]; fp16 var_828_to_fp16 = const()[name = string("op_828_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_3_cast_fp16 = mul(x = var_827_cast_fp16, y = var_828_to_fp16)[name = string("attn_weights_3_cast_fp16")]; tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; tensor var_839_axes_0 = const()[name = string("op_839_axes_0"), val = tensor([-1])]; bool var_839_keep_dims_0 = const()[name = string("op_839_keep_dims_0"), val = bool(true)]; tensor var_839_cast_fp16 = reduce_sum(axes = var_839_axes_0, keep_dims = var_839_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_839_cast_fp16")]; tensor var_840_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_839_cast_fp16)[name = string("op_840_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([32, 64, 1024])]; tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_840_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([32, 1024, 64])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 32, 64, 64])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor var_843_perm_0 = const()[name = string("op_843_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_845 = const()[name = string("op_845"), val = tensor([1, 64, 2048])]; tensor var_843_cast_fp16 = transpose(perm = var_843_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_98")]; tensor input_19_cast_fp16 = reshape(shape = var_845, x = var_843_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1171886144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1175031936))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; tensor var_856_axes_0 = const()[name = string("op_856_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492155584)))]; tensor var_856_cast_fp16 = layer_norm(axes = var_856_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_856_cast_fp16")]; tensor var_863 = const()[name = string("op_863"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_864 = transpose(perm = var_863, x = var_856_cast_fp16)[name = string("transpose_97")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_864)[name = string("input_23")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; tensor var_886_axes_0 = const()[name = string("op_886_axes_0"), val = tensor([2])]; tensor var_886 = squeeze(axes = var_886_axes_0, x = hidden_states_15)[name = string("op_886")]; tensor var_887 = const()[name = string("op_887"), val = tensor([0, 2, 1])]; tensor var_888 = transpose(perm = var_887, x = var_886)[name = string("transpose_96")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_888)[name = string("hidden_states_17_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_896_axes_0 = const()[name = string("op_896_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492159744)))]; tensor var_896_cast_fp16 = layer_norm(axes = var_896_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_896_cast_fp16")]; tensor var_900 = const()[name = string("op_900"), val = tensor([0, 2, 1])]; tensor var_902_axes_0 = const()[name = string("op_902_axes_0"), val = tensor([2])]; tensor var_901 = transpose(perm = var_900, x = var_896_cast_fp16)[name = string("transpose_95")]; tensor var_902 = expand_dims(axes = var_902_axes_0, x = var_901)[name = string("op_902")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_902)[name = string("query_states_9")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_902)[name = string("key_states_13")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_902)[name = string("value_states_13")]; tensor var_922 = const()[name = string("op_922"), val = tensor([1, 32, 64, 64])]; tensor var_923 = reshape(shape = var_922, x = query_states_9)[name = string("op_923")]; tensor var_924 = const()[name = string("op_924"), val = tensor([0, 1, 3, 2])]; tensor var_926 = const()[name = string("op_926"), val = tensor([1, 8, 64, 64])]; tensor var_927 = reshape(shape = var_926, x = key_states_13)[name = string("op_927")]; tensor var_928 = const()[name = string("op_928"), val = tensor([0, 1, 3, 2])]; tensor var_930 = const()[name = string("op_930"), val = tensor([1, 8, 64, 64])]; tensor var_931 = reshape(shape = var_930, x = value_states_13)[name = string("op_931")]; tensor var_932 = const()[name = string("op_932"), val = tensor([0, 1, 3, 2])]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_57 = transpose(perm = var_924, x = var_923)[name = string("transpose_94")]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; tensor var_950 = mul(x = x1_9, y = cos_7)[name = string("op_950")]; tensor var_951 = mul(x = x2_9, y = sin_7)[name = string("op_951")]; tensor var_952 = sub(x = var_950, y = var_951)[name = string("op_952")]; tensor var_953 = mul(x = x2_9, y = cos_7)[name = string("op_953")]; tensor var_954 = mul(x = x1_9, y = sin_7)[name = string("op_954")]; tensor var_955 = add(x = var_953, y = var_954)[name = string("op_955")]; bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; tensor rotated_9 = concat(axis = var_75, interleave = rotated_9_interleave_0, values = (var_952, var_955))[name = string("rotated_9")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_61 = transpose(perm = var_928, x = var_927)[name = string("transpose_93")]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; tensor var_971 = mul(x = x1_11, y = cos_7)[name = string("op_971")]; tensor var_972 = mul(x = x2_11, y = sin_7)[name = string("op_972")]; tensor var_973 = sub(x = var_971, y = var_972)[name = string("op_973")]; tensor var_974 = mul(x = x2_11, y = cos_7)[name = string("op_974")]; tensor var_975 = mul(x = x1_11, y = sin_7)[name = string("op_975")]; tensor var_976 = add(x = var_974, y = var_975)[name = string("op_976")]; bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; tensor rotated_11 = concat(axis = var_75, interleave = rotated_11_interleave_0, values = (var_973, var_976))[name = string("rotated_11")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_591, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_35)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_36")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([18])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([19])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_591, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15 = transpose(perm = var_932, x = var_931)[name = string("transpose_92")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_36)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_37")]; tensor var_999_begin_0 = const()[name = string("op_999_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_999_end_0 = const()[name = string("op_999_end_0"), val = tensor([3, 8, 1024, 64])]; tensor var_999_end_mask_0 = const()[name = string("op_999_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_999_cast_fp16 = slice_by_index(begin = var_999_begin_0, end = var_999_end_0, end_mask = var_999_end_mask_0, x = coreml_update_state_37)[name = string("op_999_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_999_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_1001_begin_0 = const()[name = string("op_1001_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_1001_end_0 = const()[name = string("op_1001_end_0"), val = tensor([19, 8, 1024, 64])]; tensor var_1001_end_mask_0 = const()[name = string("op_1001_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1001_cast_fp16 = slice_by_index(begin = var_1001_begin_0, end = var_1001_end_0, end_mask = var_1001_end_mask_0, x = coreml_update_state_37)[name = string("op_1001_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_1001_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_1010 = const()[name = string("op_1010"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_1010, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_1014 = const()[name = string("op_1014"), val = tensor([1, -1, 1024, 64])]; tensor var_1015_cast_fp16 = reshape(shape = var_1014, x = x_69_cast_fp16)[name = string("op_1015_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_1017 = const()[name = string("op_1017"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_1017, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; bool var_1024_transpose_x_0 = const()[name = string("op_1024_transpose_x_0"), val = bool(false)]; bool var_1024_transpose_y_0 = const()[name = string("op_1024_transpose_y_0"), val = bool(true)]; tensor var_1024_cast_fp16 = matmul(transpose_x = var_1024_transpose_x_0, transpose_y = var_1024_transpose_y_0, x = rotated_9, y = var_1015_cast_fp16)[name = string("op_1024_cast_fp16")]; fp16 var_1025_to_fp16 = const()[name = string("op_1025_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_5_cast_fp16 = mul(x = var_1024_cast_fp16, y = var_1025_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; tensor var_1036_axes_0 = const()[name = string("op_1036_axes_0"), val = tensor([-1])]; bool var_1036_keep_dims_0 = const()[name = string("op_1036_keep_dims_0"), val = bool(true)]; tensor var_1036_cast_fp16 = reduce_sum(axes = var_1036_axes_0, keep_dims = var_1036_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_1036_cast_fp16")]; tensor var_1037_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_1036_cast_fp16)[name = string("op_1037_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([32, 64, 1024])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_1037_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([32, 1024, 64])]; tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 32, 64, 64])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor var_1040_perm_0 = const()[name = string("op_1040_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1042 = const()[name = string("op_1042"), val = tensor([1, 64, 2048])]; tensor var_1040_cast_fp16 = transpose(perm = var_1040_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_91")]; tensor input_33_cast_fp16 = reshape(shape = var_1042, x = var_1040_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1175064768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1178210560))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; tensor var_1053_axes_0 = const()[name = string("op_1053_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494269376)))]; tensor var_1053_cast_fp16 = layer_norm(axes = var_1053_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_1053_cast_fp16")]; tensor var_1060 = const()[name = string("op_1060"), val = tensor([0, 2, 1])]; tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; tensor var_1061 = transpose(perm = var_1060, x = var_1053_cast_fp16)[name = string("transpose_90")]; tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_1061)[name = string("input_37")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; tensor var_1083_axes_0 = const()[name = string("op_1083_axes_0"), val = tensor([2])]; tensor var_1083 = squeeze(axes = var_1083_axes_0, x = hidden_states_23)[name = string("op_1083")]; tensor var_1084 = const()[name = string("op_1084"), val = tensor([0, 2, 1])]; tensor var_1085 = transpose(perm = var_1084, x = var_1083)[name = string("transpose_89")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_1085)[name = string("hidden_states_25_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; tensor var_1093_axes_0 = const()[name = string("op_1093_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494273536)))]; tensor var_1093_cast_fp16 = layer_norm(axes = var_1093_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_1093_cast_fp16")]; tensor var_1097 = const()[name = string("op_1097"), val = tensor([0, 2, 1])]; tensor var_1099_axes_0 = const()[name = string("op_1099_axes_0"), val = tensor([2])]; tensor var_1098 = transpose(perm = var_1097, x = var_1093_cast_fp16)[name = string("transpose_88")]; tensor var_1099 = expand_dims(axes = var_1099_axes_0, x = var_1098)[name = string("op_1099")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_1099)[name = string("query_states_13")]; string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_1099)[name = string("key_states_19")]; string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_1099)[name = string("value_states_19")]; tensor var_1119 = const()[name = string("op_1119"), val = tensor([1, 32, 64, 64])]; tensor var_1120 = reshape(shape = var_1119, x = query_states_13)[name = string("op_1120")]; tensor var_1121 = const()[name = string("op_1121"), val = tensor([0, 1, 3, 2])]; tensor var_1123 = const()[name = string("op_1123"), val = tensor([1, 8, 64, 64])]; tensor var_1124 = reshape(shape = var_1123, x = key_states_19)[name = string("op_1124")]; tensor var_1125 = const()[name = string("op_1125"), val = tensor([0, 1, 3, 2])]; tensor var_1127 = const()[name = string("op_1127"), val = tensor([1, 8, 64, 64])]; tensor var_1128 = reshape(shape = var_1127, x = value_states_19)[name = string("op_1128")]; tensor var_1129 = const()[name = string("op_1129"), val = tensor([0, 1, 3, 2])]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_85 = transpose(perm = var_1121, x = var_1120)[name = string("transpose_87")]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; tensor var_1147 = mul(x = x1_13, y = cos_7)[name = string("op_1147")]; tensor var_1148 = mul(x = x2_13, y = sin_7)[name = string("op_1148")]; tensor var_1149 = sub(x = var_1147, y = var_1148)[name = string("op_1149")]; tensor var_1150 = mul(x = x2_13, y = cos_7)[name = string("op_1150")]; tensor var_1151 = mul(x = x1_13, y = sin_7)[name = string("op_1151")]; tensor var_1152 = add(x = var_1150, y = var_1151)[name = string("op_1152")]; bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; tensor rotated_13 = concat(axis = var_75, interleave = rotated_13_interleave_0, values = (var_1149, var_1152))[name = string("rotated_13")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_89 = transpose(perm = var_1125, x = var_1124)[name = string("transpose_86")]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")]; tensor var_1168 = mul(x = x1_15, y = cos_7)[name = string("op_1168")]; tensor var_1169 = mul(x = x2_15, y = sin_7)[name = string("op_1169")]; tensor var_1170 = sub(x = var_1168, y = var_1169)[name = string("op_1170")]; tensor var_1171 = mul(x = x2_15, y = cos_7)[name = string("op_1171")]; tensor var_1172 = mul(x = x1_15, y = sin_7)[name = string("op_1172")]; tensor var_1173 = add(x = var_1171, y = var_1172)[name = string("op_1173")]; bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; tensor rotated_15 = concat(axis = var_75, interleave = rotated_15_interleave_0, values = (var_1170, var_1173))[name = string("rotated_15")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_591, concat_57_values3_0))[name = string("concat_57")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_37)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_38")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([19])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([20])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_591, concat_61_values3_0))[name = string("concat_61")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_21 = transpose(perm = var_1129, x = var_1128)[name = string("transpose_85")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_38)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_39")]; tensor var_1196_begin_0 = const()[name = string("op_1196_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1196_end_0 = const()[name = string("op_1196_end_0"), val = tensor([4, 8, 1024, 64])]; tensor var_1196_end_mask_0 = const()[name = string("op_1196_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = coreml_update_state_39)[name = string("op_1196_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_1196_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_1198_begin_0 = const()[name = string("op_1198_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_1198_end_0 = const()[name = string("op_1198_end_0"), val = tensor([20, 8, 1024, 64])]; tensor var_1198_end_mask_0 = const()[name = string("op_1198_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = coreml_update_state_39)[name = string("op_1198_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_1198_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_1207 = const()[name = string("op_1207"), val = tensor([1, 4, 1, 1])]; tensor x_97_cast_fp16 = tile(reps = var_1207, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_1211 = const()[name = string("op_1211"), val = tensor([1, -1, 1024, 64])]; tensor var_1212_cast_fp16 = reshape(shape = var_1211, x = x_97_cast_fp16)[name = string("op_1212_cast_fp16")]; tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; tensor var_1214 = const()[name = string("op_1214"), val = tensor([1, 4, 1, 1])]; tensor x_103_cast_fp16 = tile(reps = var_1214, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; bool var_1221_transpose_x_0 = const()[name = string("op_1221_transpose_x_0"), val = bool(false)]; bool var_1221_transpose_y_0 = const()[name = string("op_1221_transpose_y_0"), val = bool(true)]; tensor var_1221_cast_fp16 = matmul(transpose_x = var_1221_transpose_x_0, transpose_y = var_1221_transpose_y_0, x = rotated_13, y = var_1212_cast_fp16)[name = string("op_1221_cast_fp16")]; fp16 var_1222_to_fp16 = const()[name = string("op_1222_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_7_cast_fp16 = mul(x = var_1221_cast_fp16, y = var_1222_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; tensor var_1233_axes_0 = const()[name = string("op_1233_axes_0"), val = tensor([-1])]; bool var_1233_keep_dims_0 = const()[name = string("op_1233_keep_dims_0"), val = bool(true)]; tensor var_1233_cast_fp16 = reduce_sum(axes = var_1233_axes_0, keep_dims = var_1233_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_1233_cast_fp16")]; tensor var_1234_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_1233_cast_fp16)[name = string("op_1234_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([32, 64, 1024])]; tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_1234_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([32, 1024, 64])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 32, 64, 64])]; tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor var_1237_perm_0 = const()[name = string("op_1237_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1239 = const()[name = string("op_1239"), val = tensor([1, 64, 2048])]; tensor var_1237_cast_fp16 = transpose(perm = var_1237_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_84")]; tensor input_47_cast_fp16 = reshape(shape = var_1239, x = var_1237_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1178243392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181389184))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; tensor var_1250_axes_0 = const()[name = string("op_1250_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496383168)))]; tensor var_1250_cast_fp16 = layer_norm(axes = var_1250_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1250_cast_fp16")]; tensor var_1257 = const()[name = string("op_1257"), val = tensor([0, 2, 1])]; tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; tensor var_1258 = transpose(perm = var_1257, x = var_1250_cast_fp16)[name = string("transpose_83")]; tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1258)[name = string("input_51")]; string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; tensor var_1280_axes_0 = const()[name = string("op_1280_axes_0"), val = tensor([2])]; tensor var_1280 = squeeze(axes = var_1280_axes_0, x = hidden_states_31)[name = string("op_1280")]; tensor var_1281 = const()[name = string("op_1281"), val = tensor([0, 2, 1])]; tensor var_1282 = transpose(perm = var_1281, x = var_1280)[name = string("transpose_82")]; tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1282)[name = string("hidden_states_33_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; tensor var_1290_axes_0 = const()[name = string("op_1290_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496387328)))]; tensor var_1290_cast_fp16 = layer_norm(axes = var_1290_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1290_cast_fp16")]; tensor var_1294 = const()[name = string("op_1294"), val = tensor([0, 2, 1])]; tensor var_1296_axes_0 = const()[name = string("op_1296_axes_0"), val = tensor([2])]; tensor var_1295 = transpose(perm = var_1294, x = var_1290_cast_fp16)[name = string("transpose_81")]; tensor var_1296 = expand_dims(axes = var_1296_axes_0, x = var_1295)[name = string("op_1296")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1296)[name = string("query_states_17")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1296)[name = string("key_states_25")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1296)[name = string("value_states_25")]; tensor var_1316 = const()[name = string("op_1316"), val = tensor([1, 32, 64, 64])]; tensor var_1317 = reshape(shape = var_1316, x = query_states_17)[name = string("op_1317")]; tensor var_1318 = const()[name = string("op_1318"), val = tensor([0, 1, 3, 2])]; tensor var_1320 = const()[name = string("op_1320"), val = tensor([1, 8, 64, 64])]; tensor var_1321 = reshape(shape = var_1320, x = key_states_25)[name = string("op_1321")]; tensor var_1322 = const()[name = string("op_1322"), val = tensor([0, 1, 3, 2])]; tensor var_1324 = const()[name = string("op_1324"), val = tensor([1, 8, 64, 64])]; tensor var_1325 = reshape(shape = var_1324, x = value_states_25)[name = string("op_1325")]; tensor var_1326 = const()[name = string("op_1326"), val = tensor([0, 1, 3, 2])]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_113 = transpose(perm = var_1318, x = var_1317)[name = string("transpose_80")]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")]; tensor var_1344 = mul(x = x1_17, y = cos_7)[name = string("op_1344")]; tensor var_1345 = mul(x = x2_17, y = sin_7)[name = string("op_1345")]; tensor var_1346 = sub(x = var_1344, y = var_1345)[name = string("op_1346")]; tensor var_1347 = mul(x = x2_17, y = cos_7)[name = string("op_1347")]; tensor var_1348 = mul(x = x1_17, y = sin_7)[name = string("op_1348")]; tensor var_1349 = add(x = var_1347, y = var_1348)[name = string("op_1349")]; bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; tensor rotated_17 = concat(axis = var_75, interleave = rotated_17_interleave_0, values = (var_1346, var_1349))[name = string("rotated_17")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_117 = transpose(perm = var_1322, x = var_1321)[name = string("transpose_79")]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")]; tensor var_1365 = mul(x = x1_19, y = cos_7)[name = string("op_1365")]; tensor var_1366 = mul(x = x2_19, y = sin_7)[name = string("op_1366")]; tensor var_1367 = sub(x = var_1365, y = var_1366)[name = string("op_1367")]; tensor var_1368 = mul(x = x2_19, y = cos_7)[name = string("op_1368")]; tensor var_1369 = mul(x = x1_19, y = sin_7)[name = string("op_1369")]; tensor var_1370 = add(x = var_1368, y = var_1369)[name = string("op_1370")]; bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; tensor rotated_19 = concat(axis = var_75, interleave = rotated_19_interleave_0, values = (var_1367, var_1370))[name = string("rotated_19")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_591, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_39)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_40")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([20])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([21])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_591, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27 = transpose(perm = var_1326, x = var_1325)[name = string("transpose_78")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_40)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_41")]; tensor var_1393_begin_0 = const()[name = string("op_1393_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1393_end_0 = const()[name = string("op_1393_end_0"), val = tensor([5, 8, 1024, 64])]; tensor var_1393_end_mask_0 = const()[name = string("op_1393_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1393_cast_fp16 = slice_by_index(begin = var_1393_begin_0, end = var_1393_end_0, end_mask = var_1393_end_mask_0, x = coreml_update_state_41)[name = string("op_1393_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1393_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_1395_begin_0 = const()[name = string("op_1395_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_1395_end_0 = const()[name = string("op_1395_end_0"), val = tensor([21, 8, 1024, 64])]; tensor var_1395_end_mask_0 = const()[name = string("op_1395_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1395_cast_fp16 = slice_by_index(begin = var_1395_begin_0, end = var_1395_end_0, end_mask = var_1395_end_mask_0, x = coreml_update_state_41)[name = string("op_1395_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1395_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1404 = const()[name = string("op_1404"), val = tensor([1, 4, 1, 1])]; tensor x_125_cast_fp16 = tile(reps = var_1404, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; tensor var_1408 = const()[name = string("op_1408"), val = tensor([1, -1, 1024, 64])]; tensor var_1409_cast_fp16 = reshape(shape = var_1408, x = x_125_cast_fp16)[name = string("op_1409_cast_fp16")]; tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_1411 = const()[name = string("op_1411"), val = tensor([1, 4, 1, 1])]; tensor x_131_cast_fp16 = tile(reps = var_1411, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; bool var_1418_transpose_x_0 = const()[name = string("op_1418_transpose_x_0"), val = bool(false)]; bool var_1418_transpose_y_0 = const()[name = string("op_1418_transpose_y_0"), val = bool(true)]; tensor var_1418_cast_fp16 = matmul(transpose_x = var_1418_transpose_x_0, transpose_y = var_1418_transpose_y_0, x = rotated_17, y = var_1409_cast_fp16)[name = string("op_1418_cast_fp16")]; fp16 var_1419_to_fp16 = const()[name = string("op_1419_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_9_cast_fp16 = mul(x = var_1418_cast_fp16, y = var_1419_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; tensor var_1430_axes_0 = const()[name = string("op_1430_axes_0"), val = tensor([-1])]; bool var_1430_keep_dims_0 = const()[name = string("op_1430_keep_dims_0"), val = bool(true)]; tensor var_1430_cast_fp16 = reduce_sum(axes = var_1430_axes_0, keep_dims = var_1430_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1430_cast_fp16")]; tensor var_1431_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1430_cast_fp16)[name = string("op_1431_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([32, 64, 1024])]; tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1431_cast_fp16)[name = string("reshape_12_cast_fp16")]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([32, 1024, 64])]; tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")]; bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 32, 64, 64])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor var_1434_perm_0 = const()[name = string("op_1434_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1436 = const()[name = string("op_1436"), val = tensor([1, 64, 2048])]; tensor var_1434_cast_fp16 = transpose(perm = var_1434_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_77")]; tensor input_61_cast_fp16 = reshape(shape = var_1436, x = var_1434_cast_fp16)[name = string("input_61_cast_fp16")]; tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181422016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1184567808))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; tensor var_1447_axes_0 = const()[name = string("op_1447_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498496960)))]; tensor var_1447_cast_fp16 = layer_norm(axes = var_1447_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1447_cast_fp16")]; tensor var_1454 = const()[name = string("op_1454"), val = tensor([0, 2, 1])]; tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; tensor var_1455 = transpose(perm = var_1454, x = var_1447_cast_fp16)[name = string("transpose_76")]; tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1455)[name = string("input_65")]; string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; tensor var_1477_axes_0 = const()[name = string("op_1477_axes_0"), val = tensor([2])]; tensor var_1477 = squeeze(axes = var_1477_axes_0, x = hidden_states_39)[name = string("op_1477")]; tensor var_1478 = const()[name = string("op_1478"), val = tensor([0, 2, 1])]; tensor var_1479 = transpose(perm = var_1478, x = var_1477)[name = string("transpose_75")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1479)[name = string("hidden_states_41_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; tensor var_1487_axes_0 = const()[name = string("op_1487_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498501120)))]; tensor var_1487_cast_fp16 = layer_norm(axes = var_1487_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1487_cast_fp16")]; tensor var_1491 = const()[name = string("op_1491"), val = tensor([0, 2, 1])]; tensor var_1493_axes_0 = const()[name = string("op_1493_axes_0"), val = tensor([2])]; tensor var_1492 = transpose(perm = var_1491, x = var_1487_cast_fp16)[name = string("transpose_74")]; tensor var_1493 = expand_dims(axes = var_1493_axes_0, x = var_1492)[name = string("op_1493")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1493)[name = string("query_states_21")]; string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1493)[name = string("key_states_31")]; string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")]; tensor value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor([1, 1])]; tensor value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor([1, 1])]; int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)]; tensor value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1493)[name = string("value_states_31")]; tensor var_1513 = const()[name = string("op_1513"), val = tensor([1, 32, 64, 64])]; tensor var_1514 = reshape(shape = var_1513, x = query_states_21)[name = string("op_1514")]; tensor var_1515 = const()[name = string("op_1515"), val = tensor([0, 1, 3, 2])]; tensor var_1517 = const()[name = string("op_1517"), val = tensor([1, 8, 64, 64])]; tensor var_1518 = reshape(shape = var_1517, x = key_states_31)[name = string("op_1518")]; tensor var_1519 = const()[name = string("op_1519"), val = tensor([0, 1, 3, 2])]; tensor var_1521 = const()[name = string("op_1521"), val = tensor([1, 8, 64, 64])]; tensor var_1522 = reshape(shape = var_1521, x = value_states_31)[name = string("op_1522")]; tensor var_1523 = const()[name = string("op_1523"), val = tensor([0, 1, 3, 2])]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_141 = transpose(perm = var_1515, x = var_1514)[name = string("transpose_73")]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")]; tensor var_1541 = mul(x = x1_21, y = cos_7)[name = string("op_1541")]; tensor var_1542 = mul(x = x2_21, y = sin_7)[name = string("op_1542")]; tensor var_1543 = sub(x = var_1541, y = var_1542)[name = string("op_1543")]; tensor var_1544 = mul(x = x2_21, y = cos_7)[name = string("op_1544")]; tensor var_1545 = mul(x = x1_21, y = sin_7)[name = string("op_1545")]; tensor var_1546 = add(x = var_1544, y = var_1545)[name = string("op_1546")]; bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; tensor rotated_21 = concat(axis = var_75, interleave = rotated_21_interleave_0, values = (var_1543, var_1546))[name = string("rotated_21")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_145 = transpose(perm = var_1519, x = var_1518)[name = string("transpose_72")]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")]; tensor var_1562 = mul(x = x1_23, y = cos_7)[name = string("op_1562")]; tensor var_1563 = mul(x = x2_23, y = sin_7)[name = string("op_1563")]; tensor var_1564 = sub(x = var_1562, y = var_1563)[name = string("op_1564")]; tensor var_1565 = mul(x = x2_23, y = cos_7)[name = string("op_1565")]; tensor var_1566 = mul(x = x1_23, y = sin_7)[name = string("op_1566")]; tensor var_1567 = add(x = var_1565, y = var_1566)[name = string("op_1567")]; bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; tensor rotated_23 = concat(axis = var_75, interleave = rotated_23_interleave_0, values = (var_1564, var_1567))[name = string("rotated_23")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_591, concat_93_values3_0))[name = string("concat_93")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_41)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_42")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([21])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([22])]; int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_591, concat_97_values3_0))[name = string("concat_97")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_33 = transpose(perm = var_1523, x = var_1522)[name = string("transpose_71")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_42)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_43")]; tensor var_1590_begin_0 = const()[name = string("op_1590_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1590_end_0 = const()[name = string("op_1590_end_0"), val = tensor([6, 8, 1024, 64])]; tensor var_1590_end_mask_0 = const()[name = string("op_1590_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1590_cast_fp16 = slice_by_index(begin = var_1590_begin_0, end = var_1590_end_0, end_mask = var_1590_end_mask_0, x = coreml_update_state_43)[name = string("op_1590_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1590_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_1592_begin_0 = const()[name = string("op_1592_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_1592_end_0 = const()[name = string("op_1592_end_0"), val = tensor([22, 8, 1024, 64])]; tensor var_1592_end_mask_0 = const()[name = string("op_1592_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1592_cast_fp16 = slice_by_index(begin = var_1592_begin_0, end = var_1592_end_0, end_mask = var_1592_end_mask_0, x = coreml_update_state_43)[name = string("op_1592_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1592_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1601 = const()[name = string("op_1601"), val = tensor([1, 4, 1, 1])]; tensor x_153_cast_fp16 = tile(reps = var_1601, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1605 = const()[name = string("op_1605"), val = tensor([1, -1, 1024, 64])]; tensor var_1606_cast_fp16 = reshape(shape = var_1605, x = x_153_cast_fp16)[name = string("op_1606_cast_fp16")]; tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_1608 = const()[name = string("op_1608"), val = tensor([1, 4, 1, 1])]; tensor x_159_cast_fp16 = tile(reps = var_1608, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; bool var_1615_transpose_x_0 = const()[name = string("op_1615_transpose_x_0"), val = bool(false)]; bool var_1615_transpose_y_0 = const()[name = string("op_1615_transpose_y_0"), val = bool(true)]; tensor var_1615_cast_fp16 = matmul(transpose_x = var_1615_transpose_x_0, transpose_y = var_1615_transpose_y_0, x = rotated_21, y = var_1606_cast_fp16)[name = string("op_1615_cast_fp16")]; fp16 var_1616_to_fp16 = const()[name = string("op_1616_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_11_cast_fp16 = mul(x = var_1615_cast_fp16, y = var_1616_to_fp16)[name = string("attn_weights_11_cast_fp16")]; tensor x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; tensor var_1627_axes_0 = const()[name = string("op_1627_axes_0"), val = tensor([-1])]; bool var_1627_keep_dims_0 = const()[name = string("op_1627_keep_dims_0"), val = bool(true)]; tensor var_1627_cast_fp16 = reduce_sum(axes = var_1627_axes_0, keep_dims = var_1627_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1627_cast_fp16")]; tensor var_1628_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1627_cast_fp16)[name = string("op_1628_cast_fp16")]; tensor concat_102 = const()[name = string("concat_102"), val = tensor([32, 64, 1024])]; tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1628_cast_fp16)[name = string("reshape_15_cast_fp16")]; tensor concat_103 = const()[name = string("concat_103"), val = tensor([32, 1024, 64])]; tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")]; bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 32, 64, 64])]; tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor var_1631_perm_0 = const()[name = string("op_1631_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1633 = const()[name = string("op_1633"), val = tensor([1, 64, 2048])]; tensor var_1631_cast_fp16 = transpose(perm = var_1631_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_70")]; tensor input_75_cast_fp16 = reshape(shape = var_1633, x = var_1631_cast_fp16)[name = string("input_75_cast_fp16")]; tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1184600640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1187746432))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; tensor var_1644_axes_0 = const()[name = string("op_1644_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500610752)))]; tensor var_1644_cast_fp16 = layer_norm(axes = var_1644_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1644_cast_fp16")]; tensor var_1651 = const()[name = string("op_1651"), val = tensor([0, 2, 1])]; tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; tensor var_1652 = transpose(perm = var_1651, x = var_1644_cast_fp16)[name = string("transpose_69")]; tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1652)[name = string("input_79")]; string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; tensor var_1674_axes_0 = const()[name = string("op_1674_axes_0"), val = tensor([2])]; tensor var_1674 = squeeze(axes = var_1674_axes_0, x = hidden_states_47)[name = string("op_1674")]; tensor var_1675 = const()[name = string("op_1675"), val = tensor([0, 2, 1])]; tensor var_1676 = transpose(perm = var_1675, x = var_1674)[name = string("transpose_68")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1676)[name = string("hidden_states_49_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; tensor var_1684_axes_0 = const()[name = string("op_1684_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500614912)))]; tensor var_1684_cast_fp16 = layer_norm(axes = var_1684_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1684_cast_fp16")]; tensor var_1688 = const()[name = string("op_1688"), val = tensor([0, 2, 1])]; tensor var_1690_axes_0 = const()[name = string("op_1690_axes_0"), val = tensor([2])]; tensor var_1689 = transpose(perm = var_1688, x = var_1684_cast_fp16)[name = string("transpose_67")]; tensor var_1690 = expand_dims(axes = var_1690_axes_0, x = var_1689)[name = string("op_1690")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1690)[name = string("query_states_25")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1690)[name = string("key_states_37")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1690)[name = string("value_states_37")]; tensor var_1710 = const()[name = string("op_1710"), val = tensor([1, 32, 64, 64])]; tensor var_1711 = reshape(shape = var_1710, x = query_states_25)[name = string("op_1711")]; tensor var_1712 = const()[name = string("op_1712"), val = tensor([0, 1, 3, 2])]; tensor var_1714 = const()[name = string("op_1714"), val = tensor([1, 8, 64, 64])]; tensor var_1715 = reshape(shape = var_1714, x = key_states_37)[name = string("op_1715")]; tensor var_1716 = const()[name = string("op_1716"), val = tensor([0, 1, 3, 2])]; tensor var_1718 = const()[name = string("op_1718"), val = tensor([1, 8, 64, 64])]; tensor var_1719 = reshape(shape = var_1718, x = value_states_37)[name = string("op_1719")]; tensor var_1720 = const()[name = string("op_1720"), val = tensor([0, 1, 3, 2])]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_169 = transpose(perm = var_1712, x = var_1711)[name = string("transpose_66")]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")]; tensor var_1738 = mul(x = x1_25, y = cos_7)[name = string("op_1738")]; tensor var_1739 = mul(x = x2_25, y = sin_7)[name = string("op_1739")]; tensor var_1740 = sub(x = var_1738, y = var_1739)[name = string("op_1740")]; tensor var_1741 = mul(x = x2_25, y = cos_7)[name = string("op_1741")]; tensor var_1742 = mul(x = x1_25, y = sin_7)[name = string("op_1742")]; tensor var_1743 = add(x = var_1741, y = var_1742)[name = string("op_1743")]; bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; tensor rotated_25 = concat(axis = var_75, interleave = rotated_25_interleave_0, values = (var_1740, var_1743))[name = string("rotated_25")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_173 = transpose(perm = var_1716, x = var_1715)[name = string("transpose_65")]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = x_173)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = x_173)[name = string("x2_27")]; tensor var_1759 = mul(x = x1_27, y = cos_7)[name = string("op_1759")]; tensor var_1760 = mul(x = x2_27, y = sin_7)[name = string("op_1760")]; tensor var_1761 = sub(x = var_1759, y = var_1760)[name = string("op_1761")]; tensor var_1762 = mul(x = x2_27, y = cos_7)[name = string("op_1762")]; tensor var_1763 = mul(x = x1_27, y = sin_7)[name = string("op_1763")]; tensor var_1764 = add(x = var_1762, y = var_1763)[name = string("op_1764")]; bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; tensor rotated_27 = concat(axis = var_75, interleave = rotated_27_interleave_0, values = (var_1761, var_1764))[name = string("rotated_27")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_591, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27, x = coreml_update_state_43)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_44")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([22])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([23])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_591, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39 = transpose(perm = var_1720, x = var_1719)[name = string("transpose_64")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_44)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_45")]; tensor var_1787_begin_0 = const()[name = string("op_1787_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_1787_end_0 = const()[name = string("op_1787_end_0"), val = tensor([7, 8, 1024, 64])]; tensor var_1787_end_mask_0 = const()[name = string("op_1787_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1787_cast_fp16 = slice_by_index(begin = var_1787_begin_0, end = var_1787_end_0, end_mask = var_1787_end_mask_0, x = coreml_update_state_45)[name = string("op_1787_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1787_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_1789_begin_0 = const()[name = string("op_1789_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_1789_end_0 = const()[name = string("op_1789_end_0"), val = tensor([23, 8, 1024, 64])]; tensor var_1789_end_mask_0 = const()[name = string("op_1789_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1789_cast_fp16 = slice_by_index(begin = var_1789_begin_0, end = var_1789_end_0, end_mask = var_1789_end_mask_0, x = coreml_update_state_45)[name = string("op_1789_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1789_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; tensor var_1798 = const()[name = string("op_1798"), val = tensor([1, 4, 1, 1])]; tensor x_181_cast_fp16 = tile(reps = var_1798, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_1802 = const()[name = string("op_1802"), val = tensor([1, -1, 1024, 64])]; tensor var_1803_cast_fp16 = reshape(shape = var_1802, x = x_181_cast_fp16)[name = string("op_1803_cast_fp16")]; tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; tensor var_1805 = const()[name = string("op_1805"), val = tensor([1, 4, 1, 1])]; tensor x_187_cast_fp16 = tile(reps = var_1805, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; bool var_1812_transpose_x_0 = const()[name = string("op_1812_transpose_x_0"), val = bool(false)]; bool var_1812_transpose_y_0 = const()[name = string("op_1812_transpose_y_0"), val = bool(true)]; tensor var_1812_cast_fp16 = matmul(transpose_x = var_1812_transpose_x_0, transpose_y = var_1812_transpose_y_0, x = rotated_25, y = var_1803_cast_fp16)[name = string("op_1812_cast_fp16")]; fp16 var_1813_to_fp16 = const()[name = string("op_1813_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_13_cast_fp16 = mul(x = var_1812_cast_fp16, y = var_1813_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor x_189_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; tensor var_1824_axes_0 = const()[name = string("op_1824_axes_0"), val = tensor([-1])]; bool var_1824_keep_dims_0 = const()[name = string("op_1824_keep_dims_0"), val = bool(true)]; tensor var_1824_cast_fp16 = reduce_sum(axes = var_1824_axes_0, keep_dims = var_1824_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1824_cast_fp16")]; tensor var_1825_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1824_cast_fp16)[name = string("op_1825_cast_fp16")]; tensor concat_120 = const()[name = string("concat_120"), val = tensor([32, 64, 1024])]; tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1825_cast_fp16)[name = string("reshape_18_cast_fp16")]; tensor concat_121 = const()[name = string("concat_121"), val = tensor([32, 1024, 64])]; tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")]; bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 32, 64, 64])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor var_1828_perm_0 = const()[name = string("op_1828_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1830 = const()[name = string("op_1830"), val = tensor([1, 64, 2048])]; tensor var_1828_cast_fp16 = transpose(perm = var_1828_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_63")]; tensor input_89_cast_fp16 = reshape(shape = var_1830, x = var_1828_cast_fp16)[name = string("input_89_cast_fp16")]; tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1187779264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1190925056))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_1841_axes_0 = const()[name = string("op_1841_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502724544)))]; tensor var_1841_cast_fp16 = layer_norm(axes = var_1841_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1841_cast_fp16")]; tensor var_1848 = const()[name = string("op_1848"), val = tensor([0, 2, 1])]; tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; tensor var_1849 = transpose(perm = var_1848, x = var_1841_cast_fp16)[name = string("transpose_62")]; tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1849)[name = string("input_93")]; string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; tensor var_1871_axes_0 = const()[name = string("op_1871_axes_0"), val = tensor([2])]; tensor var_1871 = squeeze(axes = var_1871_axes_0, x = hidden_states_55)[name = string("op_1871")]; tensor var_1872 = const()[name = string("op_1872"), val = tensor([0, 2, 1])]; tensor var_1873 = transpose(perm = var_1872, x = var_1871)[name = string("transpose_61")]; tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1873)[name = string("hidden_states_57_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; tensor var_1881_axes_0 = const()[name = string("op_1881_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502728704)))]; tensor var_1881_cast_fp16 = layer_norm(axes = var_1881_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1881_cast_fp16")]; tensor var_1885 = const()[name = string("op_1885"), val = tensor([0, 2, 1])]; tensor var_1887_axes_0 = const()[name = string("op_1887_axes_0"), val = tensor([2])]; tensor var_1886 = transpose(perm = var_1885, x = var_1881_cast_fp16)[name = string("transpose_60")]; tensor var_1887 = expand_dims(axes = var_1887_axes_0, x = var_1886)[name = string("op_1887")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor query_states_29 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_1887)[name = string("query_states_29")]; string key_states_43_pad_type_0 = const()[name = string("key_states_43_pad_type_0"), val = string("valid")]; tensor key_states_43_strides_0 = const()[name = string("key_states_43_strides_0"), val = tensor([1, 1])]; tensor key_states_43_pad_0 = const()[name = string("key_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_43_dilations_0 = const()[name = string("key_states_43_dilations_0"), val = tensor([1, 1])]; int32 key_states_43_groups_0 = const()[name = string("key_states_43_groups_0"), val = int32(1)]; tensor key_states_43 = conv(dilations = key_states_43_dilations_0, groups = key_states_43_groups_0, pad = key_states_43_pad_0, pad_type = key_states_43_pad_type_0, strides = key_states_43_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_1887)[name = string("key_states_43")]; string value_states_43_pad_type_0 = const()[name = string("value_states_43_pad_type_0"), val = string("valid")]; tensor value_states_43_strides_0 = const()[name = string("value_states_43_strides_0"), val = tensor([1, 1])]; tensor value_states_43_pad_0 = const()[name = string("value_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_43_dilations_0 = const()[name = string("value_states_43_dilations_0"), val = tensor([1, 1])]; int32 value_states_43_groups_0 = const()[name = string("value_states_43_groups_0"), val = int32(1)]; tensor value_states_43 = conv(dilations = value_states_43_dilations_0, groups = value_states_43_groups_0, pad = value_states_43_pad_0, pad_type = value_states_43_pad_type_0, strides = value_states_43_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_1887)[name = string("value_states_43")]; tensor var_1907 = const()[name = string("op_1907"), val = tensor([1, 32, 64, 64])]; tensor var_1908 = reshape(shape = var_1907, x = query_states_29)[name = string("op_1908")]; tensor var_1909 = const()[name = string("op_1909"), val = tensor([0, 1, 3, 2])]; tensor var_1911 = const()[name = string("op_1911"), val = tensor([1, 8, 64, 64])]; tensor var_1912 = reshape(shape = var_1911, x = key_states_43)[name = string("op_1912")]; tensor var_1913 = const()[name = string("op_1913"), val = tensor([0, 1, 3, 2])]; tensor var_1915 = const()[name = string("op_1915"), val = tensor([1, 8, 64, 64])]; tensor var_1916 = reshape(shape = var_1915, x = value_states_43)[name = string("op_1916")]; tensor var_1917 = const()[name = string("op_1917"), val = tensor([0, 1, 3, 2])]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_197 = transpose(perm = var_1909, x = var_1908)[name = string("transpose_59")]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = x_197)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = x_197)[name = string("x2_29")]; tensor var_1935 = mul(x = x1_29, y = cos_7)[name = string("op_1935")]; tensor var_1936 = mul(x = x2_29, y = sin_7)[name = string("op_1936")]; tensor var_1937 = sub(x = var_1935, y = var_1936)[name = string("op_1937")]; tensor var_1938 = mul(x = x2_29, y = cos_7)[name = string("op_1938")]; tensor var_1939 = mul(x = x1_29, y = sin_7)[name = string("op_1939")]; tensor var_1940 = add(x = var_1938, y = var_1939)[name = string("op_1940")]; bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; tensor rotated_29 = concat(axis = var_75, interleave = rotated_29_interleave_0, values = (var_1937, var_1940))[name = string("rotated_29")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_201 = transpose(perm = var_1913, x = var_1912)[name = string("transpose_58")]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = x_201)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = x_201)[name = string("x2_31")]; tensor var_1956 = mul(x = x1_31, y = cos_7)[name = string("op_1956")]; tensor var_1957 = mul(x = x2_31, y = sin_7)[name = string("op_1957")]; tensor var_1958 = sub(x = var_1956, y = var_1957)[name = string("op_1958")]; tensor var_1959 = mul(x = x2_31, y = cos_7)[name = string("op_1959")]; tensor var_1960 = mul(x = x1_31, y = sin_7)[name = string("op_1960")]; tensor var_1961 = add(x = var_1959, y = var_1960)[name = string("op_1961")]; bool rotated_31_interleave_0 = const()[name = string("rotated_31_interleave_0"), val = bool(false)]; tensor rotated_31 = concat(axis = var_75, interleave = rotated_31_interleave_0, values = (var_1958, var_1961))[name = string("rotated_31")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_591, concat_129_values3_0))[name = string("concat_129")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_31, x = coreml_update_state_45)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_46")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([23])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([24])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_591, concat_133_values3_0))[name = string("concat_133")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_45 = transpose(perm = var_1917, x = var_1916)[name = string("transpose_57")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_45, x = coreml_update_state_46)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_47 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_47")]; tensor var_1984_begin_0 = const()[name = string("op_1984_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_1984_end_0 = const()[name = string("op_1984_end_0"), val = tensor([8, 8, 1024, 64])]; tensor var_1984_end_mask_0 = const()[name = string("op_1984_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1984_cast_fp16 = slice_by_index(begin = var_1984_begin_0, end = var_1984_end_0, end_mask = var_1984_end_mask_0, x = coreml_update_state_47)[name = string("op_1984_cast_fp16")]; tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_1984_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; tensor var_1986_begin_0 = const()[name = string("op_1986_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_1986_end_0 = const()[name = string("op_1986_end_0"), val = tensor([24, 8, 1024, 64])]; tensor var_1986_end_mask_0 = const()[name = string("op_1986_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1986_cast_fp16 = slice_by_index(begin = var_1986_begin_0, end = var_1986_end_0, end_mask = var_1986_end_mask_0, x = coreml_update_state_47)[name = string("op_1986_cast_fp16")]; tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_1986_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_1995 = const()[name = string("op_1995"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_1995, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_1999 = const()[name = string("op_1999"), val = tensor([1, -1, 1024, 64])]; tensor var_2000_cast_fp16 = reshape(shape = var_1999, x = x_209_cast_fp16)[name = string("op_2000_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_2002 = const()[name = string("op_2002"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_2002, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; bool var_2009_transpose_x_0 = const()[name = string("op_2009_transpose_x_0"), val = bool(false)]; bool var_2009_transpose_y_0 = const()[name = string("op_2009_transpose_y_0"), val = bool(true)]; tensor var_2009_cast_fp16 = matmul(transpose_x = var_2009_transpose_x_0, transpose_y = var_2009_transpose_y_0, x = rotated_29, y = var_2000_cast_fp16)[name = string("op_2009_cast_fp16")]; fp16 var_2010_to_fp16 = const()[name = string("op_2010_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_15_cast_fp16 = mul(x = var_2009_cast_fp16, y = var_2010_to_fp16)[name = string("attn_weights_15_cast_fp16")]; tensor x_217_cast_fp16 = add(x = attn_weights_15_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; tensor exp_x_15_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_15_cast_fp16")]; tensor var_2021_axes_0 = const()[name = string("op_2021_axes_0"), val = tensor([-1])]; bool var_2021_keep_dims_0 = const()[name = string("op_2021_keep_dims_0"), val = bool(true)]; tensor var_2021_cast_fp16 = reduce_sum(axes = var_2021_axes_0, keep_dims = var_2021_keep_dims_0, x = exp_x_15_cast_fp16)[name = string("op_2021_cast_fp16")]; tensor var_2022_cast_fp16 = real_div(x = exp_x_15_cast_fp16, y = var_2021_cast_fp16)[name = string("op_2022_cast_fp16")]; tensor concat_138 = const()[name = string("concat_138"), val = tensor([32, 64, 1024])]; tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_2022_cast_fp16)[name = string("reshape_21_cast_fp16")]; tensor concat_139 = const()[name = string("concat_139"), val = tensor([32, 1024, 64])]; tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_215_cast_fp16)[name = string("reshape_22_cast_fp16")]; bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 32, 64, 64])]; tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor var_2025_perm_0 = const()[name = string("op_2025_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2027 = const()[name = string("op_2027"), val = tensor([1, 64, 2048])]; tensor var_2025_cast_fp16 = transpose(perm = var_2025_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_56")]; tensor input_103_cast_fp16 = reshape(shape = var_2027, x = var_2025_cast_fp16)[name = string("input_103_cast_fp16")]; tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1190957888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194103680))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; tensor var_2038_axes_0 = const()[name = string("op_2038_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504838336)))]; tensor var_2038_cast_fp16 = layer_norm(axes = var_2038_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_2038_cast_fp16")]; tensor var_2045 = const()[name = string("op_2045"), val = tensor([0, 2, 1])]; tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; tensor var_2046 = transpose(perm = var_2045, x = var_2038_cast_fp16)[name = string("transpose_55")]; tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_2046)[name = string("input_107")]; string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; string up_states_15_pad_type_0 = const()[name = string("up_states_15_pad_type_0"), val = string("valid")]; tensor up_states_15_strides_0 = const()[name = string("up_states_15_strides_0"), val = tensor([1, 1])]; tensor up_states_15_pad_0 = const()[name = string("up_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_15_dilations_0 = const()[name = string("up_states_15_dilations_0"), val = tensor([1, 1])]; int32 up_states_15_groups_0 = const()[name = string("up_states_15_groups_0"), val = int32(1)]; tensor up_states_15 = conv(dilations = up_states_15_dilations_0, groups = up_states_15_groups_0, pad = up_states_15_pad_0, pad_type = up_states_15_pad_type_0, strides = up_states_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states_15")]; tensor gate_states_15 = silu(x = input_109)[name = string("gate_states_15")]; tensor input_111 = mul(x = gate_states_15, y = up_states_15)[name = string("input_111")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; tensor var_2068_axes_0 = const()[name = string("op_2068_axes_0"), val = tensor([2])]; tensor var_2068 = squeeze(axes = var_2068_axes_0, x = hidden_states_63)[name = string("op_2068")]; tensor var_2069 = const()[name = string("op_2069"), val = tensor([0, 2, 1])]; tensor var_2070 = transpose(perm = var_2069, x = var_2068)[name = string("transpose_54")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_2070)[name = string("hidden_states_65_cast_fp16")]; tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_33_cast_fp16")]; tensor input_113_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_33_cast_fp16)[name = string("input_113_cast_fp16")]; tensor var_2078_axes_0 = const()[name = string("op_2078_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504842496)))]; tensor var_2078_cast_fp16 = layer_norm(axes = var_2078_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_113_cast_fp16)[name = string("op_2078_cast_fp16")]; tensor var_2082 = const()[name = string("op_2082"), val = tensor([0, 2, 1])]; tensor var_2084_axes_0 = const()[name = string("op_2084_axes_0"), val = tensor([2])]; tensor var_2083 = transpose(perm = var_2082, x = var_2078_cast_fp16)[name = string("transpose_53")]; tensor var_2084 = expand_dims(axes = var_2084_axes_0, x = var_2083)[name = string("op_2084")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_2084)[name = string("query_states_33")]; string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; tensor key_states_49 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_2084)[name = string("key_states_49")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_2084)[name = string("value_states_49")]; tensor var_2104 = const()[name = string("op_2104"), val = tensor([1, 32, 64, 64])]; tensor var_2105 = reshape(shape = var_2104, x = query_states_33)[name = string("op_2105")]; tensor var_2106 = const()[name = string("op_2106"), val = tensor([0, 1, 3, 2])]; tensor var_2108 = const()[name = string("op_2108"), val = tensor([1, 8, 64, 64])]; tensor var_2109 = reshape(shape = var_2108, x = key_states_49)[name = string("op_2109")]; tensor var_2110 = const()[name = string("op_2110"), val = tensor([0, 1, 3, 2])]; tensor var_2112 = const()[name = string("op_2112"), val = tensor([1, 8, 64, 64])]; tensor var_2113 = reshape(shape = var_2112, x = value_states_49)[name = string("op_2113")]; tensor var_2114 = const()[name = string("op_2114"), val = tensor([0, 1, 3, 2])]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_225 = transpose(perm = var_2106, x = var_2105)[name = string("transpose_52")]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = x_225)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = x_225)[name = string("x2_33")]; tensor var_2132 = mul(x = x1_33, y = cos_7)[name = string("op_2132")]; tensor var_2133 = mul(x = x2_33, y = sin_7)[name = string("op_2133")]; tensor var_2134 = sub(x = var_2132, y = var_2133)[name = string("op_2134")]; tensor var_2135 = mul(x = x2_33, y = cos_7)[name = string("op_2135")]; tensor var_2136 = mul(x = x1_33, y = sin_7)[name = string("op_2136")]; tensor var_2137 = add(x = var_2135, y = var_2136)[name = string("op_2137")]; bool rotated_33_interleave_0 = const()[name = string("rotated_33_interleave_0"), val = bool(false)]; tensor rotated_33 = concat(axis = var_75, interleave = rotated_33_interleave_0, values = (var_2134, var_2137))[name = string("rotated_33")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_229 = transpose(perm = var_2110, x = var_2109)[name = string("transpose_51")]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = x_229)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = x_229)[name = string("x2_35")]; tensor var_2153 = mul(x = x1_35, y = cos_7)[name = string("op_2153")]; tensor var_2154 = mul(x = x2_35, y = sin_7)[name = string("op_2154")]; tensor var_2155 = sub(x = var_2153, y = var_2154)[name = string("op_2155")]; tensor var_2156 = mul(x = x2_35, y = cos_7)[name = string("op_2156")]; tensor var_2157 = mul(x = x1_35, y = sin_7)[name = string("op_2157")]; tensor var_2158 = add(x = var_2156, y = var_2157)[name = string("op_2158")]; bool rotated_35_interleave_0 = const()[name = string("rotated_35_interleave_0"), val = bool(false)]; tensor rotated_35 = concat(axis = var_75, interleave = rotated_35_interleave_0, values = (var_2155, var_2158))[name = string("rotated_35")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, var_591, concat_147_values3_0))[name = string("concat_147")]; tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = rotated_35, x = coreml_update_state_47)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_48")]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([24])]; tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([25])]; int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, var_591, concat_151_values3_0))[name = string("concat_151")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51 = transpose(perm = var_2114, x = var_2113)[name = string("transpose_50")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = value_states_51, x = coreml_update_state_48)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_49")]; tensor var_2181_begin_0 = const()[name = string("op_2181_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2181_end_0 = const()[name = string("op_2181_end_0"), val = tensor([9, 8, 1024, 64])]; tensor var_2181_end_mask_0 = const()[name = string("op_2181_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2181_cast_fp16 = slice_by_index(begin = var_2181_begin_0, end = var_2181_end_0, end_mask = var_2181_end_mask_0, x = coreml_update_state_49)[name = string("op_2181_cast_fp16")]; tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_2181_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; tensor var_2183_begin_0 = const()[name = string("op_2183_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_2183_end_0 = const()[name = string("op_2183_end_0"), val = tensor([25, 8, 1024, 64])]; tensor var_2183_end_mask_0 = const()[name = string("op_2183_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2183_cast_fp16 = slice_by_index(begin = var_2183_begin_0, end = var_2183_end_0, end_mask = var_2183_end_mask_0, x = coreml_update_state_49)[name = string("op_2183_cast_fp16")]; tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_2183_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; tensor x_235_axes_0 = const()[name = string("x_235_axes_0"), val = tensor([1])]; tensor x_235_cast_fp16 = expand_dims(axes = x_235_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_235_cast_fp16")]; tensor var_2192 = const()[name = string("op_2192"), val = tensor([1, 4, 1, 1])]; tensor x_237_cast_fp16 = tile(reps = var_2192, x = x_235_cast_fp16)[name = string("x_237_cast_fp16")]; tensor var_2196 = const()[name = string("op_2196"), val = tensor([1, -1, 1024, 64])]; tensor var_2197_cast_fp16 = reshape(shape = var_2196, x = x_237_cast_fp16)[name = string("op_2197_cast_fp16")]; tensor x_241_axes_0 = const()[name = string("x_241_axes_0"), val = tensor([1])]; tensor x_241_cast_fp16 = expand_dims(axes = x_241_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_241_cast_fp16")]; tensor var_2199 = const()[name = string("op_2199"), val = tensor([1, 4, 1, 1])]; tensor x_243_cast_fp16 = tile(reps = var_2199, x = x_241_cast_fp16)[name = string("x_243_cast_fp16")]; bool var_2206_transpose_x_0 = const()[name = string("op_2206_transpose_x_0"), val = bool(false)]; bool var_2206_transpose_y_0 = const()[name = string("op_2206_transpose_y_0"), val = bool(true)]; tensor var_2206_cast_fp16 = matmul(transpose_x = var_2206_transpose_x_0, transpose_y = var_2206_transpose_y_0, x = rotated_33, y = var_2197_cast_fp16)[name = string("op_2206_cast_fp16")]; fp16 var_2207_to_fp16 = const()[name = string("op_2207_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_17_cast_fp16 = mul(x = var_2206_cast_fp16, y = var_2207_to_fp16)[name = string("attn_weights_17_cast_fp16")]; tensor x_245_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_245_cast_fp16")]; tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_245_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; tensor x_247_cast_fp16 = sub(x = x_245_cast_fp16, y = reduce_max_8_cast_fp16)[name = string("x_247_cast_fp16")]; tensor exp_x_17_cast_fp16 = exp(x = x_247_cast_fp16)[name = string("exp_x_17_cast_fp16")]; tensor var_2218_axes_0 = const()[name = string("op_2218_axes_0"), val = tensor([-1])]; bool var_2218_keep_dims_0 = const()[name = string("op_2218_keep_dims_0"), val = bool(true)]; tensor var_2218_cast_fp16 = reduce_sum(axes = var_2218_axes_0, keep_dims = var_2218_keep_dims_0, x = exp_x_17_cast_fp16)[name = string("op_2218_cast_fp16")]; tensor var_2219_cast_fp16 = real_div(x = exp_x_17_cast_fp16, y = var_2218_cast_fp16)[name = string("op_2219_cast_fp16")]; tensor concat_156 = const()[name = string("concat_156"), val = tensor([32, 64, 1024])]; tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_2219_cast_fp16)[name = string("reshape_24_cast_fp16")]; tensor concat_157 = const()[name = string("concat_157"), val = tensor([32, 1024, 64])]; tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_243_cast_fp16)[name = string("reshape_25_cast_fp16")]; bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 32, 64, 64])]; tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; tensor var_2222_perm_0 = const()[name = string("op_2222_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2224 = const()[name = string("op_2224"), val = tensor([1, 64, 2048])]; tensor var_2222_cast_fp16 = transpose(perm = var_2222_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_49")]; tensor input_117_cast_fp16 = reshape(shape = var_2224, x = var_2222_cast_fp16)[name = string("input_117_cast_fp16")]; tensor model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194136512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197282304))))[name = string("model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor hidden_states_69_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = linear_8_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor mean_35_axes_0 = const()[name = string("mean_35_axes_0"), val = tensor([-1])]; bool mean_35_keep_dims_0 = const()[name = string("mean_35_keep_dims_0"), val = bool(true)]; tensor mean_35_cast_fp16 = reduce_mean(axes = mean_35_axes_0, keep_dims = mean_35_keep_dims_0, x = hidden_states_69_cast_fp16)[name = string("mean_35_cast_fp16")]; tensor input_119_cast_fp16 = sub(x = hidden_states_69_cast_fp16, y = mean_35_cast_fp16)[name = string("input_119_cast_fp16")]; tensor var_2235_axes_0 = const()[name = string("op_2235_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506952128)))]; tensor var_2235_cast_fp16 = layer_norm(axes = var_2235_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_2235_cast_fp16")]; tensor var_2242 = const()[name = string("op_2242"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_2243 = transpose(perm = var_2242, x = var_2235_cast_fp16)[name = string("transpose_48")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_2243)[name = string("input_121")]; string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; string up_states_17_pad_type_0 = const()[name = string("up_states_17_pad_type_0"), val = string("valid")]; tensor up_states_17_strides_0 = const()[name = string("up_states_17_strides_0"), val = tensor([1, 1])]; tensor up_states_17_pad_0 = const()[name = string("up_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_17_dilations_0 = const()[name = string("up_states_17_dilations_0"), val = tensor([1, 1])]; int32 up_states_17_groups_0 = const()[name = string("up_states_17_groups_0"), val = int32(1)]; tensor up_states_17 = conv(dilations = up_states_17_dilations_0, groups = up_states_17_groups_0, pad = up_states_17_pad_0, pad_type = up_states_17_pad_type_0, strides = up_states_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_states_17")]; tensor gate_states_17 = silu(x = input_123)[name = string("gate_states_17")]; tensor input_125 = mul(x = gate_states_17, y = up_states_17)[name = string("input_125")]; string hidden_states_71_pad_type_0 = const()[name = string("hidden_states_71_pad_type_0"), val = string("valid")]; tensor hidden_states_71_strides_0 = const()[name = string("hidden_states_71_strides_0"), val = tensor([1, 1])]; tensor hidden_states_71_pad_0 = const()[name = string("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_71_dilations_0 = const()[name = string("hidden_states_71_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_71_groups_0 = const()[name = string("hidden_states_71_groups_0"), val = int32(1)]; tensor hidden_states_71 = conv(dilations = hidden_states_71_dilations_0, groups = hidden_states_71_groups_0, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = hidden_states_71_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_125)[name = string("hidden_states_71")]; tensor var_2265_axes_0 = const()[name = string("op_2265_axes_0"), val = tensor([2])]; tensor var_2265 = squeeze(axes = var_2265_axes_0, x = hidden_states_71)[name = string("op_2265")]; tensor var_2266 = const()[name = string("op_2266"), val = tensor([0, 2, 1])]; tensor var_2267 = transpose(perm = var_2266, x = var_2265)[name = string("transpose_47")]; tensor hidden_states_73_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = var_2267)[name = string("hidden_states_73_cast_fp16")]; tensor mean_37_axes_0 = const()[name = string("mean_37_axes_0"), val = tensor([-1])]; bool mean_37_keep_dims_0 = const()[name = string("mean_37_keep_dims_0"), val = bool(true)]; tensor mean_37_cast_fp16 = reduce_mean(axes = mean_37_axes_0, keep_dims = mean_37_keep_dims_0, x = hidden_states_73_cast_fp16)[name = string("mean_37_cast_fp16")]; tensor input_127_cast_fp16 = sub(x = hidden_states_73_cast_fp16, y = mean_37_cast_fp16)[name = string("input_127_cast_fp16")]; tensor var_2275_axes_0 = const()[name = string("op_2275_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506956288)))]; tensor var_2275_cast_fp16 = layer_norm(axes = var_2275_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_127_cast_fp16)[name = string("op_2275_cast_fp16")]; tensor var_2279 = const()[name = string("op_2279"), val = tensor([0, 2, 1])]; tensor var_2281_axes_0 = const()[name = string("op_2281_axes_0"), val = tensor([2])]; tensor var_2280 = transpose(perm = var_2279, x = var_2275_cast_fp16)[name = string("transpose_46")]; tensor var_2281 = expand_dims(axes = var_2281_axes_0, x = var_2280)[name = string("op_2281")]; string query_states_37_pad_type_0 = const()[name = string("query_states_37_pad_type_0"), val = string("valid")]; tensor query_states_37_strides_0 = const()[name = string("query_states_37_strides_0"), val = tensor([1, 1])]; tensor query_states_37_pad_0 = const()[name = string("query_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_37_dilations_0 = const()[name = string("query_states_37_dilations_0"), val = tensor([1, 1])]; int32 query_states_37_groups_0 = const()[name = string("query_states_37_groups_0"), val = int32(1)]; tensor query_states_37 = conv(dilations = query_states_37_dilations_0, groups = query_states_37_groups_0, pad = query_states_37_pad_0, pad_type = query_states_37_pad_type_0, strides = query_states_37_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_2281)[name = string("query_states_37")]; string key_states_55_pad_type_0 = const()[name = string("key_states_55_pad_type_0"), val = string("valid")]; tensor key_states_55_strides_0 = const()[name = string("key_states_55_strides_0"), val = tensor([1, 1])]; tensor key_states_55_pad_0 = const()[name = string("key_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_55_dilations_0 = const()[name = string("key_states_55_dilations_0"), val = tensor([1, 1])]; int32 key_states_55_groups_0 = const()[name = string("key_states_55_groups_0"), val = int32(1)]; tensor key_states_55 = conv(dilations = key_states_55_dilations_0, groups = key_states_55_groups_0, pad = key_states_55_pad_0, pad_type = key_states_55_pad_type_0, strides = key_states_55_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_2281)[name = string("key_states_55")]; string value_states_55_pad_type_0 = const()[name = string("value_states_55_pad_type_0"), val = string("valid")]; tensor value_states_55_strides_0 = const()[name = string("value_states_55_strides_0"), val = tensor([1, 1])]; tensor value_states_55_pad_0 = const()[name = string("value_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_55_dilations_0 = const()[name = string("value_states_55_dilations_0"), val = tensor([1, 1])]; int32 value_states_55_groups_0 = const()[name = string("value_states_55_groups_0"), val = int32(1)]; tensor value_states_55 = conv(dilations = value_states_55_dilations_0, groups = value_states_55_groups_0, pad = value_states_55_pad_0, pad_type = value_states_55_pad_type_0, strides = value_states_55_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_2281)[name = string("value_states_55")]; tensor var_2301 = const()[name = string("op_2301"), val = tensor([1, 32, 64, 64])]; tensor var_2302 = reshape(shape = var_2301, x = query_states_37)[name = string("op_2302")]; tensor var_2303 = const()[name = string("op_2303"), val = tensor([0, 1, 3, 2])]; tensor var_2305 = const()[name = string("op_2305"), val = tensor([1, 8, 64, 64])]; tensor var_2306 = reshape(shape = var_2305, x = key_states_55)[name = string("op_2306")]; tensor var_2307 = const()[name = string("op_2307"), val = tensor([0, 1, 3, 2])]; tensor var_2309 = const()[name = string("op_2309"), val = tensor([1, 8, 64, 64])]; tensor var_2310 = reshape(shape = var_2309, x = value_states_55)[name = string("op_2310")]; tensor var_2311 = const()[name = string("op_2311"), val = tensor([0, 1, 3, 2])]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_253 = transpose(perm = var_2303, x = var_2302)[name = string("transpose_45")]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = x_253)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = x_253)[name = string("x2_37")]; tensor var_2329 = mul(x = x1_37, y = cos_7)[name = string("op_2329")]; tensor var_2330 = mul(x = x2_37, y = sin_7)[name = string("op_2330")]; tensor var_2331 = sub(x = var_2329, y = var_2330)[name = string("op_2331")]; tensor var_2332 = mul(x = x2_37, y = cos_7)[name = string("op_2332")]; tensor var_2333 = mul(x = x1_37, y = sin_7)[name = string("op_2333")]; tensor var_2334 = add(x = var_2332, y = var_2333)[name = string("op_2334")]; bool rotated_37_interleave_0 = const()[name = string("rotated_37_interleave_0"), val = bool(false)]; tensor rotated_37 = concat(axis = var_75, interleave = rotated_37_interleave_0, values = (var_2331, var_2334))[name = string("rotated_37")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_257 = transpose(perm = var_2307, x = var_2306)[name = string("transpose_44")]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = x_257)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = x_257)[name = string("x2_39")]; tensor var_2350 = mul(x = x1_39, y = cos_7)[name = string("op_2350")]; tensor var_2351 = mul(x = x2_39, y = sin_7)[name = string("op_2351")]; tensor var_2352 = sub(x = var_2350, y = var_2351)[name = string("op_2352")]; tensor var_2353 = mul(x = x2_39, y = cos_7)[name = string("op_2353")]; tensor var_2354 = mul(x = x1_39, y = sin_7)[name = string("op_2354")]; tensor var_2355 = add(x = var_2353, y = var_2354)[name = string("op_2355")]; bool rotated_39_interleave_0 = const()[name = string("rotated_39_interleave_0"), val = bool(false)]; tensor rotated_39 = concat(axis = var_75, interleave = rotated_39_interleave_0, values = (var_2352, var_2355))[name = string("rotated_39")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([9])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([10])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_164")]; tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (expand_dims_112, concat_165_values1_0, var_591, concat_165_values3_0))[name = string("concat_165")]; tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_164, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_165, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = rotated_39, x = coreml_update_state_49)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_50")]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([25])]; tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([26])]; int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_168")]; tensor concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = tensor([0])]; tensor concat_169_values3_0 = const()[name = string("concat_169_values3_0"), val = tensor([0])]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (expand_dims_118, concat_169_values1_0, var_591, concat_169_values3_0))[name = string("concat_169")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_57 = transpose(perm = var_2311, x = var_2310)[name = string("transpose_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_168, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_169, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = value_states_57, x = coreml_update_state_50)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_51")]; tensor var_2378_begin_0 = const()[name = string("op_2378_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2378_end_0 = const()[name = string("op_2378_end_0"), val = tensor([10, 8, 1024, 64])]; tensor var_2378_end_mask_0 = const()[name = string("op_2378_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = coreml_update_state_51)[name = string("op_2378_cast_fp16")]; tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_2378_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; tensor var_2380_begin_0 = const()[name = string("op_2380_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_2380_end_0 = const()[name = string("op_2380_end_0"), val = tensor([26, 8, 1024, 64])]; tensor var_2380_end_mask_0 = const()[name = string("op_2380_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2380_cast_fp16 = slice_by_index(begin = var_2380_begin_0, end = var_2380_end_0, end_mask = var_2380_end_mask_0, x = coreml_update_state_51)[name = string("op_2380_cast_fp16")]; tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_2380_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; tensor x_263_axes_0 = const()[name = string("x_263_axes_0"), val = tensor([1])]; tensor x_263_cast_fp16 = expand_dims(axes = x_263_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_263_cast_fp16")]; tensor var_2389 = const()[name = string("op_2389"), val = tensor([1, 4, 1, 1])]; tensor x_265_cast_fp16 = tile(reps = var_2389, x = x_263_cast_fp16)[name = string("x_265_cast_fp16")]; tensor var_2393 = const()[name = string("op_2393"), val = tensor([1, -1, 1024, 64])]; tensor var_2394_cast_fp16 = reshape(shape = var_2393, x = x_265_cast_fp16)[name = string("op_2394_cast_fp16")]; tensor x_269_axes_0 = const()[name = string("x_269_axes_0"), val = tensor([1])]; tensor x_269_cast_fp16 = expand_dims(axes = x_269_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_269_cast_fp16")]; tensor var_2396 = const()[name = string("op_2396"), val = tensor([1, 4, 1, 1])]; tensor x_271_cast_fp16 = tile(reps = var_2396, x = x_269_cast_fp16)[name = string("x_271_cast_fp16")]; bool var_2403_transpose_x_0 = const()[name = string("op_2403_transpose_x_0"), val = bool(false)]; bool var_2403_transpose_y_0 = const()[name = string("op_2403_transpose_y_0"), val = bool(true)]; tensor var_2403_cast_fp16 = matmul(transpose_x = var_2403_transpose_x_0, transpose_y = var_2403_transpose_y_0, x = rotated_37, y = var_2394_cast_fp16)[name = string("op_2403_cast_fp16")]; fp16 var_2404_to_fp16 = const()[name = string("op_2404_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_19_cast_fp16 = mul(x = var_2403_cast_fp16, y = var_2404_to_fp16)[name = string("attn_weights_19_cast_fp16")]; tensor x_273_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = causal_mask)[name = string("x_273_cast_fp16")]; tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; tensor reduce_max_9_cast_fp16 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_273_cast_fp16)[name = string("reduce_max_9_cast_fp16")]; tensor x_275_cast_fp16 = sub(x = x_273_cast_fp16, y = reduce_max_9_cast_fp16)[name = string("x_275_cast_fp16")]; tensor exp_x_19_cast_fp16 = exp(x = x_275_cast_fp16)[name = string("exp_x_19_cast_fp16")]; tensor var_2415_axes_0 = const()[name = string("op_2415_axes_0"), val = tensor([-1])]; bool var_2415_keep_dims_0 = const()[name = string("op_2415_keep_dims_0"), val = bool(true)]; tensor var_2415_cast_fp16 = reduce_sum(axes = var_2415_axes_0, keep_dims = var_2415_keep_dims_0, x = exp_x_19_cast_fp16)[name = string("op_2415_cast_fp16")]; tensor var_2416_cast_fp16 = real_div(x = exp_x_19_cast_fp16, y = var_2415_cast_fp16)[name = string("op_2416_cast_fp16")]; tensor concat_174 = const()[name = string("concat_174"), val = tensor([32, 64, 1024])]; tensor reshape_27_cast_fp16 = reshape(shape = concat_174, x = var_2416_cast_fp16)[name = string("reshape_27_cast_fp16")]; tensor concat_175 = const()[name = string("concat_175"), val = tensor([32, 1024, 64])]; tensor reshape_28_cast_fp16 = reshape(shape = concat_175, x = x_271_cast_fp16)[name = string("reshape_28_cast_fp16")]; bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; tensor concat_179 = const()[name = string("concat_179"), val = tensor([1, 32, 64, 64])]; tensor reshape_29_cast_fp16 = reshape(shape = concat_179, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; tensor var_2419_perm_0 = const()[name = string("op_2419_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2421 = const()[name = string("op_2421"), val = tensor([1, 64, 2048])]; tensor var_2419_cast_fp16 = transpose(perm = var_2419_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_42")]; tensor input_131_cast_fp16 = reshape(shape = var_2421, x = var_2419_cast_fp16)[name = string("input_131_cast_fp16")]; tensor model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197315136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1200460928))))[name = string("model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_9_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = linear_9_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor mean_39_axes_0 = const()[name = string("mean_39_axes_0"), val = tensor([-1])]; bool mean_39_keep_dims_0 = const()[name = string("mean_39_keep_dims_0"), val = bool(true)]; tensor mean_39_cast_fp16 = reduce_mean(axes = mean_39_axes_0, keep_dims = mean_39_keep_dims_0, x = hidden_states_77_cast_fp16)[name = string("mean_39_cast_fp16")]; tensor input_133_cast_fp16 = sub(x = hidden_states_77_cast_fp16, y = mean_39_cast_fp16)[name = string("input_133_cast_fp16")]; tensor var_2432_axes_0 = const()[name = string("op_2432_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509065920)))]; tensor var_2432_cast_fp16 = layer_norm(axes = var_2432_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_133_cast_fp16)[name = string("op_2432_cast_fp16")]; tensor var_2439 = const()[name = string("op_2439"), val = tensor([0, 2, 1])]; tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; tensor var_2440 = transpose(perm = var_2439, x = var_2432_cast_fp16)[name = string("transpose_41")]; tensor input_135 = expand_dims(axes = input_135_axes_0, x = var_2440)[name = string("input_135")]; string input_137_pad_type_0 = const()[name = string("input_137_pad_type_0"), val = string("valid")]; tensor input_137_strides_0 = const()[name = string("input_137_strides_0"), val = tensor([1, 1])]; tensor input_137_pad_0 = const()[name = string("input_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_137_dilations_0 = const()[name = string("input_137_dilations_0"), val = tensor([1, 1])]; int32 input_137_groups_0 = const()[name = string("input_137_groups_0"), val = int32(1)]; tensor input_137 = conv(dilations = input_137_dilations_0, groups = input_137_groups_0, pad = input_137_pad_0, pad_type = input_137_pad_type_0, strides = input_137_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_135)[name = string("input_137")]; string up_states_19_pad_type_0 = const()[name = string("up_states_19_pad_type_0"), val = string("valid")]; tensor up_states_19_strides_0 = const()[name = string("up_states_19_strides_0"), val = tensor([1, 1])]; tensor up_states_19_pad_0 = const()[name = string("up_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_19_dilations_0 = const()[name = string("up_states_19_dilations_0"), val = tensor([1, 1])]; int32 up_states_19_groups_0 = const()[name = string("up_states_19_groups_0"), val = int32(1)]; tensor up_states_19 = conv(dilations = up_states_19_dilations_0, groups = up_states_19_groups_0, pad = up_states_19_pad_0, pad_type = up_states_19_pad_type_0, strides = up_states_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_135)[name = string("up_states_19")]; tensor gate_states_19 = silu(x = input_137)[name = string("gate_states_19")]; tensor input_139 = mul(x = gate_states_19, y = up_states_19)[name = string("input_139")]; string hidden_states_79_pad_type_0 = const()[name = string("hidden_states_79_pad_type_0"), val = string("valid")]; tensor hidden_states_79_strides_0 = const()[name = string("hidden_states_79_strides_0"), val = tensor([1, 1])]; tensor hidden_states_79_pad_0 = const()[name = string("hidden_states_79_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_79_dilations_0 = const()[name = string("hidden_states_79_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_79_groups_0 = const()[name = string("hidden_states_79_groups_0"), val = int32(1)]; tensor hidden_states_79 = conv(dilations = hidden_states_79_dilations_0, groups = hidden_states_79_groups_0, pad = hidden_states_79_pad_0, pad_type = hidden_states_79_pad_type_0, strides = hidden_states_79_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_139)[name = string("hidden_states_79")]; tensor var_2462_axes_0 = const()[name = string("op_2462_axes_0"), val = tensor([2])]; tensor var_2462 = squeeze(axes = var_2462_axes_0, x = hidden_states_79)[name = string("op_2462")]; tensor var_2463 = const()[name = string("op_2463"), val = tensor([0, 2, 1])]; tensor var_2464 = transpose(perm = var_2463, x = var_2462)[name = string("transpose_40")]; tensor hidden_states_81_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = var_2464)[name = string("hidden_states_81_cast_fp16")]; tensor mean_41_axes_0 = const()[name = string("mean_41_axes_0"), val = tensor([-1])]; bool mean_41_keep_dims_0 = const()[name = string("mean_41_keep_dims_0"), val = bool(true)]; tensor mean_41_cast_fp16 = reduce_mean(axes = mean_41_axes_0, keep_dims = mean_41_keep_dims_0, x = hidden_states_81_cast_fp16)[name = string("mean_41_cast_fp16")]; tensor input_141_cast_fp16 = sub(x = hidden_states_81_cast_fp16, y = mean_41_cast_fp16)[name = string("input_141_cast_fp16")]; tensor var_2472_axes_0 = const()[name = string("op_2472_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509070080)))]; tensor var_2472_cast_fp16 = layer_norm(axes = var_2472_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_141_cast_fp16)[name = string("op_2472_cast_fp16")]; tensor var_2476 = const()[name = string("op_2476"), val = tensor([0, 2, 1])]; tensor var_2478_axes_0 = const()[name = string("op_2478_axes_0"), val = tensor([2])]; tensor var_2477 = transpose(perm = var_2476, x = var_2472_cast_fp16)[name = string("transpose_39")]; tensor var_2478 = expand_dims(axes = var_2478_axes_0, x = var_2477)[name = string("op_2478")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_2478)[name = string("query_states_41")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_2478)[name = string("key_states_61")]; string value_states_61_pad_type_0 = const()[name = string("value_states_61_pad_type_0"), val = string("valid")]; tensor value_states_61_strides_0 = const()[name = string("value_states_61_strides_0"), val = tensor([1, 1])]; tensor value_states_61_pad_0 = const()[name = string("value_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_61_dilations_0 = const()[name = string("value_states_61_dilations_0"), val = tensor([1, 1])]; int32 value_states_61_groups_0 = const()[name = string("value_states_61_groups_0"), val = int32(1)]; tensor value_states_61 = conv(dilations = value_states_61_dilations_0, groups = value_states_61_groups_0, pad = value_states_61_pad_0, pad_type = value_states_61_pad_type_0, strides = value_states_61_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_2478)[name = string("value_states_61")]; tensor var_2498 = const()[name = string("op_2498"), val = tensor([1, 32, 64, 64])]; tensor var_2499 = reshape(shape = var_2498, x = query_states_41)[name = string("op_2499")]; tensor var_2500 = const()[name = string("op_2500"), val = tensor([0, 1, 3, 2])]; tensor var_2502 = const()[name = string("op_2502"), val = tensor([1, 8, 64, 64])]; tensor var_2503 = reshape(shape = var_2502, x = key_states_61)[name = string("op_2503")]; tensor var_2504 = const()[name = string("op_2504"), val = tensor([0, 1, 3, 2])]; tensor var_2506 = const()[name = string("op_2506"), val = tensor([1, 8, 64, 64])]; tensor var_2507 = reshape(shape = var_2506, x = value_states_61)[name = string("op_2507")]; tensor var_2508 = const()[name = string("op_2508"), val = tensor([0, 1, 3, 2])]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_281 = transpose(perm = var_2500, x = var_2499)[name = string("transpose_38")]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = x_281)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = x_281)[name = string("x2_41")]; tensor var_2526 = mul(x = x1_41, y = cos_7)[name = string("op_2526")]; tensor var_2527 = mul(x = x2_41, y = sin_7)[name = string("op_2527")]; tensor var_2528 = sub(x = var_2526, y = var_2527)[name = string("op_2528")]; tensor var_2529 = mul(x = x2_41, y = cos_7)[name = string("op_2529")]; tensor var_2530 = mul(x = x1_41, y = sin_7)[name = string("op_2530")]; tensor var_2531 = add(x = var_2529, y = var_2530)[name = string("op_2531")]; bool rotated_41_interleave_0 = const()[name = string("rotated_41_interleave_0"), val = bool(false)]; tensor rotated_41 = concat(axis = var_75, interleave = rotated_41_interleave_0, values = (var_2528, var_2531))[name = string("rotated_41")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_285 = transpose(perm = var_2504, x = var_2503)[name = string("transpose_37")]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = x_285)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = x_285)[name = string("x2_43")]; tensor var_2547 = mul(x = x1_43, y = cos_7)[name = string("op_2547")]; tensor var_2548 = mul(x = x2_43, y = sin_7)[name = string("op_2548")]; tensor var_2549 = sub(x = var_2547, y = var_2548)[name = string("op_2549")]; tensor var_2550 = mul(x = x2_43, y = cos_7)[name = string("op_2550")]; tensor var_2551 = mul(x = x1_43, y = sin_7)[name = string("op_2551")]; tensor var_2552 = add(x = var_2550, y = var_2551)[name = string("op_2552")]; bool rotated_43_interleave_0 = const()[name = string("rotated_43_interleave_0"), val = bool(false)]; tensor rotated_43 = concat(axis = var_75, interleave = rotated_43_interleave_0, values = (var_2549, var_2552))[name = string("rotated_43")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([10])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([11])]; int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_182")]; tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_124, concat_183_values1_0, var_591, concat_183_values3_0))[name = string("concat_183")]; tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = rotated_43, x = coreml_update_state_51)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_52")]; tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([26])]; tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([27])]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_186")]; tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_130, concat_187_values1_0, var_591, concat_187_values3_0))[name = string("concat_187")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_63 = transpose(perm = var_2508, x = var_2507)[name = string("transpose_36")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = value_states_63, x = coreml_update_state_52)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_53")]; tensor var_2575_begin_0 = const()[name = string("op_2575_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_2575_end_0 = const()[name = string("op_2575_end_0"), val = tensor([11, 8, 1024, 64])]; tensor var_2575_end_mask_0 = const()[name = string("op_2575_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2575_cast_fp16 = slice_by_index(begin = var_2575_begin_0, end = var_2575_end_0, end_mask = var_2575_end_mask_0, x = coreml_update_state_53)[name = string("op_2575_cast_fp16")]; tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_2575_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; tensor var_2577_begin_0 = const()[name = string("op_2577_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_2577_end_0 = const()[name = string("op_2577_end_0"), val = tensor([27, 8, 1024, 64])]; tensor var_2577_end_mask_0 = const()[name = string("op_2577_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2577_cast_fp16 = slice_by_index(begin = var_2577_begin_0, end = var_2577_end_0, end_mask = var_2577_end_mask_0, x = coreml_update_state_53)[name = string("op_2577_cast_fp16")]; tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_2577_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; tensor x_291_axes_0 = const()[name = string("x_291_axes_0"), val = tensor([1])]; tensor x_291_cast_fp16 = expand_dims(axes = x_291_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_291_cast_fp16")]; tensor var_2586 = const()[name = string("op_2586"), val = tensor([1, 4, 1, 1])]; tensor x_293_cast_fp16 = tile(reps = var_2586, x = x_291_cast_fp16)[name = string("x_293_cast_fp16")]; tensor var_2590 = const()[name = string("op_2590"), val = tensor([1, -1, 1024, 64])]; tensor var_2591_cast_fp16 = reshape(shape = var_2590, x = x_293_cast_fp16)[name = string("op_2591_cast_fp16")]; tensor x_297_axes_0 = const()[name = string("x_297_axes_0"), val = tensor([1])]; tensor x_297_cast_fp16 = expand_dims(axes = x_297_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_297_cast_fp16")]; tensor var_2593 = const()[name = string("op_2593"), val = tensor([1, 4, 1, 1])]; tensor x_299_cast_fp16 = tile(reps = var_2593, x = x_297_cast_fp16)[name = string("x_299_cast_fp16")]; bool var_2600_transpose_x_0 = const()[name = string("op_2600_transpose_x_0"), val = bool(false)]; bool var_2600_transpose_y_0 = const()[name = string("op_2600_transpose_y_0"), val = bool(true)]; tensor var_2600_cast_fp16 = matmul(transpose_x = var_2600_transpose_x_0, transpose_y = var_2600_transpose_y_0, x = rotated_41, y = var_2591_cast_fp16)[name = string("op_2600_cast_fp16")]; fp16 var_2601_to_fp16 = const()[name = string("op_2601_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_21_cast_fp16 = mul(x = var_2600_cast_fp16, y = var_2601_to_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor x_301_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_301_cast_fp16")]; tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; tensor reduce_max_10_cast_fp16 = reduce_max(axes = reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = x_301_cast_fp16)[name = string("reduce_max_10_cast_fp16")]; tensor x_303_cast_fp16 = sub(x = x_301_cast_fp16, y = reduce_max_10_cast_fp16)[name = string("x_303_cast_fp16")]; tensor exp_x_21_cast_fp16 = exp(x = x_303_cast_fp16)[name = string("exp_x_21_cast_fp16")]; tensor var_2612_axes_0 = const()[name = string("op_2612_axes_0"), val = tensor([-1])]; bool var_2612_keep_dims_0 = const()[name = string("op_2612_keep_dims_0"), val = bool(true)]; tensor var_2612_cast_fp16 = reduce_sum(axes = var_2612_axes_0, keep_dims = var_2612_keep_dims_0, x = exp_x_21_cast_fp16)[name = string("op_2612_cast_fp16")]; tensor var_2613_cast_fp16 = real_div(x = exp_x_21_cast_fp16, y = var_2612_cast_fp16)[name = string("op_2613_cast_fp16")]; tensor concat_192 = const()[name = string("concat_192"), val = tensor([32, 64, 1024])]; tensor reshape_30_cast_fp16 = reshape(shape = concat_192, x = var_2613_cast_fp16)[name = string("reshape_30_cast_fp16")]; tensor concat_193 = const()[name = string("concat_193"), val = tensor([32, 1024, 64])]; tensor reshape_31_cast_fp16 = reshape(shape = concat_193, x = x_299_cast_fp16)[name = string("reshape_31_cast_fp16")]; bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; tensor concat_197 = const()[name = string("concat_197"), val = tensor([1, 32, 64, 64])]; tensor reshape_32_cast_fp16 = reshape(shape = concat_197, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; tensor var_2616_perm_0 = const()[name = string("op_2616_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2618 = const()[name = string("op_2618"), val = tensor([1, 64, 2048])]; tensor var_2616_cast_fp16 = transpose(perm = var_2616_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_35")]; tensor input_145_cast_fp16 = reshape(shape = var_2618, x = var_2616_cast_fp16)[name = string("input_145_cast_fp16")]; tensor model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1200493760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1203639552))))[name = string("model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_145_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor hidden_states_85_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; tensor mean_43_axes_0 = const()[name = string("mean_43_axes_0"), val = tensor([-1])]; bool mean_43_keep_dims_0 = const()[name = string("mean_43_keep_dims_0"), val = bool(true)]; tensor mean_43_cast_fp16 = reduce_mean(axes = mean_43_axes_0, keep_dims = mean_43_keep_dims_0, x = hidden_states_85_cast_fp16)[name = string("mean_43_cast_fp16")]; tensor input_147_cast_fp16 = sub(x = hidden_states_85_cast_fp16, y = mean_43_cast_fp16)[name = string("input_147_cast_fp16")]; tensor var_2629_axes_0 = const()[name = string("op_2629_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511179712)))]; tensor var_2629_cast_fp16 = layer_norm(axes = var_2629_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_147_cast_fp16)[name = string("op_2629_cast_fp16")]; tensor var_2636 = const()[name = string("op_2636"), val = tensor([0, 2, 1])]; tensor input_149_axes_0 = const()[name = string("input_149_axes_0"), val = tensor([2])]; tensor var_2637 = transpose(perm = var_2636, x = var_2629_cast_fp16)[name = string("transpose_34")]; tensor input_149 = expand_dims(axes = input_149_axes_0, x = var_2637)[name = string("input_149")]; string input_151_pad_type_0 = const()[name = string("input_151_pad_type_0"), val = string("valid")]; tensor input_151_strides_0 = const()[name = string("input_151_strides_0"), val = tensor([1, 1])]; tensor input_151_pad_0 = const()[name = string("input_151_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_151_dilations_0 = const()[name = string("input_151_dilations_0"), val = tensor([1, 1])]; int32 input_151_groups_0 = const()[name = string("input_151_groups_0"), val = int32(1)]; tensor input_151 = conv(dilations = input_151_dilations_0, groups = input_151_groups_0, pad = input_151_pad_0, pad_type = input_151_pad_type_0, strides = input_151_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_149)[name = string("input_151")]; string up_states_21_pad_type_0 = const()[name = string("up_states_21_pad_type_0"), val = string("valid")]; tensor up_states_21_strides_0 = const()[name = string("up_states_21_strides_0"), val = tensor([1, 1])]; tensor up_states_21_pad_0 = const()[name = string("up_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_21_dilations_0 = const()[name = string("up_states_21_dilations_0"), val = tensor([1, 1])]; int32 up_states_21_groups_0 = const()[name = string("up_states_21_groups_0"), val = int32(1)]; tensor up_states_21 = conv(dilations = up_states_21_dilations_0, groups = up_states_21_groups_0, pad = up_states_21_pad_0, pad_type = up_states_21_pad_type_0, strides = up_states_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_149)[name = string("up_states_21")]; tensor gate_states_21 = silu(x = input_151)[name = string("gate_states_21")]; tensor input_153 = mul(x = gate_states_21, y = up_states_21)[name = string("input_153")]; string hidden_states_87_pad_type_0 = const()[name = string("hidden_states_87_pad_type_0"), val = string("valid")]; tensor hidden_states_87_strides_0 = const()[name = string("hidden_states_87_strides_0"), val = tensor([1, 1])]; tensor hidden_states_87_pad_0 = const()[name = string("hidden_states_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_87_dilations_0 = const()[name = string("hidden_states_87_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_87_groups_0 = const()[name = string("hidden_states_87_groups_0"), val = int32(1)]; tensor hidden_states_87 = conv(dilations = hidden_states_87_dilations_0, groups = hidden_states_87_groups_0, pad = hidden_states_87_pad_0, pad_type = hidden_states_87_pad_type_0, strides = hidden_states_87_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_153)[name = string("hidden_states_87")]; tensor var_2659_axes_0 = const()[name = string("op_2659_axes_0"), val = tensor([2])]; tensor var_2659 = squeeze(axes = var_2659_axes_0, x = hidden_states_87)[name = string("op_2659")]; tensor var_2660 = const()[name = string("op_2660"), val = tensor([0, 2, 1])]; tensor var_2661 = transpose(perm = var_2660, x = var_2659)[name = string("transpose_33")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = var_2661)[name = string("hidden_states_89_cast_fp16")]; tensor mean_45_axes_0 = const()[name = string("mean_45_axes_0"), val = tensor([-1])]; bool mean_45_keep_dims_0 = const()[name = string("mean_45_keep_dims_0"), val = bool(true)]; tensor mean_45_cast_fp16 = reduce_mean(axes = mean_45_axes_0, keep_dims = mean_45_keep_dims_0, x = hidden_states_89_cast_fp16)[name = string("mean_45_cast_fp16")]; tensor input_155_cast_fp16 = sub(x = hidden_states_89_cast_fp16, y = mean_45_cast_fp16)[name = string("input_155_cast_fp16")]; tensor var_2669_axes_0 = const()[name = string("op_2669_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511183872)))]; tensor var_2669_cast_fp16 = layer_norm(axes = var_2669_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_155_cast_fp16)[name = string("op_2669_cast_fp16")]; tensor var_2673 = const()[name = string("op_2673"), val = tensor([0, 2, 1])]; tensor var_2675_axes_0 = const()[name = string("op_2675_axes_0"), val = tensor([2])]; tensor var_2674 = transpose(perm = var_2673, x = var_2669_cast_fp16)[name = string("transpose_32")]; tensor var_2675 = expand_dims(axes = var_2675_axes_0, x = var_2674)[name = string("op_2675")]; string query_states_45_pad_type_0 = const()[name = string("query_states_45_pad_type_0"), val = string("valid")]; tensor query_states_45_strides_0 = const()[name = string("query_states_45_strides_0"), val = tensor([1, 1])]; tensor query_states_45_pad_0 = const()[name = string("query_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_45_dilations_0 = const()[name = string("query_states_45_dilations_0"), val = tensor([1, 1])]; int32 query_states_45_groups_0 = const()[name = string("query_states_45_groups_0"), val = int32(1)]; tensor query_states_45 = conv(dilations = query_states_45_dilations_0, groups = query_states_45_groups_0, pad = query_states_45_pad_0, pad_type = query_states_45_pad_type_0, strides = query_states_45_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_2675)[name = string("query_states_45")]; string key_states_67_pad_type_0 = const()[name = string("key_states_67_pad_type_0"), val = string("valid")]; tensor key_states_67_strides_0 = const()[name = string("key_states_67_strides_0"), val = tensor([1, 1])]; tensor key_states_67_pad_0 = const()[name = string("key_states_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_67_dilations_0 = const()[name = string("key_states_67_dilations_0"), val = tensor([1, 1])]; int32 key_states_67_groups_0 = const()[name = string("key_states_67_groups_0"), val = int32(1)]; tensor key_states_67 = conv(dilations = key_states_67_dilations_0, groups = key_states_67_groups_0, pad = key_states_67_pad_0, pad_type = key_states_67_pad_type_0, strides = key_states_67_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_2675)[name = string("key_states_67")]; string value_states_67_pad_type_0 = const()[name = string("value_states_67_pad_type_0"), val = string("valid")]; tensor value_states_67_strides_0 = const()[name = string("value_states_67_strides_0"), val = tensor([1, 1])]; tensor value_states_67_pad_0 = const()[name = string("value_states_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_67_dilations_0 = const()[name = string("value_states_67_dilations_0"), val = tensor([1, 1])]; int32 value_states_67_groups_0 = const()[name = string("value_states_67_groups_0"), val = int32(1)]; tensor value_states_67 = conv(dilations = value_states_67_dilations_0, groups = value_states_67_groups_0, pad = value_states_67_pad_0, pad_type = value_states_67_pad_type_0, strides = value_states_67_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_2675)[name = string("value_states_67")]; tensor var_2695 = const()[name = string("op_2695"), val = tensor([1, 32, 64, 64])]; tensor var_2696 = reshape(shape = var_2695, x = query_states_45)[name = string("op_2696")]; tensor var_2697 = const()[name = string("op_2697"), val = tensor([0, 1, 3, 2])]; tensor var_2699 = const()[name = string("op_2699"), val = tensor([1, 8, 64, 64])]; tensor var_2700 = reshape(shape = var_2699, x = key_states_67)[name = string("op_2700")]; tensor var_2701 = const()[name = string("op_2701"), val = tensor([0, 1, 3, 2])]; tensor var_2703 = const()[name = string("op_2703"), val = tensor([1, 8, 64, 64])]; tensor var_2704 = reshape(shape = var_2703, x = value_states_67)[name = string("op_2704")]; tensor var_2705 = const()[name = string("op_2705"), val = tensor([0, 1, 3, 2])]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_309 = transpose(perm = var_2697, x = var_2696)[name = string("transpose_31")]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = x_309)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = x_309)[name = string("x2_45")]; tensor var_2723 = mul(x = x1_45, y = cos_7)[name = string("op_2723")]; tensor var_2724 = mul(x = x2_45, y = sin_7)[name = string("op_2724")]; tensor var_2725 = sub(x = var_2723, y = var_2724)[name = string("op_2725")]; tensor var_2726 = mul(x = x2_45, y = cos_7)[name = string("op_2726")]; tensor var_2727 = mul(x = x1_45, y = sin_7)[name = string("op_2727")]; tensor var_2728 = add(x = var_2726, y = var_2727)[name = string("op_2728")]; bool rotated_45_interleave_0 = const()[name = string("rotated_45_interleave_0"), val = bool(false)]; tensor rotated_45 = concat(axis = var_75, interleave = rotated_45_interleave_0, values = (var_2725, var_2728))[name = string("rotated_45")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_313 = transpose(perm = var_2701, x = var_2700)[name = string("transpose_30")]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = x_313)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = x_313)[name = string("x2_47")]; tensor var_2744 = mul(x = x1_47, y = cos_7)[name = string("op_2744")]; tensor var_2745 = mul(x = x2_47, y = sin_7)[name = string("op_2745")]; tensor var_2746 = sub(x = var_2744, y = var_2745)[name = string("op_2746")]; tensor var_2747 = mul(x = x2_47, y = cos_7)[name = string("op_2747")]; tensor var_2748 = mul(x = x1_47, y = sin_7)[name = string("op_2748")]; tensor var_2749 = add(x = var_2747, y = var_2748)[name = string("op_2749")]; bool rotated_47_interleave_0 = const()[name = string("rotated_47_interleave_0"), val = bool(false)]; tensor rotated_47 = concat(axis = var_75, interleave = rotated_47_interleave_0, values = (var_2746, var_2749))[name = string("rotated_47")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([11])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([12])]; int32 concat_200_axis_0 = const()[name = string("concat_200_axis_0"), val = int32(0)]; bool concat_200_interleave_0 = const()[name = string("concat_200_interleave_0"), val = bool(false)]; tensor concat_200 = concat(axis = concat_200_axis_0, interleave = concat_200_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_200")]; tensor concat_201_values1_0 = const()[name = string("concat_201_values1_0"), val = tensor([0])]; tensor concat_201_values3_0 = const()[name = string("concat_201_values3_0"), val = tensor([0])]; int32 concat_201_axis_0 = const()[name = string("concat_201_axis_0"), val = int32(0)]; bool concat_201_interleave_0 = const()[name = string("concat_201_interleave_0"), val = bool(false)]; tensor concat_201 = concat(axis = concat_201_axis_0, interleave = concat_201_interleave_0, values = (expand_dims_136, concat_201_values1_0, var_591, concat_201_values3_0))[name = string("concat_201")]; tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_200, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_201, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = rotated_47, x = coreml_update_state_53)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_54")]; tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([27])]; tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([28])]; int32 concat_204_axis_0 = const()[name = string("concat_204_axis_0"), val = int32(0)]; bool concat_204_interleave_0 = const()[name = string("concat_204_interleave_0"), val = bool(false)]; tensor concat_204 = concat(axis = concat_204_axis_0, interleave = concat_204_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_204")]; tensor concat_205_values1_0 = const()[name = string("concat_205_values1_0"), val = tensor([0])]; tensor concat_205_values3_0 = const()[name = string("concat_205_values3_0"), val = tensor([0])]; int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (expand_dims_142, concat_205_values1_0, var_591, concat_205_values3_0))[name = string("concat_205")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_69 = transpose(perm = var_2705, x = var_2704)[name = string("transpose_29")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_204, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_205, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = value_states_69, x = coreml_update_state_54)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_55")]; tensor var_2772_begin_0 = const()[name = string("op_2772_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_2772_end_0 = const()[name = string("op_2772_end_0"), val = tensor([12, 8, 1024, 64])]; tensor var_2772_end_mask_0 = const()[name = string("op_2772_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2772_cast_fp16 = slice_by_index(begin = var_2772_begin_0, end = var_2772_end_0, end_mask = var_2772_end_mask_0, x = coreml_update_state_55)[name = string("op_2772_cast_fp16")]; tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_2772_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; tensor var_2774_begin_0 = const()[name = string("op_2774_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_2774_end_0 = const()[name = string("op_2774_end_0"), val = tensor([28, 8, 1024, 64])]; tensor var_2774_end_mask_0 = const()[name = string("op_2774_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2774_cast_fp16 = slice_by_index(begin = var_2774_begin_0, end = var_2774_end_0, end_mask = var_2774_end_mask_0, x = coreml_update_state_55)[name = string("op_2774_cast_fp16")]; tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_2774_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; tensor x_319_axes_0 = const()[name = string("x_319_axes_0"), val = tensor([1])]; tensor x_319_cast_fp16 = expand_dims(axes = x_319_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_319_cast_fp16")]; tensor var_2783 = const()[name = string("op_2783"), val = tensor([1, 4, 1, 1])]; tensor x_321_cast_fp16 = tile(reps = var_2783, x = x_319_cast_fp16)[name = string("x_321_cast_fp16")]; tensor var_2787 = const()[name = string("op_2787"), val = tensor([1, -1, 1024, 64])]; tensor var_2788_cast_fp16 = reshape(shape = var_2787, x = x_321_cast_fp16)[name = string("op_2788_cast_fp16")]; tensor x_325_axes_0 = const()[name = string("x_325_axes_0"), val = tensor([1])]; tensor x_325_cast_fp16 = expand_dims(axes = x_325_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_325_cast_fp16")]; tensor var_2790 = const()[name = string("op_2790"), val = tensor([1, 4, 1, 1])]; tensor x_327_cast_fp16 = tile(reps = var_2790, x = x_325_cast_fp16)[name = string("x_327_cast_fp16")]; bool var_2797_transpose_x_0 = const()[name = string("op_2797_transpose_x_0"), val = bool(false)]; bool var_2797_transpose_y_0 = const()[name = string("op_2797_transpose_y_0"), val = bool(true)]; tensor var_2797_cast_fp16 = matmul(transpose_x = var_2797_transpose_x_0, transpose_y = var_2797_transpose_y_0, x = rotated_45, y = var_2788_cast_fp16)[name = string("op_2797_cast_fp16")]; fp16 var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_23_cast_fp16 = mul(x = var_2797_cast_fp16, y = var_2798_to_fp16)[name = string("attn_weights_23_cast_fp16")]; tensor x_329_cast_fp16 = add(x = attn_weights_23_cast_fp16, y = causal_mask)[name = string("x_329_cast_fp16")]; tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; tensor reduce_max_11_cast_fp16 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = x_329_cast_fp16)[name = string("reduce_max_11_cast_fp16")]; tensor x_331_cast_fp16 = sub(x = x_329_cast_fp16, y = reduce_max_11_cast_fp16)[name = string("x_331_cast_fp16")]; tensor exp_x_23_cast_fp16 = exp(x = x_331_cast_fp16)[name = string("exp_x_23_cast_fp16")]; tensor var_2809_axes_0 = const()[name = string("op_2809_axes_0"), val = tensor([-1])]; bool var_2809_keep_dims_0 = const()[name = string("op_2809_keep_dims_0"), val = bool(true)]; tensor var_2809_cast_fp16 = reduce_sum(axes = var_2809_axes_0, keep_dims = var_2809_keep_dims_0, x = exp_x_23_cast_fp16)[name = string("op_2809_cast_fp16")]; tensor var_2810_cast_fp16 = real_div(x = exp_x_23_cast_fp16, y = var_2809_cast_fp16)[name = string("op_2810_cast_fp16")]; tensor concat_210 = const()[name = string("concat_210"), val = tensor([32, 64, 1024])]; tensor reshape_33_cast_fp16 = reshape(shape = concat_210, x = var_2810_cast_fp16)[name = string("reshape_33_cast_fp16")]; tensor concat_211 = const()[name = string("concat_211"), val = tensor([32, 1024, 64])]; tensor reshape_34_cast_fp16 = reshape(shape = concat_211, x = x_327_cast_fp16)[name = string("reshape_34_cast_fp16")]; bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; tensor concat_215 = const()[name = string("concat_215"), val = tensor([1, 32, 64, 64])]; tensor reshape_35_cast_fp16 = reshape(shape = concat_215, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; tensor var_2813_perm_0 = const()[name = string("op_2813_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2815 = const()[name = string("op_2815"), val = tensor([1, 64, 2048])]; tensor var_2813_cast_fp16 = transpose(perm = var_2813_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_28")]; tensor input_159_cast_fp16 = reshape(shape = var_2815, x = var_2813_cast_fp16)[name = string("input_159_cast_fp16")]; tensor model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1203672384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1206818176))))[name = string("model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_11_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_159_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor hidden_states_93_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = linear_11_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor mean_47_axes_0 = const()[name = string("mean_47_axes_0"), val = tensor([-1])]; bool mean_47_keep_dims_0 = const()[name = string("mean_47_keep_dims_0"), val = bool(true)]; tensor mean_47_cast_fp16 = reduce_mean(axes = mean_47_axes_0, keep_dims = mean_47_keep_dims_0, x = hidden_states_93_cast_fp16)[name = string("mean_47_cast_fp16")]; tensor input_161_cast_fp16 = sub(x = hidden_states_93_cast_fp16, y = mean_47_cast_fp16)[name = string("input_161_cast_fp16")]; tensor var_2826_axes_0 = const()[name = string("op_2826_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513293504)))]; tensor var_2826_cast_fp16 = layer_norm(axes = var_2826_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_161_cast_fp16)[name = string("op_2826_cast_fp16")]; tensor var_2833 = const()[name = string("op_2833"), val = tensor([0, 2, 1])]; tensor input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor([2])]; tensor var_2834 = transpose(perm = var_2833, x = var_2826_cast_fp16)[name = string("transpose_27")]; tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_2834)[name = string("input_163")]; string input_165_pad_type_0 = const()[name = string("input_165_pad_type_0"), val = string("valid")]; tensor input_165_strides_0 = const()[name = string("input_165_strides_0"), val = tensor([1, 1])]; tensor input_165_pad_0 = const()[name = string("input_165_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_165_dilations_0 = const()[name = string("input_165_dilations_0"), val = tensor([1, 1])]; int32 input_165_groups_0 = const()[name = string("input_165_groups_0"), val = int32(1)]; tensor input_165 = conv(dilations = input_165_dilations_0, groups = input_165_groups_0, pad = input_165_pad_0, pad_type = input_165_pad_type_0, strides = input_165_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_163)[name = string("input_165")]; string up_states_23_pad_type_0 = const()[name = string("up_states_23_pad_type_0"), val = string("valid")]; tensor up_states_23_strides_0 = const()[name = string("up_states_23_strides_0"), val = tensor([1, 1])]; tensor up_states_23_pad_0 = const()[name = string("up_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_23_dilations_0 = const()[name = string("up_states_23_dilations_0"), val = tensor([1, 1])]; int32 up_states_23_groups_0 = const()[name = string("up_states_23_groups_0"), val = int32(1)]; tensor up_states_23 = conv(dilations = up_states_23_dilations_0, groups = up_states_23_groups_0, pad = up_states_23_pad_0, pad_type = up_states_23_pad_type_0, strides = up_states_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_163)[name = string("up_states_23")]; tensor gate_states_23 = silu(x = input_165)[name = string("gate_states_23")]; tensor input_167 = mul(x = gate_states_23, y = up_states_23)[name = string("input_167")]; string hidden_states_95_pad_type_0 = const()[name = string("hidden_states_95_pad_type_0"), val = string("valid")]; tensor hidden_states_95_strides_0 = const()[name = string("hidden_states_95_strides_0"), val = tensor([1, 1])]; tensor hidden_states_95_pad_0 = const()[name = string("hidden_states_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_95_dilations_0 = const()[name = string("hidden_states_95_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_95_groups_0 = const()[name = string("hidden_states_95_groups_0"), val = int32(1)]; tensor hidden_states_95 = conv(dilations = hidden_states_95_dilations_0, groups = hidden_states_95_groups_0, pad = hidden_states_95_pad_0, pad_type = hidden_states_95_pad_type_0, strides = hidden_states_95_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_167)[name = string("hidden_states_95")]; tensor var_2856_axes_0 = const()[name = string("op_2856_axes_0"), val = tensor([2])]; tensor var_2856 = squeeze(axes = var_2856_axes_0, x = hidden_states_95)[name = string("op_2856")]; tensor var_2857 = const()[name = string("op_2857"), val = tensor([0, 2, 1])]; tensor var_2858 = transpose(perm = var_2857, x = var_2856)[name = string("transpose_26")]; tensor hidden_states_97_cast_fp16 = add(x = hidden_states_93_cast_fp16, y = var_2858)[name = string("hidden_states_97_cast_fp16")]; tensor mean_49_axes_0 = const()[name = string("mean_49_axes_0"), val = tensor([-1])]; bool mean_49_keep_dims_0 = const()[name = string("mean_49_keep_dims_0"), val = bool(true)]; tensor mean_49_cast_fp16 = reduce_mean(axes = mean_49_axes_0, keep_dims = mean_49_keep_dims_0, x = hidden_states_97_cast_fp16)[name = string("mean_49_cast_fp16")]; tensor input_169_cast_fp16 = sub(x = hidden_states_97_cast_fp16, y = mean_49_cast_fp16)[name = string("input_169_cast_fp16")]; tensor var_2866_axes_0 = const()[name = string("op_2866_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513297664)))]; tensor var_2866_cast_fp16 = layer_norm(axes = var_2866_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_169_cast_fp16)[name = string("op_2866_cast_fp16")]; tensor var_2870 = const()[name = string("op_2870"), val = tensor([0, 2, 1])]; tensor var_2872_axes_0 = const()[name = string("op_2872_axes_0"), val = tensor([2])]; tensor var_2871 = transpose(perm = var_2870, x = var_2866_cast_fp16)[name = string("transpose_25")]; tensor var_2872 = expand_dims(axes = var_2872_axes_0, x = var_2871)[name = string("op_2872")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_2872)[name = string("query_states_49")]; string key_states_73_pad_type_0 = const()[name = string("key_states_73_pad_type_0"), val = string("valid")]; tensor key_states_73_strides_0 = const()[name = string("key_states_73_strides_0"), val = tensor([1, 1])]; tensor key_states_73_pad_0 = const()[name = string("key_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_73_dilations_0 = const()[name = string("key_states_73_dilations_0"), val = tensor([1, 1])]; int32 key_states_73_groups_0 = const()[name = string("key_states_73_groups_0"), val = int32(1)]; tensor key_states_73 = conv(dilations = key_states_73_dilations_0, groups = key_states_73_groups_0, pad = key_states_73_pad_0, pad_type = key_states_73_pad_type_0, strides = key_states_73_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_2872)[name = string("key_states_73")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_2872)[name = string("value_states_73")]; tensor var_2892 = const()[name = string("op_2892"), val = tensor([1, 32, 64, 64])]; tensor var_2893 = reshape(shape = var_2892, x = query_states_49)[name = string("op_2893")]; tensor var_2894 = const()[name = string("op_2894"), val = tensor([0, 1, 3, 2])]; tensor var_2896 = const()[name = string("op_2896"), val = tensor([1, 8, 64, 64])]; tensor var_2897 = reshape(shape = var_2896, x = key_states_73)[name = string("op_2897")]; tensor var_2898 = const()[name = string("op_2898"), val = tensor([0, 1, 3, 2])]; tensor var_2900 = const()[name = string("op_2900"), val = tensor([1, 8, 64, 64])]; tensor var_2901 = reshape(shape = var_2900, x = value_states_73)[name = string("op_2901")]; tensor var_2902 = const()[name = string("op_2902"), val = tensor([0, 1, 3, 2])]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_337 = transpose(perm = var_2894, x = var_2893)[name = string("transpose_24")]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = x_337)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = x_337)[name = string("x2_49")]; tensor var_2920 = mul(x = x1_49, y = cos_7)[name = string("op_2920")]; tensor var_2921 = mul(x = x2_49, y = sin_7)[name = string("op_2921")]; tensor var_2922 = sub(x = var_2920, y = var_2921)[name = string("op_2922")]; tensor var_2923 = mul(x = x2_49, y = cos_7)[name = string("op_2923")]; tensor var_2924 = mul(x = x1_49, y = sin_7)[name = string("op_2924")]; tensor var_2925 = add(x = var_2923, y = var_2924)[name = string("op_2925")]; bool rotated_49_interleave_0 = const()[name = string("rotated_49_interleave_0"), val = bool(false)]; tensor rotated_49 = concat(axis = var_75, interleave = rotated_49_interleave_0, values = (var_2922, var_2925))[name = string("rotated_49")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_341 = transpose(perm = var_2898, x = var_2897)[name = string("transpose_23")]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = x_341)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = x_341)[name = string("x2_51")]; tensor var_2941 = mul(x = x1_51, y = cos_7)[name = string("op_2941")]; tensor var_2942 = mul(x = x2_51, y = sin_7)[name = string("op_2942")]; tensor var_2943 = sub(x = var_2941, y = var_2942)[name = string("op_2943")]; tensor var_2944 = mul(x = x2_51, y = cos_7)[name = string("op_2944")]; tensor var_2945 = mul(x = x1_51, y = sin_7)[name = string("op_2945")]; tensor var_2946 = add(x = var_2944, y = var_2945)[name = string("op_2946")]; bool rotated_51_interleave_0 = const()[name = string("rotated_51_interleave_0"), val = bool(false)]; tensor rotated_51 = concat(axis = var_75, interleave = rotated_51_interleave_0, values = (var_2943, var_2946))[name = string("rotated_51")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([12])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([13])]; int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_218")]; tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_148, concat_219_values1_0, var_591, concat_219_values3_0))[name = string("concat_219")]; tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = rotated_51, x = coreml_update_state_55)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_56")]; tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([28])]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([29])]; int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_222")]; tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_154, concat_223_values1_0, var_591, concat_223_values3_0))[name = string("concat_223")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75 = transpose(perm = var_2902, x = var_2901)[name = string("transpose_22")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = value_states_75, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_57")]; tensor var_2969_begin_0 = const()[name = string("op_2969_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_2969_end_0 = const()[name = string("op_2969_end_0"), val = tensor([13, 8, 1024, 64])]; tensor var_2969_end_mask_0 = const()[name = string("op_2969_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2969_cast_fp16 = slice_by_index(begin = var_2969_begin_0, end = var_2969_end_0, end_mask = var_2969_end_mask_0, x = coreml_update_state_57)[name = string("op_2969_cast_fp16")]; tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_2969_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; tensor var_2971_begin_0 = const()[name = string("op_2971_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_2971_end_0 = const()[name = string("op_2971_end_0"), val = tensor([29, 8, 1024, 64])]; tensor var_2971_end_mask_0 = const()[name = string("op_2971_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2971_cast_fp16 = slice_by_index(begin = var_2971_begin_0, end = var_2971_end_0, end_mask = var_2971_end_mask_0, x = coreml_update_state_57)[name = string("op_2971_cast_fp16")]; tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_2971_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; tensor x_347_axes_0 = const()[name = string("x_347_axes_0"), val = tensor([1])]; tensor x_347_cast_fp16 = expand_dims(axes = x_347_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_347_cast_fp16")]; tensor var_2980 = const()[name = string("op_2980"), val = tensor([1, 4, 1, 1])]; tensor x_349_cast_fp16 = tile(reps = var_2980, x = x_347_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_2984 = const()[name = string("op_2984"), val = tensor([1, -1, 1024, 64])]; tensor var_2985_cast_fp16 = reshape(shape = var_2984, x = x_349_cast_fp16)[name = string("op_2985_cast_fp16")]; tensor x_353_axes_0 = const()[name = string("x_353_axes_0"), val = tensor([1])]; tensor x_353_cast_fp16 = expand_dims(axes = x_353_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_353_cast_fp16")]; tensor var_2987 = const()[name = string("op_2987"), val = tensor([1, 4, 1, 1])]; tensor x_355_cast_fp16 = tile(reps = var_2987, x = x_353_cast_fp16)[name = string("x_355_cast_fp16")]; bool var_2994_transpose_x_0 = const()[name = string("op_2994_transpose_x_0"), val = bool(false)]; bool var_2994_transpose_y_0 = const()[name = string("op_2994_transpose_y_0"), val = bool(true)]; tensor var_2994_cast_fp16 = matmul(transpose_x = var_2994_transpose_x_0, transpose_y = var_2994_transpose_y_0, x = rotated_49, y = var_2985_cast_fp16)[name = string("op_2994_cast_fp16")]; fp16 var_2995_to_fp16 = const()[name = string("op_2995_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_25_cast_fp16 = mul(x = var_2994_cast_fp16, y = var_2995_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor x_357_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_357_cast_fp16")]; tensor reduce_max_12_axes_0 = const()[name = string("reduce_max_12_axes_0"), val = tensor([-1])]; bool reduce_max_12_keep_dims_0 = const()[name = string("reduce_max_12_keep_dims_0"), val = bool(true)]; tensor reduce_max_12_cast_fp16 = reduce_max(axes = reduce_max_12_axes_0, keep_dims = reduce_max_12_keep_dims_0, x = x_357_cast_fp16)[name = string("reduce_max_12_cast_fp16")]; tensor x_359_cast_fp16 = sub(x = x_357_cast_fp16, y = reduce_max_12_cast_fp16)[name = string("x_359_cast_fp16")]; tensor exp_x_25_cast_fp16 = exp(x = x_359_cast_fp16)[name = string("exp_x_25_cast_fp16")]; tensor var_3006_axes_0 = const()[name = string("op_3006_axes_0"), val = tensor([-1])]; bool var_3006_keep_dims_0 = const()[name = string("op_3006_keep_dims_0"), val = bool(true)]; tensor var_3006_cast_fp16 = reduce_sum(axes = var_3006_axes_0, keep_dims = var_3006_keep_dims_0, x = exp_x_25_cast_fp16)[name = string("op_3006_cast_fp16")]; tensor var_3007_cast_fp16 = real_div(x = exp_x_25_cast_fp16, y = var_3006_cast_fp16)[name = string("op_3007_cast_fp16")]; tensor concat_228 = const()[name = string("concat_228"), val = tensor([32, 64, 1024])]; tensor reshape_36_cast_fp16 = reshape(shape = concat_228, x = var_3007_cast_fp16)[name = string("reshape_36_cast_fp16")]; tensor concat_229 = const()[name = string("concat_229"), val = tensor([32, 1024, 64])]; tensor reshape_37_cast_fp16 = reshape(shape = concat_229, x = x_355_cast_fp16)[name = string("reshape_37_cast_fp16")]; bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; tensor concat_233 = const()[name = string("concat_233"), val = tensor([1, 32, 64, 64])]; tensor reshape_38_cast_fp16 = reshape(shape = concat_233, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; tensor var_3010_perm_0 = const()[name = string("op_3010_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3012 = const()[name = string("op_3012"), val = tensor([1, 64, 2048])]; tensor var_3010_cast_fp16 = transpose(perm = var_3010_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_21")]; tensor input_173_cast_fp16 = reshape(shape = var_3012, x = var_3010_cast_fp16)[name = string("input_173_cast_fp16")]; tensor model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1206851008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209996800))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_173_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = linear_12_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor mean_51_axes_0 = const()[name = string("mean_51_axes_0"), val = tensor([-1])]; bool mean_51_keep_dims_0 = const()[name = string("mean_51_keep_dims_0"), val = bool(true)]; tensor mean_51_cast_fp16 = reduce_mean(axes = mean_51_axes_0, keep_dims = mean_51_keep_dims_0, x = hidden_states_101_cast_fp16)[name = string("mean_51_cast_fp16")]; tensor input_175_cast_fp16 = sub(x = hidden_states_101_cast_fp16, y = mean_51_cast_fp16)[name = string("input_175_cast_fp16")]; tensor var_3023_axes_0 = const()[name = string("op_3023_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515407296)))]; tensor var_3023_cast_fp16 = layer_norm(axes = var_3023_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_175_cast_fp16)[name = string("op_3023_cast_fp16")]; tensor var_3030 = const()[name = string("op_3030"), val = tensor([0, 2, 1])]; tensor input_177_axes_0 = const()[name = string("input_177_axes_0"), val = tensor([2])]; tensor var_3031 = transpose(perm = var_3030, x = var_3023_cast_fp16)[name = string("transpose_20")]; tensor input_177 = expand_dims(axes = input_177_axes_0, x = var_3031)[name = string("input_177")]; string input_179_pad_type_0 = const()[name = string("input_179_pad_type_0"), val = string("valid")]; tensor input_179_strides_0 = const()[name = string("input_179_strides_0"), val = tensor([1, 1])]; tensor input_179_pad_0 = const()[name = string("input_179_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_179_dilations_0 = const()[name = string("input_179_dilations_0"), val = tensor([1, 1])]; int32 input_179_groups_0 = const()[name = string("input_179_groups_0"), val = int32(1)]; tensor input_179 = conv(dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_177)[name = string("input_179")]; string up_states_25_pad_type_0 = const()[name = string("up_states_25_pad_type_0"), val = string("valid")]; tensor up_states_25_strides_0 = const()[name = string("up_states_25_strides_0"), val = tensor([1, 1])]; tensor up_states_25_pad_0 = const()[name = string("up_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_25_dilations_0 = const()[name = string("up_states_25_dilations_0"), val = tensor([1, 1])]; int32 up_states_25_groups_0 = const()[name = string("up_states_25_groups_0"), val = int32(1)]; tensor up_states_25 = conv(dilations = up_states_25_dilations_0, groups = up_states_25_groups_0, pad = up_states_25_pad_0, pad_type = up_states_25_pad_type_0, strides = up_states_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_177)[name = string("up_states_25")]; tensor gate_states_25 = silu(x = input_179)[name = string("gate_states_25")]; tensor input_181 = mul(x = gate_states_25, y = up_states_25)[name = string("input_181")]; string hidden_states_103_pad_type_0 = const()[name = string("hidden_states_103_pad_type_0"), val = string("valid")]; tensor hidden_states_103_strides_0 = const()[name = string("hidden_states_103_strides_0"), val = tensor([1, 1])]; tensor hidden_states_103_pad_0 = const()[name = string("hidden_states_103_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_103_dilations_0 = const()[name = string("hidden_states_103_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_103_groups_0 = const()[name = string("hidden_states_103_groups_0"), val = int32(1)]; tensor hidden_states_103 = conv(dilations = hidden_states_103_dilations_0, groups = hidden_states_103_groups_0, pad = hidden_states_103_pad_0, pad_type = hidden_states_103_pad_type_0, strides = hidden_states_103_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_181)[name = string("hidden_states_103")]; tensor var_3053_axes_0 = const()[name = string("op_3053_axes_0"), val = tensor([2])]; tensor var_3053 = squeeze(axes = var_3053_axes_0, x = hidden_states_103)[name = string("op_3053")]; tensor var_3054 = const()[name = string("op_3054"), val = tensor([0, 2, 1])]; tensor var_3055 = transpose(perm = var_3054, x = var_3053)[name = string("transpose_19")]; tensor hidden_states_105_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = var_3055)[name = string("hidden_states_105_cast_fp16")]; tensor mean_53_axes_0 = const()[name = string("mean_53_axes_0"), val = tensor([-1])]; bool mean_53_keep_dims_0 = const()[name = string("mean_53_keep_dims_0"), val = bool(true)]; tensor mean_53_cast_fp16 = reduce_mean(axes = mean_53_axes_0, keep_dims = mean_53_keep_dims_0, x = hidden_states_105_cast_fp16)[name = string("mean_53_cast_fp16")]; tensor input_183_cast_fp16 = sub(x = hidden_states_105_cast_fp16, y = mean_53_cast_fp16)[name = string("input_183_cast_fp16")]; tensor var_3063_axes_0 = const()[name = string("op_3063_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515411456)))]; tensor var_3063_cast_fp16 = layer_norm(axes = var_3063_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_183_cast_fp16)[name = string("op_3063_cast_fp16")]; tensor var_3067 = const()[name = string("op_3067"), val = tensor([0, 2, 1])]; tensor var_3069_axes_0 = const()[name = string("op_3069_axes_0"), val = tensor([2])]; tensor var_3068 = transpose(perm = var_3067, x = var_3063_cast_fp16)[name = string("transpose_18")]; tensor var_3069 = expand_dims(axes = var_3069_axes_0, x = var_3068)[name = string("op_3069")]; string query_states_53_pad_type_0 = const()[name = string("query_states_53_pad_type_0"), val = string("valid")]; tensor query_states_53_strides_0 = const()[name = string("query_states_53_strides_0"), val = tensor([1, 1])]; tensor query_states_53_pad_0 = const()[name = string("query_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_53_dilations_0 = const()[name = string("query_states_53_dilations_0"), val = tensor([1, 1])]; int32 query_states_53_groups_0 = const()[name = string("query_states_53_groups_0"), val = int32(1)]; tensor query_states_53 = conv(dilations = query_states_53_dilations_0, groups = query_states_53_groups_0, pad = query_states_53_pad_0, pad_type = query_states_53_pad_type_0, strides = query_states_53_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_3069)[name = string("query_states_53")]; string key_states_79_pad_type_0 = const()[name = string("key_states_79_pad_type_0"), val = string("valid")]; tensor key_states_79_strides_0 = const()[name = string("key_states_79_strides_0"), val = tensor([1, 1])]; tensor key_states_79_pad_0 = const()[name = string("key_states_79_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_79_dilations_0 = const()[name = string("key_states_79_dilations_0"), val = tensor([1, 1])]; int32 key_states_79_groups_0 = const()[name = string("key_states_79_groups_0"), val = int32(1)]; tensor key_states_79 = conv(dilations = key_states_79_dilations_0, groups = key_states_79_groups_0, pad = key_states_79_pad_0, pad_type = key_states_79_pad_type_0, strides = key_states_79_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_3069)[name = string("key_states_79")]; string value_states_79_pad_type_0 = const()[name = string("value_states_79_pad_type_0"), val = string("valid")]; tensor value_states_79_strides_0 = const()[name = string("value_states_79_strides_0"), val = tensor([1, 1])]; tensor value_states_79_pad_0 = const()[name = string("value_states_79_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_79_dilations_0 = const()[name = string("value_states_79_dilations_0"), val = tensor([1, 1])]; int32 value_states_79_groups_0 = const()[name = string("value_states_79_groups_0"), val = int32(1)]; tensor value_states_79 = conv(dilations = value_states_79_dilations_0, groups = value_states_79_groups_0, pad = value_states_79_pad_0, pad_type = value_states_79_pad_type_0, strides = value_states_79_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_3069)[name = string("value_states_79")]; tensor var_3089 = const()[name = string("op_3089"), val = tensor([1, 32, 64, 64])]; tensor var_3090 = reshape(shape = var_3089, x = query_states_53)[name = string("op_3090")]; tensor var_3091 = const()[name = string("op_3091"), val = tensor([0, 1, 3, 2])]; tensor var_3093 = const()[name = string("op_3093"), val = tensor([1, 8, 64, 64])]; tensor var_3094 = reshape(shape = var_3093, x = key_states_79)[name = string("op_3094")]; tensor var_3095 = const()[name = string("op_3095"), val = tensor([0, 1, 3, 2])]; tensor var_3097 = const()[name = string("op_3097"), val = tensor([1, 8, 64, 64])]; tensor var_3098 = reshape(shape = var_3097, x = value_states_79)[name = string("op_3098")]; tensor var_3099 = const()[name = string("op_3099"), val = tensor([0, 1, 3, 2])]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_365 = transpose(perm = var_3091, x = var_3090)[name = string("transpose_17")]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = x_365)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = x_365)[name = string("x2_53")]; tensor var_3117 = mul(x = x1_53, y = cos_7)[name = string("op_3117")]; tensor var_3118 = mul(x = x2_53, y = sin_7)[name = string("op_3118")]; tensor var_3119 = sub(x = var_3117, y = var_3118)[name = string("op_3119")]; tensor var_3120 = mul(x = x2_53, y = cos_7)[name = string("op_3120")]; tensor var_3121 = mul(x = x1_53, y = sin_7)[name = string("op_3121")]; tensor var_3122 = add(x = var_3120, y = var_3121)[name = string("op_3122")]; bool rotated_53_interleave_0 = const()[name = string("rotated_53_interleave_0"), val = bool(false)]; tensor rotated_53 = concat(axis = var_75, interleave = rotated_53_interleave_0, values = (var_3119, var_3122))[name = string("rotated_53")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_369 = transpose(perm = var_3095, x = var_3094)[name = string("transpose_16")]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = x_369)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = x_369)[name = string("x2_55")]; tensor var_3138 = mul(x = x1_55, y = cos_7)[name = string("op_3138")]; tensor var_3139 = mul(x = x2_55, y = sin_7)[name = string("op_3139")]; tensor var_3140 = sub(x = var_3138, y = var_3139)[name = string("op_3140")]; tensor var_3141 = mul(x = x2_55, y = cos_7)[name = string("op_3141")]; tensor var_3142 = mul(x = x1_55, y = sin_7)[name = string("op_3142")]; tensor var_3143 = add(x = var_3141, y = var_3142)[name = string("op_3143")]; bool rotated_55_interleave_0 = const()[name = string("rotated_55_interleave_0"), val = bool(false)]; tensor rotated_55 = concat(axis = var_75, interleave = rotated_55_interleave_0, values = (var_3140, var_3143))[name = string("rotated_55")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([13])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([14])]; int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_236")]; tensor concat_237_values1_0 = const()[name = string("concat_237_values1_0"), val = tensor([0])]; tensor concat_237_values3_0 = const()[name = string("concat_237_values3_0"), val = tensor([0])]; int32 concat_237_axis_0 = const()[name = string("concat_237_axis_0"), val = int32(0)]; bool concat_237_interleave_0 = const()[name = string("concat_237_interleave_0"), val = bool(false)]; tensor concat_237 = concat(axis = concat_237_axis_0, interleave = concat_237_interleave_0, values = (expand_dims_160, concat_237_values1_0, var_591, concat_237_values3_0))[name = string("concat_237")]; tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_236, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_237, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = rotated_55, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_58")]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([29])]; tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([30])]; int32 concat_240_axis_0 = const()[name = string("concat_240_axis_0"), val = int32(0)]; bool concat_240_interleave_0 = const()[name = string("concat_240_interleave_0"), val = bool(false)]; tensor concat_240 = concat(axis = concat_240_axis_0, interleave = concat_240_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_240")]; tensor concat_241_values1_0 = const()[name = string("concat_241_values1_0"), val = tensor([0])]; tensor concat_241_values3_0 = const()[name = string("concat_241_values3_0"), val = tensor([0])]; int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (expand_dims_166, concat_241_values1_0, var_591, concat_241_values3_0))[name = string("concat_241")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_81 = transpose(perm = var_3099, x = var_3098)[name = string("transpose_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_240, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_241, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = value_states_81, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_59")]; tensor var_3166_begin_0 = const()[name = string("op_3166_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3166_end_0 = const()[name = string("op_3166_end_0"), val = tensor([14, 8, 1024, 64])]; tensor var_3166_end_mask_0 = const()[name = string("op_3166_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3166_cast_fp16 = slice_by_index(begin = var_3166_begin_0, end = var_3166_end_0, end_mask = var_3166_end_mask_0, x = coreml_update_state_59)[name = string("op_3166_cast_fp16")]; tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_3166_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; tensor var_3168_begin_0 = const()[name = string("op_3168_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_3168_end_0 = const()[name = string("op_3168_end_0"), val = tensor([30, 8, 1024, 64])]; tensor var_3168_end_mask_0 = const()[name = string("op_3168_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3168_cast_fp16 = slice_by_index(begin = var_3168_begin_0, end = var_3168_end_0, end_mask = var_3168_end_mask_0, x = coreml_update_state_59)[name = string("op_3168_cast_fp16")]; tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_3168_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; tensor x_375_axes_0 = const()[name = string("x_375_axes_0"), val = tensor([1])]; tensor x_375_cast_fp16 = expand_dims(axes = x_375_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_375_cast_fp16")]; tensor var_3177 = const()[name = string("op_3177"), val = tensor([1, 4, 1, 1])]; tensor x_377_cast_fp16 = tile(reps = var_3177, x = x_375_cast_fp16)[name = string("x_377_cast_fp16")]; tensor var_3181 = const()[name = string("op_3181"), val = tensor([1, -1, 1024, 64])]; tensor var_3182_cast_fp16 = reshape(shape = var_3181, x = x_377_cast_fp16)[name = string("op_3182_cast_fp16")]; tensor x_381_axes_0 = const()[name = string("x_381_axes_0"), val = tensor([1])]; tensor x_381_cast_fp16 = expand_dims(axes = x_381_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_381_cast_fp16")]; tensor var_3184 = const()[name = string("op_3184"), val = tensor([1, 4, 1, 1])]; tensor x_383_cast_fp16 = tile(reps = var_3184, x = x_381_cast_fp16)[name = string("x_383_cast_fp16")]; bool var_3191_transpose_x_0 = const()[name = string("op_3191_transpose_x_0"), val = bool(false)]; bool var_3191_transpose_y_0 = const()[name = string("op_3191_transpose_y_0"), val = bool(true)]; tensor var_3191_cast_fp16 = matmul(transpose_x = var_3191_transpose_x_0, transpose_y = var_3191_transpose_y_0, x = rotated_53, y = var_3182_cast_fp16)[name = string("op_3191_cast_fp16")]; fp16 var_3192_to_fp16 = const()[name = string("op_3192_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_27_cast_fp16 = mul(x = var_3191_cast_fp16, y = var_3192_to_fp16)[name = string("attn_weights_27_cast_fp16")]; tensor x_385_cast_fp16 = add(x = attn_weights_27_cast_fp16, y = causal_mask)[name = string("x_385_cast_fp16")]; tensor reduce_max_13_axes_0 = const()[name = string("reduce_max_13_axes_0"), val = tensor([-1])]; bool reduce_max_13_keep_dims_0 = const()[name = string("reduce_max_13_keep_dims_0"), val = bool(true)]; tensor reduce_max_13_cast_fp16 = reduce_max(axes = reduce_max_13_axes_0, keep_dims = reduce_max_13_keep_dims_0, x = x_385_cast_fp16)[name = string("reduce_max_13_cast_fp16")]; tensor x_387_cast_fp16 = sub(x = x_385_cast_fp16, y = reduce_max_13_cast_fp16)[name = string("x_387_cast_fp16")]; tensor exp_x_27_cast_fp16 = exp(x = x_387_cast_fp16)[name = string("exp_x_27_cast_fp16")]; tensor var_3203_axes_0 = const()[name = string("op_3203_axes_0"), val = tensor([-1])]; bool var_3203_keep_dims_0 = const()[name = string("op_3203_keep_dims_0"), val = bool(true)]; tensor var_3203_cast_fp16 = reduce_sum(axes = var_3203_axes_0, keep_dims = var_3203_keep_dims_0, x = exp_x_27_cast_fp16)[name = string("op_3203_cast_fp16")]; tensor var_3204_cast_fp16 = real_div(x = exp_x_27_cast_fp16, y = var_3203_cast_fp16)[name = string("op_3204_cast_fp16")]; tensor concat_246 = const()[name = string("concat_246"), val = tensor([32, 64, 1024])]; tensor reshape_39_cast_fp16 = reshape(shape = concat_246, x = var_3204_cast_fp16)[name = string("reshape_39_cast_fp16")]; tensor concat_247 = const()[name = string("concat_247"), val = tensor([32, 1024, 64])]; tensor reshape_40_cast_fp16 = reshape(shape = concat_247, x = x_383_cast_fp16)[name = string("reshape_40_cast_fp16")]; bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; tensor concat_251 = const()[name = string("concat_251"), val = tensor([1, 32, 64, 64])]; tensor reshape_41_cast_fp16 = reshape(shape = concat_251, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; tensor var_3207_perm_0 = const()[name = string("op_3207_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3209 = const()[name = string("op_3209"), val = tensor([1, 64, 2048])]; tensor var_3207_cast_fp16 = transpose(perm = var_3207_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_14")]; tensor input_187_cast_fp16 = reshape(shape = var_3209, x = var_3207_cast_fp16)[name = string("input_187_cast_fp16")]; tensor model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210029632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213175424))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_13_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_187_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor hidden_states_109_cast_fp16 = add(x = hidden_states_105_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; tensor mean_55_axes_0 = const()[name = string("mean_55_axes_0"), val = tensor([-1])]; bool mean_55_keep_dims_0 = const()[name = string("mean_55_keep_dims_0"), val = bool(true)]; tensor mean_55_cast_fp16 = reduce_mean(axes = mean_55_axes_0, keep_dims = mean_55_keep_dims_0, x = hidden_states_109_cast_fp16)[name = string("mean_55_cast_fp16")]; tensor input_189_cast_fp16 = sub(x = hidden_states_109_cast_fp16, y = mean_55_cast_fp16)[name = string("input_189_cast_fp16")]; tensor var_3220_axes_0 = const()[name = string("op_3220_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517521088)))]; tensor var_3220_cast_fp16 = layer_norm(axes = var_3220_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_189_cast_fp16)[name = string("op_3220_cast_fp16")]; tensor var_3227 = const()[name = string("op_3227"), val = tensor([0, 2, 1])]; tensor input_191_axes_0 = const()[name = string("input_191_axes_0"), val = tensor([2])]; tensor var_3228 = transpose(perm = var_3227, x = var_3220_cast_fp16)[name = string("transpose_13")]; tensor input_191 = expand_dims(axes = input_191_axes_0, x = var_3228)[name = string("input_191")]; string input_193_pad_type_0 = const()[name = string("input_193_pad_type_0"), val = string("valid")]; tensor input_193_strides_0 = const()[name = string("input_193_strides_0"), val = tensor([1, 1])]; tensor input_193_pad_0 = const()[name = string("input_193_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_193_dilations_0 = const()[name = string("input_193_dilations_0"), val = tensor([1, 1])]; int32 input_193_groups_0 = const()[name = string("input_193_groups_0"), val = int32(1)]; tensor input_193 = conv(dilations = input_193_dilations_0, groups = input_193_groups_0, pad = input_193_pad_0, pad_type = input_193_pad_type_0, strides = input_193_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_191)[name = string("input_193")]; string up_states_27_pad_type_0 = const()[name = string("up_states_27_pad_type_0"), val = string("valid")]; tensor up_states_27_strides_0 = const()[name = string("up_states_27_strides_0"), val = tensor([1, 1])]; tensor up_states_27_pad_0 = const()[name = string("up_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_27_dilations_0 = const()[name = string("up_states_27_dilations_0"), val = tensor([1, 1])]; int32 up_states_27_groups_0 = const()[name = string("up_states_27_groups_0"), val = int32(1)]; tensor up_states_27 = conv(dilations = up_states_27_dilations_0, groups = up_states_27_groups_0, pad = up_states_27_pad_0, pad_type = up_states_27_pad_type_0, strides = up_states_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_191)[name = string("up_states_27")]; tensor gate_states_27 = silu(x = input_193)[name = string("gate_states_27")]; tensor input_195 = mul(x = gate_states_27, y = up_states_27)[name = string("input_195")]; string hidden_states_111_pad_type_0 = const()[name = string("hidden_states_111_pad_type_0"), val = string("valid")]; tensor hidden_states_111_strides_0 = const()[name = string("hidden_states_111_strides_0"), val = tensor([1, 1])]; tensor hidden_states_111_pad_0 = const()[name = string("hidden_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_111_dilations_0 = const()[name = string("hidden_states_111_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_111_groups_0 = const()[name = string("hidden_states_111_groups_0"), val = int32(1)]; tensor hidden_states_111 = conv(dilations = hidden_states_111_dilations_0, groups = hidden_states_111_groups_0, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = hidden_states_111_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_195)[name = string("hidden_states_111")]; tensor var_3250_axes_0 = const()[name = string("op_3250_axes_0"), val = tensor([2])]; tensor var_3250 = squeeze(axes = var_3250_axes_0, x = hidden_states_111)[name = string("op_3250")]; tensor var_3251 = const()[name = string("op_3251"), val = tensor([0, 2, 1])]; tensor var_3252 = transpose(perm = var_3251, x = var_3250)[name = string("transpose_12")]; tensor hidden_states_113_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = var_3252)[name = string("hidden_states_113_cast_fp16")]; tensor mean_57_axes_0 = const()[name = string("mean_57_axes_0"), val = tensor([-1])]; bool mean_57_keep_dims_0 = const()[name = string("mean_57_keep_dims_0"), val = bool(true)]; tensor mean_57_cast_fp16 = reduce_mean(axes = mean_57_axes_0, keep_dims = mean_57_keep_dims_0, x = hidden_states_113_cast_fp16)[name = string("mean_57_cast_fp16")]; tensor input_197_cast_fp16 = sub(x = hidden_states_113_cast_fp16, y = mean_57_cast_fp16)[name = string("input_197_cast_fp16")]; tensor var_3260_axes_0 = const()[name = string("op_3260_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517525248)))]; tensor var_3260_cast_fp16 = layer_norm(axes = var_3260_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_197_cast_fp16)[name = string("op_3260_cast_fp16")]; tensor var_3264 = const()[name = string("op_3264"), val = tensor([0, 2, 1])]; tensor var_3266_axes_0 = const()[name = string("op_3266_axes_0"), val = tensor([2])]; tensor var_3265 = transpose(perm = var_3264, x = var_3260_cast_fp16)[name = string("transpose_11")]; tensor var_3266 = expand_dims(axes = var_3266_axes_0, x = var_3265)[name = string("op_3266")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_3266)[name = string("query_states_57")]; string key_states_85_pad_type_0 = const()[name = string("key_states_85_pad_type_0"), val = string("valid")]; tensor key_states_85_strides_0 = const()[name = string("key_states_85_strides_0"), val = tensor([1, 1])]; tensor key_states_85_pad_0 = const()[name = string("key_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_85_dilations_0 = const()[name = string("key_states_85_dilations_0"), val = tensor([1, 1])]; int32 key_states_85_groups_0 = const()[name = string("key_states_85_groups_0"), val = int32(1)]; tensor key_states_85 = conv(dilations = key_states_85_dilations_0, groups = key_states_85_groups_0, pad = key_states_85_pad_0, pad_type = key_states_85_pad_type_0, strides = key_states_85_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_3266)[name = string("key_states_85")]; string value_states_85_pad_type_0 = const()[name = string("value_states_85_pad_type_0"), val = string("valid")]; tensor value_states_85_strides_0 = const()[name = string("value_states_85_strides_0"), val = tensor([1, 1])]; tensor value_states_85_pad_0 = const()[name = string("value_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_85_dilations_0 = const()[name = string("value_states_85_dilations_0"), val = tensor([1, 1])]; int32 value_states_85_groups_0 = const()[name = string("value_states_85_groups_0"), val = int32(1)]; tensor value_states_85 = conv(dilations = value_states_85_dilations_0, groups = value_states_85_groups_0, pad = value_states_85_pad_0, pad_type = value_states_85_pad_type_0, strides = value_states_85_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_3266)[name = string("value_states_85")]; tensor var_3286 = const()[name = string("op_3286"), val = tensor([1, 32, 64, 64])]; tensor var_3287 = reshape(shape = var_3286, x = query_states_57)[name = string("op_3287")]; tensor var_3288 = const()[name = string("op_3288"), val = tensor([0, 1, 3, 2])]; tensor var_3290 = const()[name = string("op_3290"), val = tensor([1, 8, 64, 64])]; tensor var_3291 = reshape(shape = var_3290, x = key_states_85)[name = string("op_3291")]; tensor var_3292 = const()[name = string("op_3292"), val = tensor([0, 1, 3, 2])]; tensor var_3294 = const()[name = string("op_3294"), val = tensor([1, 8, 64, 64])]; tensor var_3295 = reshape(shape = var_3294, x = value_states_85)[name = string("op_3295")]; tensor var_3296 = const()[name = string("op_3296"), val = tensor([0, 1, 3, 2])]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_393 = transpose(perm = var_3288, x = var_3287)[name = string("transpose_10")]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = x_393)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = x_393)[name = string("x2_57")]; tensor var_3314 = mul(x = x1_57, y = cos_7)[name = string("op_3314")]; tensor var_3315 = mul(x = x2_57, y = sin_7)[name = string("op_3315")]; tensor var_3316 = sub(x = var_3314, y = var_3315)[name = string("op_3316")]; tensor var_3317 = mul(x = x2_57, y = cos_7)[name = string("op_3317")]; tensor var_3318 = mul(x = x1_57, y = sin_7)[name = string("op_3318")]; tensor var_3319 = add(x = var_3317, y = var_3318)[name = string("op_3319")]; bool rotated_57_interleave_0 = const()[name = string("rotated_57_interleave_0"), val = bool(false)]; tensor rotated_57 = concat(axis = var_75, interleave = rotated_57_interleave_0, values = (var_3316, var_3319))[name = string("rotated_57")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_397 = transpose(perm = var_3292, x = var_3291)[name = string("transpose_9")]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = x_397)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = x_397)[name = string("x2_59")]; tensor var_3335 = mul(x = x1_59, y = cos_7)[name = string("op_3335")]; tensor var_3336 = mul(x = x2_59, y = sin_7)[name = string("op_3336")]; tensor var_3337 = sub(x = var_3335, y = var_3336)[name = string("op_3337")]; tensor var_3338 = mul(x = x2_59, y = cos_7)[name = string("op_3338")]; tensor var_3339 = mul(x = x1_59, y = sin_7)[name = string("op_3339")]; tensor var_3340 = add(x = var_3338, y = var_3339)[name = string("op_3340")]; bool rotated_59_interleave_0 = const()[name = string("rotated_59_interleave_0"), val = bool(false)]; tensor rotated_59 = concat(axis = var_75, interleave = rotated_59_interleave_0, values = (var_3337, var_3340))[name = string("rotated_59")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([14])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([15])]; int32 concat_254_axis_0 = const()[name = string("concat_254_axis_0"), val = int32(0)]; bool concat_254_interleave_0 = const()[name = string("concat_254_interleave_0"), val = bool(false)]; tensor concat_254 = concat(axis = concat_254_axis_0, interleave = concat_254_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_254")]; tensor concat_255_values1_0 = const()[name = string("concat_255_values1_0"), val = tensor([0])]; tensor concat_255_values3_0 = const()[name = string("concat_255_values3_0"), val = tensor([0])]; int32 concat_255_axis_0 = const()[name = string("concat_255_axis_0"), val = int32(0)]; bool concat_255_interleave_0 = const()[name = string("concat_255_interleave_0"), val = bool(false)]; tensor concat_255 = concat(axis = concat_255_axis_0, interleave = concat_255_interleave_0, values = (expand_dims_172, concat_255_values1_0, var_591, concat_255_values3_0))[name = string("concat_255")]; tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_254, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_255, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = rotated_59, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_60")]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([30])]; tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([31])]; int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_258")]; tensor concat_259_values1_0 = const()[name = string("concat_259_values1_0"), val = tensor([0])]; tensor concat_259_values3_0 = const()[name = string("concat_259_values3_0"), val = tensor([0])]; int32 concat_259_axis_0 = const()[name = string("concat_259_axis_0"), val = int32(0)]; bool concat_259_interleave_0 = const()[name = string("concat_259_interleave_0"), val = bool(false)]; tensor concat_259 = concat(axis = concat_259_axis_0, interleave = concat_259_interleave_0, values = (expand_dims_178, concat_259_values1_0, var_591, concat_259_values3_0))[name = string("concat_259")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_87 = transpose(perm = var_3296, x = var_3295)[name = string("transpose_8")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_258, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_259, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = value_states_87, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_61")]; tensor var_3363_begin_0 = const()[name = string("op_3363_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_3363_end_0 = const()[name = string("op_3363_end_0"), val = tensor([15, 8, 1024, 64])]; tensor var_3363_end_mask_0 = const()[name = string("op_3363_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3363_cast_fp16 = slice_by_index(begin = var_3363_begin_0, end = var_3363_end_0, end_mask = var_3363_end_mask_0, x = coreml_update_state_61)[name = string("op_3363_cast_fp16")]; tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_3363_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; tensor var_3365_begin_0 = const()[name = string("op_3365_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_3365_end_0 = const()[name = string("op_3365_end_0"), val = tensor([31, 8, 1024, 64])]; tensor var_3365_end_mask_0 = const()[name = string("op_3365_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3365_cast_fp16 = slice_by_index(begin = var_3365_begin_0, end = var_3365_end_0, end_mask = var_3365_end_mask_0, x = coreml_update_state_61)[name = string("op_3365_cast_fp16")]; tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_3365_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; tensor x_403_axes_0 = const()[name = string("x_403_axes_0"), val = tensor([1])]; tensor x_403_cast_fp16 = expand_dims(axes = x_403_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_403_cast_fp16")]; tensor var_3374 = const()[name = string("op_3374"), val = tensor([1, 4, 1, 1])]; tensor x_405_cast_fp16 = tile(reps = var_3374, x = x_403_cast_fp16)[name = string("x_405_cast_fp16")]; tensor var_3378 = const()[name = string("op_3378"), val = tensor([1, -1, 1024, 64])]; tensor var_3379_cast_fp16 = reshape(shape = var_3378, x = x_405_cast_fp16)[name = string("op_3379_cast_fp16")]; tensor x_409_axes_0 = const()[name = string("x_409_axes_0"), val = tensor([1])]; tensor x_409_cast_fp16 = expand_dims(axes = x_409_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_409_cast_fp16")]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 4, 1, 1])]; tensor x_411_cast_fp16 = tile(reps = var_3381, x = x_409_cast_fp16)[name = string("x_411_cast_fp16")]; bool var_3388_transpose_x_0 = const()[name = string("op_3388_transpose_x_0"), val = bool(false)]; bool var_3388_transpose_y_0 = const()[name = string("op_3388_transpose_y_0"), val = bool(true)]; tensor var_3388_cast_fp16 = matmul(transpose_x = var_3388_transpose_x_0, transpose_y = var_3388_transpose_y_0, x = rotated_57, y = var_3379_cast_fp16)[name = string("op_3388_cast_fp16")]; fp16 var_3389_to_fp16 = const()[name = string("op_3389_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_29_cast_fp16 = mul(x = var_3388_cast_fp16, y = var_3389_to_fp16)[name = string("attn_weights_29_cast_fp16")]; tensor x_413_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_413_cast_fp16")]; tensor reduce_max_14_axes_0 = const()[name = string("reduce_max_14_axes_0"), val = tensor([-1])]; bool reduce_max_14_keep_dims_0 = const()[name = string("reduce_max_14_keep_dims_0"), val = bool(true)]; tensor reduce_max_14_cast_fp16 = reduce_max(axes = reduce_max_14_axes_0, keep_dims = reduce_max_14_keep_dims_0, x = x_413_cast_fp16)[name = string("reduce_max_14_cast_fp16")]; tensor x_415_cast_fp16 = sub(x = x_413_cast_fp16, y = reduce_max_14_cast_fp16)[name = string("x_415_cast_fp16")]; tensor exp_x_29_cast_fp16 = exp(x = x_415_cast_fp16)[name = string("exp_x_29_cast_fp16")]; tensor var_3400_axes_0 = const()[name = string("op_3400_axes_0"), val = tensor([-1])]; bool var_3400_keep_dims_0 = const()[name = string("op_3400_keep_dims_0"), val = bool(true)]; tensor var_3400_cast_fp16 = reduce_sum(axes = var_3400_axes_0, keep_dims = var_3400_keep_dims_0, x = exp_x_29_cast_fp16)[name = string("op_3400_cast_fp16")]; tensor var_3401_cast_fp16 = real_div(x = exp_x_29_cast_fp16, y = var_3400_cast_fp16)[name = string("op_3401_cast_fp16")]; tensor concat_264 = const()[name = string("concat_264"), val = tensor([32, 64, 1024])]; tensor reshape_42_cast_fp16 = reshape(shape = concat_264, x = var_3401_cast_fp16)[name = string("reshape_42_cast_fp16")]; tensor concat_265 = const()[name = string("concat_265"), val = tensor([32, 1024, 64])]; tensor reshape_43_cast_fp16 = reshape(shape = concat_265, x = x_411_cast_fp16)[name = string("reshape_43_cast_fp16")]; bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; tensor concat_269 = const()[name = string("concat_269"), val = tensor([1, 32, 64, 64])]; tensor reshape_44_cast_fp16 = reshape(shape = concat_269, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; tensor var_3404_perm_0 = const()[name = string("op_3404_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3406 = const()[name = string("op_3406"), val = tensor([1, 64, 2048])]; tensor var_3404_cast_fp16 = transpose(perm = var_3404_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_7")]; tensor input_201_cast_fp16 = reshape(shape = var_3406, x = var_3404_cast_fp16)[name = string("input_201_cast_fp16")]; tensor model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213208256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1216354048))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_201_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor hidden_states_117_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = linear_14_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor mean_59_axes_0 = const()[name = string("mean_59_axes_0"), val = tensor([-1])]; bool mean_59_keep_dims_0 = const()[name = string("mean_59_keep_dims_0"), val = bool(true)]; tensor mean_59_cast_fp16 = reduce_mean(axes = mean_59_axes_0, keep_dims = mean_59_keep_dims_0, x = hidden_states_117_cast_fp16)[name = string("mean_59_cast_fp16")]; tensor input_203_cast_fp16 = sub(x = hidden_states_117_cast_fp16, y = mean_59_cast_fp16)[name = string("input_203_cast_fp16")]; tensor var_3417_axes_0 = const()[name = string("op_3417_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519634880)))]; tensor var_3417_cast_fp16 = layer_norm(axes = var_3417_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_203_cast_fp16)[name = string("op_3417_cast_fp16")]; tensor var_3424 = const()[name = string("op_3424"), val = tensor([0, 2, 1])]; tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; tensor var_3425 = transpose(perm = var_3424, x = var_3417_cast_fp16)[name = string("transpose_6")]; tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_3425)[name = string("input_205")]; string input_207_pad_type_0 = const()[name = string("input_207_pad_type_0"), val = string("valid")]; tensor input_207_strides_0 = const()[name = string("input_207_strides_0"), val = tensor([1, 1])]; tensor input_207_pad_0 = const()[name = string("input_207_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_207_dilations_0 = const()[name = string("input_207_dilations_0"), val = tensor([1, 1])]; int32 input_207_groups_0 = const()[name = string("input_207_groups_0"), val = int32(1)]; tensor input_207 = conv(dilations = input_207_dilations_0, groups = input_207_groups_0, pad = input_207_pad_0, pad_type = input_207_pad_type_0, strides = input_207_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_205)[name = string("input_207")]; string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_205)[name = string("up_states")]; tensor gate_states = silu(x = input_207)[name = string("gate_states")]; tensor input_209 = mul(x = gate_states, y = up_states)[name = string("input_209")]; string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; tensor hidden_states_119 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_209)[name = string("hidden_states_119")]; tensor var_3447_axes_0 = const()[name = string("op_3447_axes_0"), val = tensor([2])]; tensor var_3447 = squeeze(axes = var_3447_axes_0, x = hidden_states_119)[name = string("op_3447")]; tensor var_3448 = const()[name = string("op_3448"), val = tensor([0, 2, 1])]; tensor var_3449 = transpose(perm = var_3448, x = var_3447)[name = string("transpose_5")]; tensor hidden_states_121_cast_fp16 = add(x = hidden_states_117_cast_fp16, y = var_3449)[name = string("hidden_states_121_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_121_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_211_cast_fp16 = sub(x = hidden_states_121_cast_fp16, y = mean_cast_fp16)[name = string("input_211_cast_fp16")]; tensor var_3457_axes_0 = const()[name = string("op_3457_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519639040)))]; tensor var_3457_cast_fp16 = layer_norm(axes = var_3457_axes_0, epsilon = var_77_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_211_cast_fp16)[name = string("op_3457_cast_fp16")]; tensor var_3461 = const()[name = string("op_3461"), val = tensor([0, 2, 1])]; tensor var_3463_axes_0 = const()[name = string("op_3463_axes_0"), val = tensor([2])]; tensor var_3462 = transpose(perm = var_3461, x = var_3457_cast_fp16)[name = string("transpose_4")]; tensor var_3463 = expand_dims(axes = var_3463_axes_0, x = var_3462)[name = string("op_3463")]; string query_states_61_pad_type_0 = const()[name = string("query_states_61_pad_type_0"), val = string("valid")]; tensor query_states_61_strides_0 = const()[name = string("query_states_61_strides_0"), val = tensor([1, 1])]; tensor query_states_61_pad_0 = const()[name = string("query_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_61_dilations_0 = const()[name = string("query_states_61_dilations_0"), val = tensor([1, 1])]; int32 query_states_61_groups_0 = const()[name = string("query_states_61_groups_0"), val = int32(1)]; tensor query_states_61 = conv(dilations = query_states_61_dilations_0, groups = query_states_61_groups_0, pad = query_states_61_pad_0, pad_type = query_states_61_pad_type_0, strides = query_states_61_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_3463)[name = string("query_states_61")]; string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_3463)[name = string("key_states_91")]; string value_states_91_pad_type_0 = const()[name = string("value_states_91_pad_type_0"), val = string("valid")]; tensor value_states_91_strides_0 = const()[name = string("value_states_91_strides_0"), val = tensor([1, 1])]; tensor value_states_91_pad_0 = const()[name = string("value_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_91_dilations_0 = const()[name = string("value_states_91_dilations_0"), val = tensor([1, 1])]; int32 value_states_91_groups_0 = const()[name = string("value_states_91_groups_0"), val = int32(1)]; tensor value_states_91 = conv(dilations = value_states_91_dilations_0, groups = value_states_91_groups_0, pad = value_states_91_pad_0, pad_type = value_states_91_pad_type_0, strides = value_states_91_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_3463)[name = string("value_states_91")]; tensor var_3483 = const()[name = string("op_3483"), val = tensor([1, 32, 64, 64])]; tensor var_3484 = reshape(shape = var_3483, x = query_states_61)[name = string("op_3484")]; tensor var_3485 = const()[name = string("op_3485"), val = tensor([0, 1, 3, 2])]; tensor var_3487 = const()[name = string("op_3487"), val = tensor([1, 8, 64, 64])]; tensor var_3488 = reshape(shape = var_3487, x = key_states_91)[name = string("op_3488")]; tensor var_3489 = const()[name = string("op_3489"), val = tensor([0, 1, 3, 2])]; tensor var_3491 = const()[name = string("op_3491"), val = tensor([1, 8, 64, 64])]; tensor var_3492 = reshape(shape = var_3491, x = value_states_91)[name = string("op_3492")]; tensor var_3493 = const()[name = string("op_3493"), val = tensor([0, 1, 3, 2])]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_421 = transpose(perm = var_3485, x = var_3484)[name = string("transpose_3")]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = x_421)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = x_421)[name = string("x2_61")]; tensor var_3511 = mul(x = x1_61, y = cos_7)[name = string("op_3511")]; tensor var_3512 = mul(x = x2_61, y = sin_7)[name = string("op_3512")]; tensor var_3513 = sub(x = var_3511, y = var_3512)[name = string("op_3513")]; tensor var_3514 = mul(x = x2_61, y = cos_7)[name = string("op_3514")]; tensor var_3515 = mul(x = x1_61, y = sin_7)[name = string("op_3515")]; tensor var_3516 = add(x = var_3514, y = var_3515)[name = string("op_3516")]; bool rotated_61_interleave_0 = const()[name = string("rotated_61_interleave_0"), val = bool(false)]; tensor rotated_61 = concat(axis = var_75, interleave = rotated_61_interleave_0, values = (var_3513, var_3516))[name = string("rotated_61")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_425 = transpose(perm = var_3489, x = var_3488)[name = string("transpose_2")]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_425)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_425)[name = string("x2")]; tensor var_3532 = mul(x = x1, y = cos_7)[name = string("op_3532")]; tensor var_3533 = mul(x = x2, y = sin_7)[name = string("op_3533")]; tensor var_3534 = sub(x = var_3532, y = var_3533)[name = string("op_3534")]; tensor var_3535 = mul(x = x2, y = cos_7)[name = string("op_3535")]; tensor var_3536 = mul(x = x1, y = sin_7)[name = string("op_3536")]; tensor var_3537 = add(x = var_3535, y = var_3536)[name = string("op_3537")]; bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; tensor rotated = concat(axis = var_75, interleave = rotated_interleave_0, values = (var_3534, var_3537))[name = string("rotated")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([15])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([16])]; int32 concat_272_axis_0 = const()[name = string("concat_272_axis_0"), val = int32(0)]; bool concat_272_interleave_0 = const()[name = string("concat_272_interleave_0"), val = bool(false)]; tensor concat_272 = concat(axis = concat_272_axis_0, interleave = concat_272_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_272")]; tensor concat_273_values1_0 = const()[name = string("concat_273_values1_0"), val = tensor([0])]; tensor concat_273_values3_0 = const()[name = string("concat_273_values3_0"), val = tensor([0])]; int32 concat_273_axis_0 = const()[name = string("concat_273_axis_0"), val = int32(0)]; bool concat_273_interleave_0 = const()[name = string("concat_273_interleave_0"), val = bool(false)]; tensor concat_273 = concat(axis = concat_273_axis_0, interleave = concat_273_interleave_0, values = (expand_dims_184, concat_273_values1_0, var_591, concat_273_values3_0))[name = string("concat_273")]; tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_272, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_273, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = rotated, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_62")]; tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([31])]; tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([32])]; int32 concat_276_axis_0 = const()[name = string("concat_276_axis_0"), val = int32(0)]; bool concat_276_interleave_0 = const()[name = string("concat_276_interleave_0"), val = bool(false)]; tensor concat_276 = concat(axis = concat_276_axis_0, interleave = concat_276_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_276")]; tensor concat_277_values1_0 = const()[name = string("concat_277_values1_0"), val = tensor([0])]; tensor concat_277_values3_0 = const()[name = string("concat_277_values3_0"), val = tensor([0])]; int32 concat_277_axis_0 = const()[name = string("concat_277_axis_0"), val = int32(0)]; bool concat_277_interleave_0 = const()[name = string("concat_277_interleave_0"), val = bool(false)]; tensor concat_277 = concat(axis = concat_277_axis_0, interleave = concat_277_interleave_0, values = (expand_dims_190, concat_277_values1_0, var_591, concat_277_values3_0))[name = string("concat_277")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_93 = transpose(perm = var_3493, x = var_3492)[name = string("transpose_1")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_276, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_277, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = value_states_93, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_63")]; tensor var_3560_begin_0 = const()[name = string("op_3560_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_3560_end_0 = const()[name = string("op_3560_end_0"), val = tensor([16, 8, 1024, 64])]; tensor var_3560_end_mask_0 = const()[name = string("op_3560_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3560_cast_fp16 = slice_by_index(begin = var_3560_begin_0, end = var_3560_end_0, end_mask = var_3560_end_mask_0, x = coreml_update_state_63)[name = string("op_3560_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_3560_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_3562_begin_0 = const()[name = string("op_3562_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_3562_end_0 = const()[name = string("op_3562_end_0"), val = tensor([1, 8, 1024, 64])]; tensor var_3562_end_mask_0 = const()[name = string("op_3562_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3562_cast_fp16 = slice_by_index(begin = var_3562_begin_0, end = var_3562_end_0, end_mask = var_3562_end_mask_0, x = coreml_update_state_63)[name = string("op_3562_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_3562_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_431_axes_0 = const()[name = string("x_431_axes_0"), val = tensor([1])]; tensor x_431_cast_fp16 = expand_dims(axes = x_431_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_431_cast_fp16")]; tensor var_3571 = const()[name = string("op_3571"), val = tensor([1, 4, 1, 1])]; tensor x_433_cast_fp16 = tile(reps = var_3571, x = x_431_cast_fp16)[name = string("x_433_cast_fp16")]; tensor var_3575 = const()[name = string("op_3575"), val = tensor([1, -1, 1024, 64])]; tensor var_3576_cast_fp16 = reshape(shape = var_3575, x = x_433_cast_fp16)[name = string("op_3576_cast_fp16")]; tensor x_437_axes_0 = const()[name = string("x_437_axes_0"), val = tensor([1])]; tensor x_437_cast_fp16 = expand_dims(axes = x_437_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_437_cast_fp16")]; tensor var_3578 = const()[name = string("op_3578"), val = tensor([1, 4, 1, 1])]; tensor x_439_cast_fp16 = tile(reps = var_3578, x = x_437_cast_fp16)[name = string("x_439_cast_fp16")]; bool var_3585_transpose_x_0 = const()[name = string("op_3585_transpose_x_0"), val = bool(false)]; bool var_3585_transpose_y_0 = const()[name = string("op_3585_transpose_y_0"), val = bool(true)]; tensor var_3585_cast_fp16 = matmul(transpose_x = var_3585_transpose_x_0, transpose_y = var_3585_transpose_y_0, x = rotated_61, y = var_3576_cast_fp16)[name = string("op_3585_cast_fp16")]; fp16 var_3586_to_fp16 = const()[name = string("op_3586_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_cast_fp16 = mul(x = var_3585_cast_fp16, y = var_3586_to_fp16)[name = string("attn_weights_cast_fp16")]; tensor x_441_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_441_cast_fp16")]; tensor reduce_max_15_axes_0 = const()[name = string("reduce_max_15_axes_0"), val = tensor([-1])]; bool reduce_max_15_keep_dims_0 = const()[name = string("reduce_max_15_keep_dims_0"), val = bool(true)]; tensor reduce_max_15_cast_fp16 = reduce_max(axes = reduce_max_15_axes_0, keep_dims = reduce_max_15_keep_dims_0, x = x_441_cast_fp16)[name = string("reduce_max_15_cast_fp16")]; tensor x_cast_fp16 = sub(x = x_441_cast_fp16, y = reduce_max_15_cast_fp16)[name = string("x_cast_fp16")]; tensor exp_x_cast_fp16 = exp(x = x_cast_fp16)[name = string("exp_x_cast_fp16")]; tensor var_3597_axes_0 = const()[name = string("op_3597_axes_0"), val = tensor([-1])]; bool var_3597_keep_dims_0 = const()[name = string("op_3597_keep_dims_0"), val = bool(true)]; tensor var_3597_cast_fp16 = reduce_sum(axes = var_3597_axes_0, keep_dims = var_3597_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_3597_cast_fp16")]; tensor var_3598_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_3597_cast_fp16)[name = string("op_3598_cast_fp16")]; tensor concat_282 = const()[name = string("concat_282"), val = tensor([32, 64, 1024])]; tensor reshape_45_cast_fp16 = reshape(shape = concat_282, x = var_3598_cast_fp16)[name = string("reshape_45_cast_fp16")]; tensor concat_283 = const()[name = string("concat_283"), val = tensor([32, 1024, 64])]; tensor reshape_46_cast_fp16 = reshape(shape = concat_283, x = x_439_cast_fp16)[name = string("reshape_46_cast_fp16")]; bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; tensor concat_287 = const()[name = string("concat_287"), val = tensor([1, 32, 64, 64])]; tensor reshape_47_cast_fp16 = reshape(shape = concat_287, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; tensor var_3601_perm_0 = const()[name = string("op_3601_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3603 = const()[name = string("op_3603"), val = tensor([1, 64, 2048])]; tensor var_3601_cast_fp16 = transpose(perm = var_3601_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_0")]; tensor input_cast_fp16 = reshape(shape = var_3603, x = var_3601_cast_fp16)[name = string("input_cast_fp16")]; tensor model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1216386880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1219532672))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_15_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_121_cast_fp16, y = linear_15_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor var_3609_begin_0 = const()[name = string("op_3609_begin_0"), val = tensor([0, 0, 0])]; tensor var_3609_end_0 = const()[name = string("op_3609_end_0"), val = tensor([1, 1, 2048])]; tensor var_3609_end_mask_0 = const()[name = string("op_3609_end_mask_0"), val = tensor([true, false, true])]; tensor output_hidden_states = slice_by_index(begin = var_3609_begin_0, end = var_3609_end_0, end_mask = var_3609_end_mask_0, x = hidden_states_cast_fp16)[name = string("op_3609_cast_fp16")]; } -> (output_hidden_states); }