diff --git "a/Hermes_FFN_PF_lut6_chunk_03of03.mlmodelc/model.mil" "b/Hermes_FFN_PF_lut6_chunk_03of03.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/Hermes_FFN_PF_lut6_chunk_03of03.mlmodelc/model.mil" @@ -0,0 +1,3697 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7078016))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7127232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9486592))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9503040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11862400))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11878848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30753280))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30884416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49758848))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49889984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68764416))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75891584))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75940800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78300160))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78316608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80675968))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80692416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99566848))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99697984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118572416))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137577984))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137627200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144705152))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144754368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147113728))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147130176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149489536))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149505984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168380416))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168511552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187385984))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206391552))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206440768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213518720))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213567936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215927296))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215943744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218303104))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218319552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237193984))))[name = string("model_model_layers_21_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237325120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256199552))))[name = string("model_model_layers_21_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256330688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275205120))))[name = string("model_model_layers_21_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275254336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282332288))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282381504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284740864))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284757312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287116672))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287133120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306007552))))[name = string("model_model_layers_22_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306138688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325013120))))[name = string("model_model_layers_22_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325144256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344018688))))[name = string("model_model_layers_22_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344067904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351145856))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353554432))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353570880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355930240))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355946688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374821120))))[name = string("model_model_layers_23_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374952256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393826688))))[name = string("model_model_layers_23_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393957824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412832256))))[name = string("model_model_layers_23_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412881472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419959424))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420008640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422368000))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422384448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424743808))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424760256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443634688))))[name = string("model_model_layers_24_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443765824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462640256))))[name = string("model_model_layers_24_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462771392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481645824))))[name = string("model_model_layers_24_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481695040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488772992))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488822208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491181568))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491198016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493557376))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493573824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512448256))))[name = string("model_model_layers_25_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512579392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531453824))))[name = string("model_model_layers_25_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531584960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550459392))))[name = string("model_model_layers_25_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550508608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557586560))))[name = string("model_model_layers_26_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557635776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559995136))))[name = string("model_model_layers_26_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560011584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562370944))))[name = string("model_model_layers_26_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562387392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581261824))))[name = string("model_model_layers_26_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581392960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600267392))))[name = string("model_model_layers_26_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600398528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619272960))))[name = string("model_model_layers_26_mlp_down_proj_weight_palettized")]; + int32 var_59 = const()[name = string("op_59"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_293_axis_0 = const()[name = string("op_293_axis_0"), val = int32(1)]; + int32 var_293_batch_dims_0 = const()[name = string("op_293_batch_dims_0"), val = int32(0)]; + bool var_293_validate_indices_0 = const()[name = string("op_293_validate_indices_0"), val = bool(false)]; + tensor var_64_to_fp16 = const()[name = string("op_64_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619322176)))]; + tensor var_293_cast_fp16 = gather(axis = var_293_axis_0, batch_dims = var_293_batch_dims_0, indices = select_0, validate_indices = var_293_validate_indices_0, x = var_64_to_fp16)[name = string("op_293_cast_fp16")]; + tensor var_294 = const()[name = string("op_294"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_294, x = var_293_cast_fp16)[name = string("sin_1_cast_fp16")]; + int32 var_298_axis_0 = const()[name = string("op_298_axis_0"), val = int32(1)]; + int32 var_298_batch_dims_0 = const()[name = string("op_298_batch_dims_0"), val = int32(0)]; + bool var_298_validate_indices_0 = const()[name = string("op_298_validate_indices_0"), val = bool(false)]; + tensor var_58_to_fp16 = const()[name = string("op_58_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652876672)))]; + tensor var_298_cast_fp16 = gather(axis = var_298_axis_0, batch_dims = var_298_batch_dims_0, indices = select_0, validate_indices = var_298_validate_indices_0, x = var_58_to_fp16)[name = string("op_298_cast_fp16")]; + tensor var_299 = const()[name = string("op_299"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_299, x = var_298_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_307_axes_0 = const()[name = string("op_307_axes_0"), val = tensor([-1])]; + tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686431168)))]; + fp16 var_54_to_fp16 = const()[name = string("op_54_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_307_cast_fp16 = layer_norm(axes = var_307_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_18_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_307_cast_fp16")]; + tensor var_310 = const()[name = string("op_310"), val = tensor([0, 2, 1])]; + tensor var_312_axes_0 = const()[name = string("op_312_axes_0"), val = tensor([2])]; + tensor var_311 = transpose(perm = var_310, x = var_307_cast_fp16)[name = string("transpose_35")]; + tensor var_312 = expand_dims(axes = var_312_axes_0, x = var_311)[name = string("op_312")]; + string var_319_pad_type_0 = const()[name = string("op_319_pad_type_0"), val = string("valid")]; + tensor var_319_strides_0 = const()[name = string("op_319_strides_0"), val = tensor([1, 1])]; + tensor var_319_pad_0 = const()[name = string("op_319_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_319_dilations_0 = const()[name = string("op_319_dilations_0"), val = tensor([1, 1])]; + int32 var_319_groups_0 = const()[name = string("op_319_groups_0"), val = int32(1)]; + tensor var_319 = conv(dilations = var_319_dilations_0, groups = var_319_groups_0, pad = var_319_pad_0, pad_type = var_319_pad_type_0, strides = var_319_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_312)[name = string("op_319")]; + tensor var_320 = const()[name = string("op_320"), val = tensor([1, 24, 1, 128])]; + tensor var_321 = reshape(shape = var_320, x = var_319)[name = string("op_321")]; + string var_328_pad_type_0 = const()[name = string("op_328_pad_type_0"), val = string("valid")]; + tensor var_328_strides_0 = const()[name = string("op_328_strides_0"), val = tensor([1, 1])]; + tensor var_328_pad_0 = const()[name = string("op_328_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_328_dilations_0 = const()[name = string("op_328_dilations_0"), val = tensor([1, 1])]; + int32 var_328_groups_0 = const()[name = string("op_328_groups_0"), val = int32(1)]; + tensor var_328 = conv(dilations = var_328_dilations_0, groups = var_328_groups_0, pad = var_328_pad_0, pad_type = var_328_pad_type_0, strides = var_328_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_312)[name = string("op_328")]; + tensor var_329 = const()[name = string("op_329"), val = tensor([1, 8, 1, 128])]; + tensor var_330 = reshape(shape = var_329, x = var_328)[name = string("op_330")]; + string var_337_pad_type_0 = const()[name = string("op_337_pad_type_0"), val = string("valid")]; + tensor var_337_strides_0 = const()[name = string("op_337_strides_0"), val = tensor([1, 1])]; + tensor var_337_pad_0 = const()[name = string("op_337_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_dilations_0 = const()[name = string("op_337_dilations_0"), val = tensor([1, 1])]; + int32 var_337_groups_0 = const()[name = string("op_337_groups_0"), val = int32(1)]; + tensor var_337 = conv(dilations = var_337_dilations_0, groups = var_337_groups_0, pad = var_337_pad_0, pad_type = var_337_pad_type_0, strides = var_337_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_312)[name = string("op_337")]; + tensor var_338 = const()[name = string("op_338"), val = tensor([1, 8, 1, 128])]; + tensor var_339 = reshape(shape = var_338, x = var_337)[name = string("op_339")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_321)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_321)[name = string("x2_1")]; + tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; + tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; + tensor var_353_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_353_cast_fp16")]; + tensor var_354_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_354_cast_fp16")]; + tensor var_355_cast_fp16 = sub(x = var_353_cast_fp16, y = var_354_cast_fp16)[name = string("op_355_cast_fp16")]; + tensor var_356_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_356_cast_fp16")]; + tensor var_357_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_357_cast_fp16")]; + tensor var_358_cast_fp16 = add(x = var_356_cast_fp16, y = var_357_cast_fp16)[name = string("op_358_cast_fp16")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_59, interleave = rotated_1_interleave_0, values = (var_355_cast_fp16, var_358_cast_fp16))[name = string("rotated_1_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_330)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_330)[name = string("x2_3")]; + tensor var_374_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_374_cast_fp16")]; + tensor var_375_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_375_cast_fp16")]; + tensor var_376_cast_fp16 = sub(x = var_374_cast_fp16, y = var_375_cast_fp16)[name = string("op_376_cast_fp16")]; + tensor var_377_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_377_cast_fp16")]; + tensor var_378_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_378_cast_fp16")]; + tensor var_379_cast_fp16 = add(x = var_377_cast_fp16, y = var_378_cast_fp16)[name = string("op_379_cast_fp16")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_59, interleave = rotated_3_interleave_0, values = (var_376_cast_fp16, var_379_cast_fp16))[name = string("rotated_3_cast_fp16")]; + int32 var_383 = const()[name = string("op_383"), val = int32(1)]; + tensor var_384 = add(x = current_pos, y = var_383)[name = string("op_384")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([18])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([19])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_384, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([46])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([47])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_384, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_339, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; + tensor var_399_begin_0 = const()[name = string("op_399_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_399_end_0 = const()[name = string("op_399_end_0"), val = tensor([19, 8, 1024, 128])]; + tensor var_399_end_mask_0 = const()[name = string("op_399_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = var_399_end_0, end_mask = var_399_end_mask_0, x = coreml_update_state_19)[name = string("op_399_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_399_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_401_begin_0 = const()[name = string("op_401_begin_0"), val = tensor([46, 0, 0, 0])]; + tensor var_401_end_0 = const()[name = string("op_401_end_0"), val = tensor([47, 8, 1024, 128])]; + tensor var_401_end_mask_0 = const()[name = string("op_401_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_401_cast_fp16 = slice_by_index(begin = var_401_begin_0, end = var_401_end_0, end_mask = var_401_end_mask_0, x = coreml_update_state_19)[name = string("op_401_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_401_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_410 = const()[name = string("op_410"), val = tensor([1, 3, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_410, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_414 = const()[name = string("op_414"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_3_cast_fp16 = reshape(shape = var_414, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_417 = const()[name = string("op_417"), val = tensor([1, 3, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_417, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_421 = const()[name = string("op_421"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_3_cast_fp16 = reshape(shape = var_421, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; + bool var_424_transpose_x_1 = const()[name = string("op_424_transpose_x_1"), val = bool(false)]; + bool var_424_transpose_y_1 = const()[name = string("op_424_transpose_y_1"), val = bool(true)]; + tensor var_424_cast_fp16 = matmul(transpose_x = var_424_transpose_x_1, transpose_y = var_424_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_424_cast_fp16")]; + fp16 var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_424_cast_fp16, y = var_425_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_436_axes_0 = const()[name = string("op_436_axes_0"), val = tensor([-1])]; + bool var_436_keep_dims_0 = const()[name = string("op_436_keep_dims_0"), val = bool(true)]; + tensor var_436_cast_fp16 = reduce_sum(axes = var_436_axes_0, keep_dims = var_436_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_436_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_436_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_439_perm_0 = const()[name = string("op_439_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_441 = const()[name = string("op_441"), val = tensor([1, 1, 3072])]; + tensor var_439_cast_fp16 = transpose(perm = var_439_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_34")]; + tensor input_5_cast_fp16 = reshape(shape = var_441, x = var_439_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686437376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693515328))))[name = string("model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693564544)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_452_axes_0 = const()[name = string("op_452_axes_0"), val = tensor([-1])]; + tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693570752)))]; + tensor var_452_cast_fp16 = layer_norm(axes = var_452_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_18_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_452_cast_fp16")]; + tensor var_459 = const()[name = string("op_459"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_460 = transpose(perm = var_459, x = var_452_cast_fp16)[name = string("transpose_33")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_460)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_482_axes_0 = const()[name = string("op_482_axes_0"), val = tensor([2])]; + tensor var_482 = squeeze(axes = var_482_axes_0, x = hidden_states_7)[name = string("op_482")]; + tensor var_483 = const()[name = string("op_483"), val = tensor([0, 2, 1])]; + tensor var_484 = transpose(perm = var_483, x = var_482)[name = string("transpose_32")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_484)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_492_axes_0 = const()[name = string("op_492_axes_0"), val = tensor([-1])]; + tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693576960)))]; + tensor var_492_cast_fp16 = layer_norm(axes = var_492_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_19_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_492_cast_fp16")]; + tensor var_495 = const()[name = string("op_495"), val = tensor([0, 2, 1])]; + tensor var_497_axes_0 = const()[name = string("op_497_axes_0"), val = tensor([2])]; + tensor var_496 = transpose(perm = var_495, x = var_492_cast_fp16)[name = string("transpose_31")]; + tensor var_497 = expand_dims(axes = var_497_axes_0, x = var_496)[name = string("op_497")]; + string var_504_pad_type_0 = const()[name = string("op_504_pad_type_0"), val = string("valid")]; + tensor var_504_strides_0 = const()[name = string("op_504_strides_0"), val = tensor([1, 1])]; + tensor var_504_pad_0 = const()[name = string("op_504_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_504_dilations_0 = const()[name = string("op_504_dilations_0"), val = tensor([1, 1])]; + int32 var_504_groups_0 = const()[name = string("op_504_groups_0"), val = int32(1)]; + tensor var_504 = conv(dilations = var_504_dilations_0, groups = var_504_groups_0, pad = var_504_pad_0, pad_type = var_504_pad_type_0, strides = var_504_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_497)[name = string("op_504")]; + tensor var_505 = const()[name = string("op_505"), val = tensor([1, 24, 1, 128])]; + tensor var_506 = reshape(shape = var_505, x = var_504)[name = string("op_506")]; + string var_513_pad_type_0 = const()[name = string("op_513_pad_type_0"), val = string("valid")]; + tensor var_513_strides_0 = const()[name = string("op_513_strides_0"), val = tensor([1, 1])]; + tensor var_513_pad_0 = const()[name = string("op_513_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_513_dilations_0 = const()[name = string("op_513_dilations_0"), val = tensor([1, 1])]; + int32 var_513_groups_0 = const()[name = string("op_513_groups_0"), val = int32(1)]; + tensor var_513 = conv(dilations = var_513_dilations_0, groups = var_513_groups_0, pad = var_513_pad_0, pad_type = var_513_pad_type_0, strides = var_513_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_497)[name = string("op_513")]; + tensor var_514 = const()[name = string("op_514"), val = tensor([1, 8, 1, 128])]; + tensor var_515 = reshape(shape = var_514, x = var_513)[name = string("op_515")]; + string var_522_pad_type_0 = const()[name = string("op_522_pad_type_0"), val = string("valid")]; + tensor var_522_strides_0 = const()[name = string("op_522_strides_0"), val = tensor([1, 1])]; + tensor var_522_pad_0 = const()[name = string("op_522_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_522_dilations_0 = const()[name = string("op_522_dilations_0"), val = tensor([1, 1])]; + int32 var_522_groups_0 = const()[name = string("op_522_groups_0"), val = int32(1)]; + tensor var_522 = conv(dilations = var_522_dilations_0, groups = var_522_groups_0, pad = var_522_pad_0, pad_type = var_522_pad_type_0, strides = var_522_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_497)[name = string("op_522")]; + tensor var_523 = const()[name = string("op_523"), val = tensor([1, 8, 1, 128])]; + tensor var_524 = reshape(shape = var_523, x = var_522)[name = string("op_524")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_506)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_506)[name = string("x2_5")]; + tensor var_538_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_538_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = sub(x = var_538_cast_fp16, y = var_539_cast_fp16)[name = string("op_540_cast_fp16")]; + tensor var_541_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_541_cast_fp16")]; + tensor var_542_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_542_cast_fp16")]; + tensor var_543_cast_fp16 = add(x = var_541_cast_fp16, y = var_542_cast_fp16)[name = string("op_543_cast_fp16")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_59, interleave = rotated_5_interleave_0, values = (var_540_cast_fp16, var_543_cast_fp16))[name = string("rotated_5_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_515)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_515)[name = string("x2_7")]; + tensor var_559_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_559_cast_fp16")]; + tensor var_560_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_560_cast_fp16")]; + tensor var_561_cast_fp16 = sub(x = var_559_cast_fp16, y = var_560_cast_fp16)[name = string("op_561_cast_fp16")]; + tensor var_562_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_562_cast_fp16")]; + tensor var_563_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_563_cast_fp16")]; + tensor var_564_cast_fp16 = add(x = var_562_cast_fp16, y = var_563_cast_fp16)[name = string("op_564_cast_fp16")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_59, interleave = rotated_7_interleave_0, values = (var_561_cast_fp16, var_564_cast_fp16))[name = string("rotated_7_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([19])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([20])]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; + tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_384, concat_11_values3_0))[name = string("concat_11")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([47])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([48])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_384, concat_15_values3_0))[name = string("concat_15")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_524, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; + tensor var_584_begin_0 = const()[name = string("op_584_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_584_end_0 = const()[name = string("op_584_end_0"), val = tensor([20, 8, 1024, 128])]; + tensor var_584_end_mask_0 = const()[name = string("op_584_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_584_cast_fp16 = slice_by_index(begin = var_584_begin_0, end = var_584_end_0, end_mask = var_584_end_mask_0, x = coreml_update_state_21)[name = string("op_584_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_584_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_586_begin_0 = const()[name = string("op_586_begin_0"), val = tensor([47, 0, 0, 0])]; + tensor var_586_end_0 = const()[name = string("op_586_end_0"), val = tensor([48, 8, 1024, 128])]; + tensor var_586_end_mask_0 = const()[name = string("op_586_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = coreml_update_state_21)[name = string("op_586_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_586_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_595 = const()[name = string("op_595"), val = tensor([1, 3, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_595, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_599 = const()[name = string("op_599"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_7_cast_fp16 = reshape(shape = var_599, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_602 = const()[name = string("op_602"), val = tensor([1, 3, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_602, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_606 = const()[name = string("op_606"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_7_cast_fp16 = reshape(shape = var_606, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; + bool var_609_transpose_x_1 = const()[name = string("op_609_transpose_x_1"), val = bool(false)]; + bool var_609_transpose_y_1 = const()[name = string("op_609_transpose_y_1"), val = bool(true)]; + tensor var_609_cast_fp16 = matmul(transpose_x = var_609_transpose_x_1, transpose_y = var_609_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_609_cast_fp16")]; + fp16 var_610_to_fp16 = const()[name = string("op_610_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_609_cast_fp16, y = var_610_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_621_axes_0 = const()[name = string("op_621_axes_0"), val = tensor([-1])]; + bool var_621_keep_dims_0 = const()[name = string("op_621_keep_dims_0"), val = bool(true)]; + tensor var_621_cast_fp16 = reduce_sum(axes = var_621_axes_0, keep_dims = var_621_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_621_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_621_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_624_perm_0 = const()[name = string("op_624_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_626 = const()[name = string("op_626"), val = tensor([1, 1, 3072])]; + tensor var_624_cast_fp16 = transpose(perm = var_624_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_30")]; + tensor input_19_cast_fp16 = reshape(shape = var_626, x = var_624_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693583168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700661120))))[name = string("model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_637_axes_0 = const()[name = string("op_637_axes_0"), val = tensor([-1])]; + tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700710336)))]; + tensor var_637_cast_fp16 = layer_norm(axes = var_637_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_19_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_637_cast_fp16")]; + tensor var_644 = const()[name = string("op_644"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_645 = transpose(perm = var_644, x = var_637_cast_fp16)[name = string("transpose_29")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_645)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_667_axes_0 = const()[name = string("op_667_axes_0"), val = tensor([2])]; + tensor var_667 = squeeze(axes = var_667_axes_0, x = hidden_states_15)[name = string("op_667")]; + tensor var_668 = const()[name = string("op_668"), val = tensor([0, 2, 1])]; + tensor var_669 = transpose(perm = var_668, x = var_667)[name = string("transpose_28")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_669)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_677_axes_0 = const()[name = string("op_677_axes_0"), val = tensor([-1])]; + tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700716544)))]; + tensor var_677_cast_fp16 = layer_norm(axes = var_677_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_20_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_677_cast_fp16")]; + tensor var_680 = const()[name = string("op_680"), val = tensor([0, 2, 1])]; + tensor var_682_axes_0 = const()[name = string("op_682_axes_0"), val = tensor([2])]; + tensor var_681 = transpose(perm = var_680, x = var_677_cast_fp16)[name = string("transpose_27")]; + tensor var_682 = expand_dims(axes = var_682_axes_0, x = var_681)[name = string("op_682")]; + string var_689_pad_type_0 = const()[name = string("op_689_pad_type_0"), val = string("valid")]; + tensor var_689_strides_0 = const()[name = string("op_689_strides_0"), val = tensor([1, 1])]; + tensor var_689_pad_0 = const()[name = string("op_689_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_689_dilations_0 = const()[name = string("op_689_dilations_0"), val = tensor([1, 1])]; + int32 var_689_groups_0 = const()[name = string("op_689_groups_0"), val = int32(1)]; + tensor var_689 = conv(dilations = var_689_dilations_0, groups = var_689_groups_0, pad = var_689_pad_0, pad_type = var_689_pad_type_0, strides = var_689_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_682)[name = string("op_689")]; + tensor var_690 = const()[name = string("op_690"), val = tensor([1, 24, 1, 128])]; + tensor var_691 = reshape(shape = var_690, x = var_689)[name = string("op_691")]; + string var_698_pad_type_0 = const()[name = string("op_698_pad_type_0"), val = string("valid")]; + tensor var_698_strides_0 = const()[name = string("op_698_strides_0"), val = tensor([1, 1])]; + tensor var_698_pad_0 = const()[name = string("op_698_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_698_dilations_0 = const()[name = string("op_698_dilations_0"), val = tensor([1, 1])]; + int32 var_698_groups_0 = const()[name = string("op_698_groups_0"), val = int32(1)]; + tensor var_698 = conv(dilations = var_698_dilations_0, groups = var_698_groups_0, pad = var_698_pad_0, pad_type = var_698_pad_type_0, strides = var_698_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_682)[name = string("op_698")]; + tensor var_699 = const()[name = string("op_699"), val = tensor([1, 8, 1, 128])]; + tensor var_700 = reshape(shape = var_699, x = var_698)[name = string("op_700")]; + string var_707_pad_type_0 = const()[name = string("op_707_pad_type_0"), val = string("valid")]; + tensor var_707_strides_0 = const()[name = string("op_707_strides_0"), val = tensor([1, 1])]; + tensor var_707_pad_0 = const()[name = string("op_707_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_707_dilations_0 = const()[name = string("op_707_dilations_0"), val = tensor([1, 1])]; + int32 var_707_groups_0 = const()[name = string("op_707_groups_0"), val = int32(1)]; + tensor var_707 = conv(dilations = var_707_dilations_0, groups = var_707_groups_0, pad = var_707_pad_0, pad_type = var_707_pad_type_0, strides = var_707_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_682)[name = string("op_707")]; + tensor var_708 = const()[name = string("op_708"), val = tensor([1, 8, 1, 128])]; + tensor var_709 = reshape(shape = var_708, x = var_707)[name = string("op_709")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_691)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_691)[name = string("x2_9")]; + tensor var_723_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_723_cast_fp16")]; + tensor var_724_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_724_cast_fp16")]; + tensor var_725_cast_fp16 = sub(x = var_723_cast_fp16, y = var_724_cast_fp16)[name = string("op_725_cast_fp16")]; + tensor var_726_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_726_cast_fp16")]; + tensor var_727_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_727_cast_fp16")]; + tensor var_728_cast_fp16 = add(x = var_726_cast_fp16, y = var_727_cast_fp16)[name = string("op_728_cast_fp16")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_59, interleave = rotated_9_interleave_0, values = (var_725_cast_fp16, var_728_cast_fp16))[name = string("rotated_9_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_700)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_700)[name = string("x2_11")]; + tensor var_744_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_744_cast_fp16")]; + tensor var_745_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_745_cast_fp16")]; + tensor var_746_cast_fp16 = sub(x = var_744_cast_fp16, y = var_745_cast_fp16)[name = string("op_746_cast_fp16")]; + tensor var_747_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_747_cast_fp16")]; + tensor var_748_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_748_cast_fp16")]; + tensor var_749_cast_fp16 = add(x = var_747_cast_fp16, y = var_748_cast_fp16)[name = string("op_749_cast_fp16")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11_cast_fp16 = concat(axis = var_59, interleave = rotated_11_interleave_0, values = (var_746_cast_fp16, var_749_cast_fp16))[name = string("rotated_11_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([20])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([21])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; + tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_384, concat_19_values3_0))[name = string("concat_19")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([48])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([49])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_384, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_709, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; + tensor var_769_begin_0 = const()[name = string("op_769_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_769_end_0 = const()[name = string("op_769_end_0"), val = tensor([21, 8, 1024, 128])]; + tensor var_769_end_mask_0 = const()[name = string("op_769_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_769_cast_fp16 = slice_by_index(begin = var_769_begin_0, end = var_769_end_0, end_mask = var_769_end_mask_0, x = coreml_update_state_23)[name = string("op_769_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_769_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_771_begin_0 = const()[name = string("op_771_begin_0"), val = tensor([48, 0, 0, 0])]; + tensor var_771_end_0 = const()[name = string("op_771_end_0"), val = tensor([49, 8, 1024, 128])]; + tensor var_771_end_mask_0 = const()[name = string("op_771_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_771_cast_fp16 = slice_by_index(begin = var_771_begin_0, end = var_771_end_0, end_mask = var_771_end_mask_0, x = coreml_update_state_23)[name = string("op_771_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_771_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_780 = const()[name = string("op_780"), val = tensor([1, 3, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_780, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_784 = const()[name = string("op_784"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_11_cast_fp16 = reshape(shape = var_784, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_787 = const()[name = string("op_787"), val = tensor([1, 3, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_787, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor var_791 = const()[name = string("op_791"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_11_cast_fp16 = reshape(shape = var_791, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + bool var_794_transpose_x_1 = const()[name = string("op_794_transpose_x_1"), val = bool(false)]; + bool var_794_transpose_y_1 = const()[name = string("op_794_transpose_y_1"), val = bool(true)]; + tensor var_794_cast_fp16 = matmul(transpose_x = var_794_transpose_x_1, transpose_y = var_794_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_794_cast_fp16")]; + fp16 var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_794_cast_fp16, y = var_795_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_806_axes_0 = const()[name = string("op_806_axes_0"), val = tensor([-1])]; + bool var_806_keep_dims_0 = const()[name = string("op_806_keep_dims_0"), val = bool(true)]; + tensor var_806_cast_fp16 = reduce_sum(axes = var_806_axes_0, keep_dims = var_806_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_806_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_806_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_809_perm_0 = const()[name = string("op_809_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_811 = const()[name = string("op_811"), val = tensor([1, 1, 3072])]; + tensor var_809_cast_fp16 = transpose(perm = var_809_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_26")]; + tensor input_33_cast_fp16 = reshape(shape = var_811, x = var_809_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700722752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707800704))))[name = string("model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_822_axes_0 = const()[name = string("op_822_axes_0"), val = tensor([-1])]; + tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707849920)))]; + tensor var_822_cast_fp16 = layer_norm(axes = var_822_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_20_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_822_cast_fp16")]; + tensor var_829 = const()[name = string("op_829"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_830 = transpose(perm = var_829, x = var_822_cast_fp16)[name = string("transpose_25")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_830)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_852_axes_0 = const()[name = string("op_852_axes_0"), val = tensor([2])]; + tensor var_852 = squeeze(axes = var_852_axes_0, x = hidden_states_23)[name = string("op_852")]; + tensor var_853 = const()[name = string("op_853"), val = tensor([0, 2, 1])]; + tensor var_854 = transpose(perm = var_853, x = var_852)[name = string("transpose_24")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_854)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_862_axes_0 = const()[name = string("op_862_axes_0"), val = tensor([-1])]; + tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707856128)))]; + tensor var_862_cast_fp16 = layer_norm(axes = var_862_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_21_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_862_cast_fp16")]; + tensor var_865 = const()[name = string("op_865"), val = tensor([0, 2, 1])]; + tensor var_867_axes_0 = const()[name = string("op_867_axes_0"), val = tensor([2])]; + tensor var_866 = transpose(perm = var_865, x = var_862_cast_fp16)[name = string("transpose_23")]; + tensor var_867 = expand_dims(axes = var_867_axes_0, x = var_866)[name = string("op_867")]; + string var_874_pad_type_0 = const()[name = string("op_874_pad_type_0"), val = string("valid")]; + tensor var_874_strides_0 = const()[name = string("op_874_strides_0"), val = tensor([1, 1])]; + tensor var_874_pad_0 = const()[name = string("op_874_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_874_dilations_0 = const()[name = string("op_874_dilations_0"), val = tensor([1, 1])]; + int32 var_874_groups_0 = const()[name = string("op_874_groups_0"), val = int32(1)]; + tensor var_874 = conv(dilations = var_874_dilations_0, groups = var_874_groups_0, pad = var_874_pad_0, pad_type = var_874_pad_type_0, strides = var_874_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_867)[name = string("op_874")]; + tensor var_875 = const()[name = string("op_875"), val = tensor([1, 24, 1, 128])]; + tensor var_876 = reshape(shape = var_875, x = var_874)[name = string("op_876")]; + string var_883_pad_type_0 = const()[name = string("op_883_pad_type_0"), val = string("valid")]; + tensor var_883_strides_0 = const()[name = string("op_883_strides_0"), val = tensor([1, 1])]; + tensor var_883_pad_0 = const()[name = string("op_883_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_883_dilations_0 = const()[name = string("op_883_dilations_0"), val = tensor([1, 1])]; + int32 var_883_groups_0 = const()[name = string("op_883_groups_0"), val = int32(1)]; + tensor var_883 = conv(dilations = var_883_dilations_0, groups = var_883_groups_0, pad = var_883_pad_0, pad_type = var_883_pad_type_0, strides = var_883_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_867)[name = string("op_883")]; + tensor var_884 = const()[name = string("op_884"), val = tensor([1, 8, 1, 128])]; + tensor var_885 = reshape(shape = var_884, x = var_883)[name = string("op_885")]; + string var_892_pad_type_0 = const()[name = string("op_892_pad_type_0"), val = string("valid")]; + tensor var_892_strides_0 = const()[name = string("op_892_strides_0"), val = tensor([1, 1])]; + tensor var_892_pad_0 = const()[name = string("op_892_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_892_dilations_0 = const()[name = string("op_892_dilations_0"), val = tensor([1, 1])]; + int32 var_892_groups_0 = const()[name = string("op_892_groups_0"), val = int32(1)]; + tensor var_892 = conv(dilations = var_892_dilations_0, groups = var_892_groups_0, pad = var_892_pad_0, pad_type = var_892_pad_type_0, strides = var_892_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_867)[name = string("op_892")]; + tensor var_893 = const()[name = string("op_893"), val = tensor([1, 8, 1, 128])]; + tensor var_894 = reshape(shape = var_893, x = var_892)[name = string("op_894")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_876)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_876)[name = string("x2_13")]; + tensor var_908_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_908_cast_fp16")]; + tensor var_909_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_909_cast_fp16")]; + tensor var_910_cast_fp16 = sub(x = var_908_cast_fp16, y = var_909_cast_fp16)[name = string("op_910_cast_fp16")]; + tensor var_911_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_911_cast_fp16")]; + tensor var_912_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_912_cast_fp16")]; + tensor var_913_cast_fp16 = add(x = var_911_cast_fp16, y = var_912_cast_fp16)[name = string("op_913_cast_fp16")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13_cast_fp16 = concat(axis = var_59, interleave = rotated_13_interleave_0, values = (var_910_cast_fp16, var_913_cast_fp16))[name = string("rotated_13_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_885)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_885)[name = string("x2_15")]; + tensor var_929_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_929_cast_fp16")]; + tensor var_930_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_930_cast_fp16")]; + tensor var_931_cast_fp16 = sub(x = var_929_cast_fp16, y = var_930_cast_fp16)[name = string("op_931_cast_fp16")]; + tensor var_932_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_932_cast_fp16")]; + tensor var_933_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_933_cast_fp16")]; + tensor var_934_cast_fp16 = add(x = var_932_cast_fp16, y = var_933_cast_fp16)[name = string("op_934_cast_fp16")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15_cast_fp16 = concat(axis = var_59, interleave = rotated_15_interleave_0, values = (var_931_cast_fp16, var_934_cast_fp16))[name = string("rotated_15_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([21])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([22])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_384, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([49])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([50])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; + tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; + tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; + int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; + bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; + tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_384, concat_31_values3_0))[name = string("concat_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_894, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; + tensor var_954_begin_0 = const()[name = string("op_954_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_954_end_0 = const()[name = string("op_954_end_0"), val = tensor([22, 8, 1024, 128])]; + tensor var_954_end_mask_0 = const()[name = string("op_954_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_954_cast_fp16 = slice_by_index(begin = var_954_begin_0, end = var_954_end_0, end_mask = var_954_end_mask_0, x = coreml_update_state_25)[name = string("op_954_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_954_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_956_begin_0 = const()[name = string("op_956_begin_0"), val = tensor([49, 0, 0, 0])]; + tensor var_956_end_0 = const()[name = string("op_956_end_0"), val = tensor([50, 8, 1024, 128])]; + tensor var_956_end_mask_0 = const()[name = string("op_956_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = coreml_update_state_25)[name = string("op_956_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_956_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_965 = const()[name = string("op_965"), val = tensor([1, 3, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_965, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_969 = const()[name = string("op_969"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_15_cast_fp16 = reshape(shape = var_969, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_972 = const()[name = string("op_972"), val = tensor([1, 3, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_972, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_976 = const()[name = string("op_976"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_15_cast_fp16 = reshape(shape = var_976, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")]; + bool var_979_transpose_x_1 = const()[name = string("op_979_transpose_x_1"), val = bool(false)]; + bool var_979_transpose_y_1 = const()[name = string("op_979_transpose_y_1"), val = bool(true)]; + tensor var_979_cast_fp16 = matmul(transpose_x = var_979_transpose_x_1, transpose_y = var_979_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_979_cast_fp16")]; + fp16 var_980_to_fp16 = const()[name = string("op_980_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_979_cast_fp16, y = var_980_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_991_axes_0 = const()[name = string("op_991_axes_0"), val = tensor([-1])]; + bool var_991_keep_dims_0 = const()[name = string("op_991_keep_dims_0"), val = bool(true)]; + tensor var_991_cast_fp16 = reduce_sum(axes = var_991_axes_0, keep_dims = var_991_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_991_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_991_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_994_perm_0 = const()[name = string("op_994_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_996 = const()[name = string("op_996"), val = tensor([1, 1, 3072])]; + tensor var_994_cast_fp16 = transpose(perm = var_994_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_22")]; + tensor input_47_cast_fp16 = reshape(shape = var_996, x = var_994_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_21_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707862336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714940288))))[name = string("model_model_layers_21_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_1007_axes_0 = const()[name = string("op_1007_axes_0"), val = tensor([-1])]; + tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714989504)))]; + tensor var_1007_cast_fp16 = layer_norm(axes = var_1007_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_21_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1007_cast_fp16")]; + tensor var_1014 = const()[name = string("op_1014"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_1015 = transpose(perm = var_1014, x = var_1007_cast_fp16)[name = string("transpose_21")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1015)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1037_axes_0 = const()[name = string("op_1037_axes_0"), val = tensor([2])]; + tensor var_1037 = squeeze(axes = var_1037_axes_0, x = hidden_states_31)[name = string("op_1037")]; + tensor var_1038 = const()[name = string("op_1038"), val = tensor([0, 2, 1])]; + tensor var_1039 = transpose(perm = var_1038, x = var_1037)[name = string("transpose_20")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1039)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1047_axes_0 = const()[name = string("op_1047_axes_0"), val = tensor([-1])]; + tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714995712)))]; + tensor var_1047_cast_fp16 = layer_norm(axes = var_1047_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_22_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1047_cast_fp16")]; + tensor var_1050 = const()[name = string("op_1050"), val = tensor([0, 2, 1])]; + tensor var_1052_axes_0 = const()[name = string("op_1052_axes_0"), val = tensor([2])]; + tensor var_1051 = transpose(perm = var_1050, x = var_1047_cast_fp16)[name = string("transpose_19")]; + tensor var_1052 = expand_dims(axes = var_1052_axes_0, x = var_1051)[name = string("op_1052")]; + string var_1059_pad_type_0 = const()[name = string("op_1059_pad_type_0"), val = string("valid")]; + tensor var_1059_strides_0 = const()[name = string("op_1059_strides_0"), val = tensor([1, 1])]; + tensor var_1059_pad_0 = const()[name = string("op_1059_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1059_dilations_0 = const()[name = string("op_1059_dilations_0"), val = tensor([1, 1])]; + int32 var_1059_groups_0 = const()[name = string("op_1059_groups_0"), val = int32(1)]; + tensor var_1059 = conv(dilations = var_1059_dilations_0, groups = var_1059_groups_0, pad = var_1059_pad_0, pad_type = var_1059_pad_type_0, strides = var_1059_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_1052)[name = string("op_1059")]; + tensor var_1060 = const()[name = string("op_1060"), val = tensor([1, 24, 1, 128])]; + tensor var_1061 = reshape(shape = var_1060, x = var_1059)[name = string("op_1061")]; + string var_1068_pad_type_0 = const()[name = string("op_1068_pad_type_0"), val = string("valid")]; + tensor var_1068_strides_0 = const()[name = string("op_1068_strides_0"), val = tensor([1, 1])]; + tensor var_1068_pad_0 = const()[name = string("op_1068_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1068_dilations_0 = const()[name = string("op_1068_dilations_0"), val = tensor([1, 1])]; + int32 var_1068_groups_0 = const()[name = string("op_1068_groups_0"), val = int32(1)]; + tensor var_1068 = conv(dilations = var_1068_dilations_0, groups = var_1068_groups_0, pad = var_1068_pad_0, pad_type = var_1068_pad_type_0, strides = var_1068_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_1052)[name = string("op_1068")]; + tensor var_1069 = const()[name = string("op_1069"), val = tensor([1, 8, 1, 128])]; + tensor var_1070 = reshape(shape = var_1069, x = var_1068)[name = string("op_1070")]; + string var_1077_pad_type_0 = const()[name = string("op_1077_pad_type_0"), val = string("valid")]; + tensor var_1077_strides_0 = const()[name = string("op_1077_strides_0"), val = tensor([1, 1])]; + tensor var_1077_pad_0 = const()[name = string("op_1077_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1077_dilations_0 = const()[name = string("op_1077_dilations_0"), val = tensor([1, 1])]; + int32 var_1077_groups_0 = const()[name = string("op_1077_groups_0"), val = int32(1)]; + tensor var_1077 = conv(dilations = var_1077_dilations_0, groups = var_1077_groups_0, pad = var_1077_pad_0, pad_type = var_1077_pad_type_0, strides = var_1077_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_1052)[name = string("op_1077")]; + tensor var_1078 = const()[name = string("op_1078"), val = tensor([1, 8, 1, 128])]; + tensor var_1079 = reshape(shape = var_1078, x = var_1077)[name = string("op_1079")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1061)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1061)[name = string("x2_17")]; + tensor var_1093_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1093_cast_fp16")]; + tensor var_1094_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1094_cast_fp16")]; + tensor var_1095_cast_fp16 = sub(x = var_1093_cast_fp16, y = var_1094_cast_fp16)[name = string("op_1095_cast_fp16")]; + tensor var_1096_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1096_cast_fp16")]; + tensor var_1097_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1097_cast_fp16")]; + tensor var_1098_cast_fp16 = add(x = var_1096_cast_fp16, y = var_1097_cast_fp16)[name = string("op_1098_cast_fp16")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17_cast_fp16 = concat(axis = var_59, interleave = rotated_17_interleave_0, values = (var_1095_cast_fp16, var_1098_cast_fp16))[name = string("rotated_17_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1070)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1070)[name = string("x2_19")]; + tensor var_1114_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1114_cast_fp16")]; + tensor var_1115_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1115_cast_fp16")]; + tensor var_1116_cast_fp16 = sub(x = var_1114_cast_fp16, y = var_1115_cast_fp16)[name = string("op_1116_cast_fp16")]; + tensor var_1117_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1117_cast_fp16")]; + tensor var_1118_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1118_cast_fp16")]; + tensor var_1119_cast_fp16 = add(x = var_1117_cast_fp16, y = var_1118_cast_fp16)[name = string("op_1119_cast_fp16")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19_cast_fp16 = concat(axis = var_59, interleave = rotated_19_interleave_0, values = (var_1116_cast_fp16, var_1119_cast_fp16))[name = string("rotated_19_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([22])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([23])]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; + tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_384, concat_35_values3_0))[name = string("concat_35")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([50])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([51])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_384, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1079, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; + tensor var_1139_begin_0 = const()[name = string("op_1139_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_1139_end_0 = const()[name = string("op_1139_end_0"), val = tensor([23, 8, 1024, 128])]; + tensor var_1139_end_mask_0 = const()[name = string("op_1139_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1139_cast_fp16 = slice_by_index(begin = var_1139_begin_0, end = var_1139_end_0, end_mask = var_1139_end_mask_0, x = coreml_update_state_27)[name = string("op_1139_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1139_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1141_begin_0 = const()[name = string("op_1141_begin_0"), val = tensor([50, 0, 0, 0])]; + tensor var_1141_end_0 = const()[name = string("op_1141_end_0"), val = tensor([51, 8, 1024, 128])]; + tensor var_1141_end_mask_0 = const()[name = string("op_1141_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1141_cast_fp16 = slice_by_index(begin = var_1141_begin_0, end = var_1141_end_0, end_mask = var_1141_end_mask_0, x = coreml_update_state_27)[name = string("op_1141_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1141_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1150 = const()[name = string("op_1150"), val = tensor([1, 3, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1150, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1154 = const()[name = string("op_1154"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_19_cast_fp16 = reshape(shape = var_1154, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1157 = const()[name = string("op_1157"), val = tensor([1, 3, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1157, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1161 = const()[name = string("op_1161"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_19_cast_fp16 = reshape(shape = var_1161, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")]; + bool var_1164_transpose_x_1 = const()[name = string("op_1164_transpose_x_1"), val = bool(false)]; + bool var_1164_transpose_y_1 = const()[name = string("op_1164_transpose_y_1"), val = bool(true)]; + tensor var_1164_cast_fp16 = matmul(transpose_x = var_1164_transpose_x_1, transpose_y = var_1164_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1164_cast_fp16")]; + fp16 var_1165_to_fp16 = const()[name = string("op_1165_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_17_cast_fp16 = mul(x = var_1164_cast_fp16, y = var_1165_to_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1176_axes_0 = const()[name = string("op_1176_axes_0"), val = tensor([-1])]; + bool var_1176_keep_dims_0 = const()[name = string("op_1176_keep_dims_0"), val = bool(true)]; + tensor var_1176_cast_fp16 = reduce_sum(axes = var_1176_axes_0, keep_dims = var_1176_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1176_cast_fp16")]; + tensor attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1176_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1179_perm_0 = const()[name = string("op_1179_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1181 = const()[name = string("op_1181"), val = tensor([1, 1, 3072])]; + tensor var_1179_cast_fp16 = transpose(perm = var_1179_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_18")]; + tensor input_61_cast_fp16 = reshape(shape = var_1181, x = var_1179_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_22_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(715001920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722079872))))[name = string("model_model_layers_22_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1192_axes_0 = const()[name = string("op_1192_axes_0"), val = tensor([-1])]; + tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722129088)))]; + tensor var_1192_cast_fp16 = layer_norm(axes = var_1192_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_22_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1192_cast_fp16")]; + tensor var_1199 = const()[name = string("op_1199"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1200 = transpose(perm = var_1199, x = var_1192_cast_fp16)[name = string("transpose_17")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1200)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1222_axes_0 = const()[name = string("op_1222_axes_0"), val = tensor([2])]; + tensor var_1222 = squeeze(axes = var_1222_axes_0, x = hidden_states_39)[name = string("op_1222")]; + tensor var_1223 = const()[name = string("op_1223"), val = tensor([0, 2, 1])]; + tensor var_1224 = transpose(perm = var_1223, x = var_1222)[name = string("transpose_16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1224)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1232_axes_0 = const()[name = string("op_1232_axes_0"), val = tensor([-1])]; + tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722135296)))]; + tensor var_1232_cast_fp16 = layer_norm(axes = var_1232_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_23_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1232_cast_fp16")]; + tensor var_1235 = const()[name = string("op_1235"), val = tensor([0, 2, 1])]; + tensor var_1237_axes_0 = const()[name = string("op_1237_axes_0"), val = tensor([2])]; + tensor var_1236 = transpose(perm = var_1235, x = var_1232_cast_fp16)[name = string("transpose_15")]; + tensor var_1237 = expand_dims(axes = var_1237_axes_0, x = var_1236)[name = string("op_1237")]; + string var_1244_pad_type_0 = const()[name = string("op_1244_pad_type_0"), val = string("valid")]; + tensor var_1244_strides_0 = const()[name = string("op_1244_strides_0"), val = tensor([1, 1])]; + tensor var_1244_pad_0 = const()[name = string("op_1244_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1244_dilations_0 = const()[name = string("op_1244_dilations_0"), val = tensor([1, 1])]; + int32 var_1244_groups_0 = const()[name = string("op_1244_groups_0"), val = int32(1)]; + tensor var_1244 = conv(dilations = var_1244_dilations_0, groups = var_1244_groups_0, pad = var_1244_pad_0, pad_type = var_1244_pad_type_0, strides = var_1244_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_1237)[name = string("op_1244")]; + tensor var_1245 = const()[name = string("op_1245"), val = tensor([1, 24, 1, 128])]; + tensor var_1246 = reshape(shape = var_1245, x = var_1244)[name = string("op_1246")]; + string var_1253_pad_type_0 = const()[name = string("op_1253_pad_type_0"), val = string("valid")]; + tensor var_1253_strides_0 = const()[name = string("op_1253_strides_0"), val = tensor([1, 1])]; + tensor var_1253_pad_0 = const()[name = string("op_1253_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1253_dilations_0 = const()[name = string("op_1253_dilations_0"), val = tensor([1, 1])]; + int32 var_1253_groups_0 = const()[name = string("op_1253_groups_0"), val = int32(1)]; + tensor var_1253 = conv(dilations = var_1253_dilations_0, groups = var_1253_groups_0, pad = var_1253_pad_0, pad_type = var_1253_pad_type_0, strides = var_1253_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_1237)[name = string("op_1253")]; + tensor var_1254 = const()[name = string("op_1254"), val = tensor([1, 8, 1, 128])]; + tensor var_1255 = reshape(shape = var_1254, x = var_1253)[name = string("op_1255")]; + string var_1262_pad_type_0 = const()[name = string("op_1262_pad_type_0"), val = string("valid")]; + tensor var_1262_strides_0 = const()[name = string("op_1262_strides_0"), val = tensor([1, 1])]; + tensor var_1262_pad_0 = const()[name = string("op_1262_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1262_dilations_0 = const()[name = string("op_1262_dilations_0"), val = tensor([1, 1])]; + int32 var_1262_groups_0 = const()[name = string("op_1262_groups_0"), val = int32(1)]; + tensor var_1262 = conv(dilations = var_1262_dilations_0, groups = var_1262_groups_0, pad = var_1262_pad_0, pad_type = var_1262_pad_type_0, strides = var_1262_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_1237)[name = string("op_1262")]; + tensor var_1263 = const()[name = string("op_1263"), val = tensor([1, 8, 1, 128])]; + tensor var_1264 = reshape(shape = var_1263, x = var_1262)[name = string("op_1264")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1246)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1246)[name = string("x2_21")]; + tensor var_1278_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1278_cast_fp16")]; + tensor var_1279_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1279_cast_fp16")]; + tensor var_1280_cast_fp16 = sub(x = var_1278_cast_fp16, y = var_1279_cast_fp16)[name = string("op_1280_cast_fp16")]; + tensor var_1281_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1281_cast_fp16")]; + tensor var_1282_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1282_cast_fp16")]; + tensor var_1283_cast_fp16 = add(x = var_1281_cast_fp16, y = var_1282_cast_fp16)[name = string("op_1283_cast_fp16")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21_cast_fp16 = concat(axis = var_59, interleave = rotated_21_interleave_0, values = (var_1280_cast_fp16, var_1283_cast_fp16))[name = string("rotated_21_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1255)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1255)[name = string("x2_23")]; + tensor var_1299_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1299_cast_fp16")]; + tensor var_1300_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1300_cast_fp16")]; + tensor var_1301_cast_fp16 = sub(x = var_1299_cast_fp16, y = var_1300_cast_fp16)[name = string("op_1301_cast_fp16")]; + tensor var_1302_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1302_cast_fp16")]; + tensor var_1303_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1303_cast_fp16")]; + tensor var_1304_cast_fp16 = add(x = var_1302_cast_fp16, y = var_1303_cast_fp16)[name = string("op_1304_cast_fp16")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23_cast_fp16 = concat(axis = var_59, interleave = rotated_23_interleave_0, values = (var_1301_cast_fp16, var_1304_cast_fp16))[name = string("rotated_23_cast_fp16")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([23])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([24])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_384, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([51])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([52])]; + int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; + bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; + tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; + tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; + tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; + int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; + bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; + tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_384, concat_47_values3_0))[name = string("concat_47")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1264, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; + tensor var_1324_begin_0 = const()[name = string("op_1324_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_1324_end_0 = const()[name = string("op_1324_end_0"), val = tensor([24, 8, 1024, 128])]; + tensor var_1324_end_mask_0 = const()[name = string("op_1324_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = coreml_update_state_29)[name = string("op_1324_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1324_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1326_begin_0 = const()[name = string("op_1326_begin_0"), val = tensor([51, 0, 0, 0])]; + tensor var_1326_end_0 = const()[name = string("op_1326_end_0"), val = tensor([52, 8, 1024, 128])]; + tensor var_1326_end_mask_0 = const()[name = string("op_1326_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1326_cast_fp16 = slice_by_index(begin = var_1326_begin_0, end = var_1326_end_0, end_mask = var_1326_end_mask_0, x = coreml_update_state_29)[name = string("op_1326_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1326_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1335 = const()[name = string("op_1335"), val = tensor([1, 3, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1335, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1339 = const()[name = string("op_1339"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_23_cast_fp16 = reshape(shape = var_1339, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1342 = const()[name = string("op_1342"), val = tensor([1, 3, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1342, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_1346 = const()[name = string("op_1346"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_23_cast_fp16 = reshape(shape = var_1346, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")]; + bool var_1349_transpose_x_1 = const()[name = string("op_1349_transpose_x_1"), val = bool(false)]; + bool var_1349_transpose_y_1 = const()[name = string("op_1349_transpose_y_1"), val = bool(true)]; + tensor var_1349_cast_fp16 = matmul(transpose_x = var_1349_transpose_x_1, transpose_y = var_1349_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1349_cast_fp16")]; + fp16 var_1350_to_fp16 = const()[name = string("op_1350_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_21_cast_fp16 = mul(x = var_1349_cast_fp16, y = var_1350_to_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1361_axes_0 = const()[name = string("op_1361_axes_0"), val = tensor([-1])]; + bool var_1361_keep_dims_0 = const()[name = string("op_1361_keep_dims_0"), val = bool(true)]; + tensor var_1361_cast_fp16 = reduce_sum(axes = var_1361_axes_0, keep_dims = var_1361_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1361_cast_fp16")]; + tensor attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1361_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_1364_perm_0 = const()[name = string("op_1364_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1366 = const()[name = string("op_1366"), val = tensor([1, 1, 3072])]; + tensor var_1364_cast_fp16 = transpose(perm = var_1364_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_14")]; + tensor input_75_cast_fp16 = reshape(shape = var_1366, x = var_1364_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_23_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722141504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729219456))))[name = string("model_model_layers_23_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1377_axes_0 = const()[name = string("op_1377_axes_0"), val = tensor([-1])]; + tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729268672)))]; + tensor var_1377_cast_fp16 = layer_norm(axes = var_1377_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_23_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1377_cast_fp16")]; + tensor var_1384 = const()[name = string("op_1384"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1385 = transpose(perm = var_1384, x = var_1377_cast_fp16)[name = string("transpose_13")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1385)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1407_axes_0 = const()[name = string("op_1407_axes_0"), val = tensor([2])]; + tensor var_1407 = squeeze(axes = var_1407_axes_0, x = hidden_states_47)[name = string("op_1407")]; + tensor var_1408 = const()[name = string("op_1408"), val = tensor([0, 2, 1])]; + tensor var_1409 = transpose(perm = var_1408, x = var_1407)[name = string("transpose_12")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1409)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1417_axes_0 = const()[name = string("op_1417_axes_0"), val = tensor([-1])]; + tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729274880)))]; + tensor var_1417_cast_fp16 = layer_norm(axes = var_1417_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_24_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1417_cast_fp16")]; + tensor var_1420 = const()[name = string("op_1420"), val = tensor([0, 2, 1])]; + tensor var_1422_axes_0 = const()[name = string("op_1422_axes_0"), val = tensor([2])]; + tensor var_1421 = transpose(perm = var_1420, x = var_1417_cast_fp16)[name = string("transpose_11")]; + tensor var_1422 = expand_dims(axes = var_1422_axes_0, x = var_1421)[name = string("op_1422")]; + string var_1429_pad_type_0 = const()[name = string("op_1429_pad_type_0"), val = string("valid")]; + tensor var_1429_strides_0 = const()[name = string("op_1429_strides_0"), val = tensor([1, 1])]; + tensor var_1429_pad_0 = const()[name = string("op_1429_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1429_dilations_0 = const()[name = string("op_1429_dilations_0"), val = tensor([1, 1])]; + int32 var_1429_groups_0 = const()[name = string("op_1429_groups_0"), val = int32(1)]; + tensor var_1429 = conv(dilations = var_1429_dilations_0, groups = var_1429_groups_0, pad = var_1429_pad_0, pad_type = var_1429_pad_type_0, strides = var_1429_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_1422)[name = string("op_1429")]; + tensor var_1430 = const()[name = string("op_1430"), val = tensor([1, 24, 1, 128])]; + tensor var_1431 = reshape(shape = var_1430, x = var_1429)[name = string("op_1431")]; + string var_1438_pad_type_0 = const()[name = string("op_1438_pad_type_0"), val = string("valid")]; + tensor var_1438_strides_0 = const()[name = string("op_1438_strides_0"), val = tensor([1, 1])]; + tensor var_1438_pad_0 = const()[name = string("op_1438_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1438_dilations_0 = const()[name = string("op_1438_dilations_0"), val = tensor([1, 1])]; + int32 var_1438_groups_0 = const()[name = string("op_1438_groups_0"), val = int32(1)]; + tensor var_1438 = conv(dilations = var_1438_dilations_0, groups = var_1438_groups_0, pad = var_1438_pad_0, pad_type = var_1438_pad_type_0, strides = var_1438_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_1422)[name = string("op_1438")]; + tensor var_1439 = const()[name = string("op_1439"), val = tensor([1, 8, 1, 128])]; + tensor var_1440 = reshape(shape = var_1439, x = var_1438)[name = string("op_1440")]; + string var_1447_pad_type_0 = const()[name = string("op_1447_pad_type_0"), val = string("valid")]; + tensor var_1447_strides_0 = const()[name = string("op_1447_strides_0"), val = tensor([1, 1])]; + tensor var_1447_pad_0 = const()[name = string("op_1447_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1447_dilations_0 = const()[name = string("op_1447_dilations_0"), val = tensor([1, 1])]; + int32 var_1447_groups_0 = const()[name = string("op_1447_groups_0"), val = int32(1)]; + tensor var_1447 = conv(dilations = var_1447_dilations_0, groups = var_1447_groups_0, pad = var_1447_pad_0, pad_type = var_1447_pad_type_0, strides = var_1447_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_1422)[name = string("op_1447")]; + tensor var_1448 = const()[name = string("op_1448"), val = tensor([1, 8, 1, 128])]; + tensor var_1449 = reshape(shape = var_1448, x = var_1447)[name = string("op_1449")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1431)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1431)[name = string("x2_25")]; + tensor var_1463_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1463_cast_fp16")]; + tensor var_1464_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1464_cast_fp16")]; + tensor var_1465_cast_fp16 = sub(x = var_1463_cast_fp16, y = var_1464_cast_fp16)[name = string("op_1465_cast_fp16")]; + tensor var_1466_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1466_cast_fp16")]; + tensor var_1467_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1467_cast_fp16")]; + tensor var_1468_cast_fp16 = add(x = var_1466_cast_fp16, y = var_1467_cast_fp16)[name = string("op_1468_cast_fp16")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25_cast_fp16 = concat(axis = var_59, interleave = rotated_25_interleave_0, values = (var_1465_cast_fp16, var_1468_cast_fp16))[name = string("rotated_25_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1440)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1440)[name = string("x2_27")]; + tensor var_1484_cast_fp16 = mul(x = x1_27, y = cos_3_cast_fp16)[name = string("op_1484_cast_fp16")]; + tensor var_1485_cast_fp16 = mul(x = x2_27, y = sin_3_cast_fp16)[name = string("op_1485_cast_fp16")]; + tensor var_1486_cast_fp16 = sub(x = var_1484_cast_fp16, y = var_1485_cast_fp16)[name = string("op_1486_cast_fp16")]; + tensor var_1487_cast_fp16 = mul(x = x2_27, y = cos_3_cast_fp16)[name = string("op_1487_cast_fp16")]; + tensor var_1488_cast_fp16 = mul(x = x1_27, y = sin_3_cast_fp16)[name = string("op_1488_cast_fp16")]; + tensor var_1489_cast_fp16 = add(x = var_1487_cast_fp16, y = var_1488_cast_fp16)[name = string("op_1489_cast_fp16")]; + bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; + tensor rotated_27_cast_fp16 = concat(axis = var_59, interleave = rotated_27_interleave_0, values = (var_1486_cast_fp16, var_1489_cast_fp16))[name = string("rotated_27_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([24])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([25])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_384, concat_51_values3_0))[name = string("concat_51")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27_cast_fp16, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([52])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([53])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; + tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; + tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_384, concat_55_values3_0))[name = string("concat_55")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1449, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; + tensor var_1509_begin_0 = const()[name = string("op_1509_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor var_1509_end_0 = const()[name = string("op_1509_end_0"), val = tensor([25, 8, 1024, 128])]; + tensor var_1509_end_mask_0 = const()[name = string("op_1509_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1509_cast_fp16 = slice_by_index(begin = var_1509_begin_0, end = var_1509_end_0, end_mask = var_1509_end_mask_0, x = coreml_update_state_31)[name = string("op_1509_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1509_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_1511_begin_0 = const()[name = string("op_1511_begin_0"), val = tensor([52, 0, 0, 0])]; + tensor var_1511_end_0 = const()[name = string("op_1511_end_0"), val = tensor([53, 8, 1024, 128])]; + tensor var_1511_end_mask_0 = const()[name = string("op_1511_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1511_cast_fp16 = slice_by_index(begin = var_1511_begin_0, end = var_1511_end_0, end_mask = var_1511_end_mask_0, x = coreml_update_state_31)[name = string("op_1511_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1511_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1520 = const()[name = string("op_1520"), val = tensor([1, 3, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1520, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1524 = const()[name = string("op_1524"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_27_cast_fp16 = reshape(shape = var_1524, x = x_181_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1527 = const()[name = string("op_1527"), val = tensor([1, 3, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1527, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_1531 = const()[name = string("op_1531"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_27_cast_fp16 = reshape(shape = var_1531, x = x_187_cast_fp16)[name = string("value_states_27_cast_fp16")]; + bool var_1534_transpose_x_1 = const()[name = string("op_1534_transpose_x_1"), val = bool(false)]; + bool var_1534_transpose_y_1 = const()[name = string("op_1534_transpose_y_1"), val = bool(true)]; + tensor var_1534_cast_fp16 = matmul(transpose_x = var_1534_transpose_x_1, transpose_y = var_1534_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_27_cast_fp16)[name = string("op_1534_cast_fp16")]; + fp16 var_1535_to_fp16 = const()[name = string("op_1535_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; + tensor var_1546_axes_0 = const()[name = string("op_1546_axes_0"), val = tensor([-1])]; + bool var_1546_keep_dims_0 = const()[name = string("op_1546_keep_dims_0"), val = bool(true)]; + tensor var_1546_cast_fp16 = reduce_sum(axes = var_1546_axes_0, keep_dims = var_1546_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1546_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1546_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_27_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1549_perm_0 = const()[name = string("op_1549_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1551 = const()[name = string("op_1551"), val = tensor([1, 1, 3072])]; + tensor var_1549_cast_fp16 = transpose(perm = var_1549_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_10")]; + tensor input_89_cast_fp16 = reshape(shape = var_1551, x = var_1549_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_24_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729281088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736359040))))[name = string("model_model_layers_24_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; + bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; + tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1562_axes_0 = const()[name = string("op_1562_axes_0"), val = tensor([-1])]; + tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736408256)))]; + tensor var_1562_cast_fp16 = layer_norm(axes = var_1562_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_24_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1562_cast_fp16")]; + tensor var_1569 = const()[name = string("op_1569"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1570 = transpose(perm = var_1569, x = var_1562_cast_fp16)[name = string("transpose_9")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1570)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; + tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; + tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; + int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; + tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; + tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; + tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; + string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; + tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; + tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; + tensor var_1592_axes_0 = const()[name = string("op_1592_axes_0"), val = tensor([2])]; + tensor var_1592 = squeeze(axes = var_1592_axes_0, x = hidden_states_55)[name = string("op_1592")]; + tensor var_1593 = const()[name = string("op_1593"), val = tensor([0, 2, 1])]; + tensor var_1594 = transpose(perm = var_1593, x = var_1592)[name = string("transpose_8")]; + tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1594)[name = string("hidden_states_57_cast_fp16")]; + tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; + bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; + tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; + tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor var_1602_axes_0 = const()[name = string("op_1602_axes_0"), val = tensor([-1])]; + tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736414464)))]; + tensor var_1602_cast_fp16 = layer_norm(axes = var_1602_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_25_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1602_cast_fp16")]; + tensor var_1605 = const()[name = string("op_1605"), val = tensor([0, 2, 1])]; + tensor var_1607_axes_0 = const()[name = string("op_1607_axes_0"), val = tensor([2])]; + tensor var_1606 = transpose(perm = var_1605, x = var_1602_cast_fp16)[name = string("transpose_7")]; + tensor var_1607 = expand_dims(axes = var_1607_axes_0, x = var_1606)[name = string("op_1607")]; + string var_1614_pad_type_0 = const()[name = string("op_1614_pad_type_0"), val = string("valid")]; + tensor var_1614_strides_0 = const()[name = string("op_1614_strides_0"), val = tensor([1, 1])]; + tensor var_1614_pad_0 = const()[name = string("op_1614_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1614_dilations_0 = const()[name = string("op_1614_dilations_0"), val = tensor([1, 1])]; + int32 var_1614_groups_0 = const()[name = string("op_1614_groups_0"), val = int32(1)]; + tensor var_1614 = conv(dilations = var_1614_dilations_0, groups = var_1614_groups_0, pad = var_1614_pad_0, pad_type = var_1614_pad_type_0, strides = var_1614_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_1607)[name = string("op_1614")]; + tensor var_1615 = const()[name = string("op_1615"), val = tensor([1, 24, 1, 128])]; + tensor var_1616 = reshape(shape = var_1615, x = var_1614)[name = string("op_1616")]; + string var_1623_pad_type_0 = const()[name = string("op_1623_pad_type_0"), val = string("valid")]; + tensor var_1623_strides_0 = const()[name = string("op_1623_strides_0"), val = tensor([1, 1])]; + tensor var_1623_pad_0 = const()[name = string("op_1623_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1623_dilations_0 = const()[name = string("op_1623_dilations_0"), val = tensor([1, 1])]; + int32 var_1623_groups_0 = const()[name = string("op_1623_groups_0"), val = int32(1)]; + tensor var_1623 = conv(dilations = var_1623_dilations_0, groups = var_1623_groups_0, pad = var_1623_pad_0, pad_type = var_1623_pad_type_0, strides = var_1623_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_1607)[name = string("op_1623")]; + tensor var_1624 = const()[name = string("op_1624"), val = tensor([1, 8, 1, 128])]; + tensor var_1625 = reshape(shape = var_1624, x = var_1623)[name = string("op_1625")]; + string var_1632_pad_type_0 = const()[name = string("op_1632_pad_type_0"), val = string("valid")]; + tensor var_1632_strides_0 = const()[name = string("op_1632_strides_0"), val = tensor([1, 1])]; + tensor var_1632_pad_0 = const()[name = string("op_1632_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1632_dilations_0 = const()[name = string("op_1632_dilations_0"), val = tensor([1, 1])]; + int32 var_1632_groups_0 = const()[name = string("op_1632_groups_0"), val = int32(1)]; + tensor var_1632 = conv(dilations = var_1632_dilations_0, groups = var_1632_groups_0, pad = var_1632_pad_0, pad_type = var_1632_pad_type_0, strides = var_1632_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_1607)[name = string("op_1632")]; + tensor var_1633 = const()[name = string("op_1633"), val = tensor([1, 8, 1, 128])]; + tensor var_1634 = reshape(shape = var_1633, x = var_1632)[name = string("op_1634")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1616)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1616)[name = string("x2_29")]; + tensor var_1648_cast_fp16 = mul(x = x1_29, y = cos_3_cast_fp16)[name = string("op_1648_cast_fp16")]; + tensor var_1649_cast_fp16 = mul(x = x2_29, y = sin_3_cast_fp16)[name = string("op_1649_cast_fp16")]; + tensor var_1650_cast_fp16 = sub(x = var_1648_cast_fp16, y = var_1649_cast_fp16)[name = string("op_1650_cast_fp16")]; + tensor var_1651_cast_fp16 = mul(x = x2_29, y = cos_3_cast_fp16)[name = string("op_1651_cast_fp16")]; + tensor var_1652_cast_fp16 = mul(x = x1_29, y = sin_3_cast_fp16)[name = string("op_1652_cast_fp16")]; + tensor var_1653_cast_fp16 = add(x = var_1651_cast_fp16, y = var_1652_cast_fp16)[name = string("op_1653_cast_fp16")]; + bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; + tensor rotated_29_cast_fp16 = concat(axis = var_59, interleave = rotated_29_interleave_0, values = (var_1650_cast_fp16, var_1653_cast_fp16))[name = string("rotated_29_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_1625)[name = string("x1_31")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_1625)[name = string("x2_31")]; + tensor var_1669_cast_fp16 = mul(x = x1_31, y = cos_3_cast_fp16)[name = string("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = mul(x = x2_31, y = sin_3_cast_fp16)[name = string("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = sub(x = var_1669_cast_fp16, y = var_1670_cast_fp16)[name = string("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = mul(x = x2_31, y = cos_3_cast_fp16)[name = string("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = mul(x = x1_31, y = sin_3_cast_fp16)[name = string("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = add(x = var_1672_cast_fp16, y = var_1673_cast_fp16)[name = string("op_1674_cast_fp16")]; + bool rotated_31_interleave_0 = const()[name = string("rotated_31_interleave_0"), val = bool(false)]; + tensor rotated_31_cast_fp16 = concat(axis = var_59, interleave = rotated_31_interleave_0, values = (var_1671_cast_fp16, var_1674_cast_fp16))[name = string("rotated_31_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([25])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([26])]; + int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; + bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; + tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; + tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; + tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_384, concat_59_values3_0))[name = string("concat_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_31_cast_fp16, x = coreml_update_state_31)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_32 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([53])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([54])]; + int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; + bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; + tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_384, concat_63_values3_0))[name = string("concat_63")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_1634, x = coreml_update_state_32)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_33 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; + tensor var_1694_begin_0 = const()[name = string("op_1694_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor var_1694_end_0 = const()[name = string("op_1694_end_0"), val = tensor([26, 8, 1024, 128])]; + tensor var_1694_end_mask_0 = const()[name = string("op_1694_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1694_cast_fp16 = slice_by_index(begin = var_1694_begin_0, end = var_1694_end_0, end_mask = var_1694_end_mask_0, x = coreml_update_state_33)[name = string("op_1694_cast_fp16")]; + tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; + tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_1694_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; + tensor var_1696_begin_0 = const()[name = string("op_1696_begin_0"), val = tensor([53, 0, 0, 0])]; + tensor var_1696_end_0 = const()[name = string("op_1696_end_0"), val = tensor([54, 8, 1024, 128])]; + tensor var_1696_end_mask_0 = const()[name = string("op_1696_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1696_cast_fp16 = slice_by_index(begin = var_1696_begin_0, end = var_1696_end_0, end_mask = var_1696_end_mask_0, x = coreml_update_state_33)[name = string("op_1696_cast_fp16")]; + tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; + tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_1696_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; + tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; + tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_1705 = const()[name = string("op_1705"), val = tensor([1, 3, 1, 1])]; + tensor x_209_cast_fp16 = tile(reps = var_1705, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1709 = const()[name = string("op_1709"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_31_cast_fp16 = reshape(shape = var_1709, x = x_209_cast_fp16)[name = string("key_states_31_cast_fp16")]; + tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; + tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_1712 = const()[name = string("op_1712"), val = tensor([1, 3, 1, 1])]; + tensor x_215_cast_fp16 = tile(reps = var_1712, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; + tensor var_1716 = const()[name = string("op_1716"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_31_cast_fp16 = reshape(shape = var_1716, x = x_215_cast_fp16)[name = string("value_states_31_cast_fp16")]; + bool var_1719_transpose_x_1 = const()[name = string("op_1719_transpose_x_1"), val = bool(false)]; + bool var_1719_transpose_y_1 = const()[name = string("op_1719_transpose_y_1"), val = bool(true)]; + tensor var_1719_cast_fp16 = matmul(transpose_x = var_1719_transpose_x_1, transpose_y = var_1719_transpose_y_1, x = rotated_29_cast_fp16, y = key_states_31_cast_fp16)[name = string("op_1719_cast_fp16")]; + fp16 var_1720_to_fp16 = const()[name = string("op_1720_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_29_cast_fp16 = mul(x = var_1719_cast_fp16, y = var_1720_to_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor exp_x_15_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_15_cast_fp16")]; + tensor var_1731_axes_0 = const()[name = string("op_1731_axes_0"), val = tensor([-1])]; + bool var_1731_keep_dims_0 = const()[name = string("op_1731_keep_dims_0"), val = bool(true)]; + tensor var_1731_cast_fp16 = reduce_sum(axes = var_1731_axes_0, keep_dims = var_1731_keep_dims_0, x = exp_x_15_cast_fp16)[name = string("op_1731_cast_fp16")]; + tensor attn_weights_31_cast_fp16 = real_div(x = exp_x_15_cast_fp16, y = var_1731_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = attn_weights_31_cast_fp16, y = value_states_31_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_1734_perm_0 = const()[name = string("op_1734_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1736 = const()[name = string("op_1736"), val = tensor([1, 1, 3072])]; + tensor var_1734_cast_fp16 = transpose(perm = var_1734_perm_0, x = attn_output_43_cast_fp16)[name = string("transpose_6")]; + tensor input_103_cast_fp16 = reshape(shape = var_1736, x = var_1734_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_25_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736420672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743498624))))[name = string("model_model_layers_25_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; + bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; + tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; + tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_1747_axes_0 = const()[name = string("op_1747_axes_0"), val = tensor([-1])]; + tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743547840)))]; + tensor var_1747_cast_fp16 = layer_norm(axes = var_1747_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_25_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1747_cast_fp16")]; + tensor var_1754 = const()[name = string("op_1754"), val = tensor([0, 2, 1])]; + tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; + tensor var_1755 = transpose(perm = var_1754, x = var_1747_cast_fp16)[name = string("transpose_5")]; + tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1755)[name = string("input_107")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; + string up_states_15_pad_type_0 = const()[name = string("up_states_15_pad_type_0"), val = string("valid")]; + tensor up_states_15_strides_0 = const()[name = string("up_states_15_strides_0"), val = tensor([1, 1])]; + tensor up_states_15_pad_0 = const()[name = string("up_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_15_dilations_0 = const()[name = string("up_states_15_dilations_0"), val = tensor([1, 1])]; + int32 up_states_15_groups_0 = const()[name = string("up_states_15_groups_0"), val = int32(1)]; + tensor up_states_15 = conv(dilations = up_states_15_dilations_0, groups = up_states_15_groups_0, pad = up_states_15_pad_0, pad_type = up_states_15_pad_type_0, strides = up_states_15_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states_15")]; + tensor gate_states_15 = silu(x = input_109)[name = string("gate_states_15")]; + tensor input_111 = mul(x = gate_states_15, y = up_states_15)[name = string("input_111")]; + string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; + tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; + tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; + tensor var_1777_axes_0 = const()[name = string("op_1777_axes_0"), val = tensor([2])]; + tensor var_1777 = squeeze(axes = var_1777_axes_0, x = hidden_states_63)[name = string("op_1777")]; + tensor var_1778 = const()[name = string("op_1778"), val = tensor([0, 2, 1])]; + tensor var_1779 = transpose(perm = var_1778, x = var_1777)[name = string("transpose_4")]; + tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_1779)[name = string("hidden_states_65_cast_fp16")]; + tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; + bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; + tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_33_cast_fp16")]; + tensor input_113_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_33_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor var_1787_axes_0 = const()[name = string("op_1787_axes_0"), val = tensor([-1])]; + tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743554048)))]; + tensor var_1787_cast_fp16 = layer_norm(axes = var_1787_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_26_input_layernorm_weight_to_fp16, x = input_113_cast_fp16)[name = string("op_1787_cast_fp16")]; + tensor var_1790 = const()[name = string("op_1790"), val = tensor([0, 2, 1])]; + tensor var_1792_axes_0 = const()[name = string("op_1792_axes_0"), val = tensor([2])]; + tensor var_1791 = transpose(perm = var_1790, x = var_1787_cast_fp16)[name = string("transpose_3")]; + tensor var_1792 = expand_dims(axes = var_1792_axes_0, x = var_1791)[name = string("op_1792")]; + string var_1799_pad_type_0 = const()[name = string("op_1799_pad_type_0"), val = string("valid")]; + tensor var_1799_strides_0 = const()[name = string("op_1799_strides_0"), val = tensor([1, 1])]; + tensor var_1799_pad_0 = const()[name = string("op_1799_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1799_dilations_0 = const()[name = string("op_1799_dilations_0"), val = tensor([1, 1])]; + int32 var_1799_groups_0 = const()[name = string("op_1799_groups_0"), val = int32(1)]; + tensor var_1799 = conv(dilations = var_1799_dilations_0, groups = var_1799_groups_0, pad = var_1799_pad_0, pad_type = var_1799_pad_type_0, strides = var_1799_strides_0, weight = model_model_layers_26_self_attn_q_proj_weight_palettized, x = var_1792)[name = string("op_1799")]; + tensor var_1800 = const()[name = string("op_1800"), val = tensor([1, 24, 1, 128])]; + tensor var_1801 = reshape(shape = var_1800, x = var_1799)[name = string("op_1801")]; + string var_1808_pad_type_0 = const()[name = string("op_1808_pad_type_0"), val = string("valid")]; + tensor var_1808_strides_0 = const()[name = string("op_1808_strides_0"), val = tensor([1, 1])]; + tensor var_1808_pad_0 = const()[name = string("op_1808_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1808_dilations_0 = const()[name = string("op_1808_dilations_0"), val = tensor([1, 1])]; + int32 var_1808_groups_0 = const()[name = string("op_1808_groups_0"), val = int32(1)]; + tensor var_1808 = conv(dilations = var_1808_dilations_0, groups = var_1808_groups_0, pad = var_1808_pad_0, pad_type = var_1808_pad_type_0, strides = var_1808_strides_0, weight = model_model_layers_26_self_attn_k_proj_weight_palettized, x = var_1792)[name = string("op_1808")]; + tensor var_1809 = const()[name = string("op_1809"), val = tensor([1, 8, 1, 128])]; + tensor var_1810 = reshape(shape = var_1809, x = var_1808)[name = string("op_1810")]; + string var_1817_pad_type_0 = const()[name = string("op_1817_pad_type_0"), val = string("valid")]; + tensor var_1817_strides_0 = const()[name = string("op_1817_strides_0"), val = tensor([1, 1])]; + tensor var_1817_pad_0 = const()[name = string("op_1817_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1817_dilations_0 = const()[name = string("op_1817_dilations_0"), val = tensor([1, 1])]; + int32 var_1817_groups_0 = const()[name = string("op_1817_groups_0"), val = int32(1)]; + tensor var_1817 = conv(dilations = var_1817_dilations_0, groups = var_1817_groups_0, pad = var_1817_pad_0, pad_type = var_1817_pad_type_0, strides = var_1817_strides_0, weight = model_model_layers_26_self_attn_v_proj_weight_palettized, x = var_1792)[name = string("op_1817")]; + tensor var_1818 = const()[name = string("op_1818"), val = tensor([1, 8, 1, 128])]; + tensor var_1819 = reshape(shape = var_1818, x = var_1817)[name = string("op_1819")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_1801)[name = string("x1_33")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_1801)[name = string("x2_33")]; + tensor var_1833_cast_fp16 = mul(x = x1_33, y = cos_3_cast_fp16)[name = string("op_1833_cast_fp16")]; + tensor var_1834_cast_fp16 = mul(x = x2_33, y = sin_3_cast_fp16)[name = string("op_1834_cast_fp16")]; + tensor var_1835_cast_fp16 = sub(x = var_1833_cast_fp16, y = var_1834_cast_fp16)[name = string("op_1835_cast_fp16")]; + tensor var_1836_cast_fp16 = mul(x = x2_33, y = cos_3_cast_fp16)[name = string("op_1836_cast_fp16")]; + tensor var_1837_cast_fp16 = mul(x = x1_33, y = sin_3_cast_fp16)[name = string("op_1837_cast_fp16")]; + tensor var_1838_cast_fp16 = add(x = var_1836_cast_fp16, y = var_1837_cast_fp16)[name = string("op_1838_cast_fp16")]; + bool rotated_33_interleave_0 = const()[name = string("rotated_33_interleave_0"), val = bool(false)]; + tensor rotated_33_cast_fp16 = concat(axis = var_59, interleave = rotated_33_interleave_0, values = (var_1835_cast_fp16, var_1838_cast_fp16))[name = string("rotated_33_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1810)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1810)[name = string("x2")]; + tensor var_1854_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1854_cast_fp16")]; + tensor var_1855_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1855_cast_fp16")]; + tensor var_1856_cast_fp16 = sub(x = var_1854_cast_fp16, y = var_1855_cast_fp16)[name = string("op_1856_cast_fp16")]; + tensor var_1857_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1857_cast_fp16")]; + tensor var_1858_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1858_cast_fp16")]; + tensor var_1859_cast_fp16 = add(x = var_1857_cast_fp16, y = var_1858_cast_fp16)[name = string("op_1859_cast_fp16")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated_cast_fp16 = concat(axis = var_59, interleave = rotated_interleave_0, values = (var_1856_cast_fp16, var_1859_cast_fp16))[name = string("rotated_cast_fp16")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([26])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([27])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; + tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; + tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; + int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; + bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; + tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_384, concat_67_values3_0))[name = string("concat_67")]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = rotated_cast_fp16, x = coreml_update_state_33)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_34 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([54])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([55])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_384, concat_71_values3_0))[name = string("concat_71")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = var_1819, x = coreml_update_state_34)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_35 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; + tensor var_1879_begin_0 = const()[name = string("op_1879_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor var_1879_end_0 = const()[name = string("op_1879_end_0"), val = tensor([27, 8, 1024, 128])]; + tensor var_1879_end_mask_0 = const()[name = string("op_1879_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1879_cast_fp16 = slice_by_index(begin = var_1879_begin_0, end = var_1879_end_0, end_mask = var_1879_end_mask_0, x = coreml_update_state_35)[name = string("op_1879_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1879_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1881_begin_0 = const()[name = string("op_1881_begin_0"), val = tensor([54, 0, 0, 0])]; + tensor var_1881_end_0 = const()[name = string("op_1881_end_0"), val = tensor([55, 8, 1024, 128])]; + tensor var_1881_end_mask_0 = const()[name = string("op_1881_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1881_cast_fp16 = slice_by_index(begin = var_1881_begin_0, end = var_1881_end_0, end_mask = var_1881_end_mask_0, x = coreml_update_state_35)[name = string("op_1881_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1881_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_235_axes_0 = const()[name = string("x_235_axes_0"), val = tensor([1])]; + tensor x_235_cast_fp16 = expand_dims(axes = x_235_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_1890 = const()[name = string("op_1890"), val = tensor([1, 3, 1, 1])]; + tensor x_237_cast_fp16 = tile(reps = var_1890, x = x_235_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor var_1894 = const()[name = string("op_1894"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_cast_fp16 = reshape(shape = var_1894, x = x_237_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor x_241_axes_0 = const()[name = string("x_241_axes_0"), val = tensor([1])]; + tensor x_241_cast_fp16 = expand_dims(axes = x_241_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_1897 = const()[name = string("op_1897"), val = tensor([1, 3, 1, 1])]; + tensor x_243_cast_fp16 = tile(reps = var_1897, x = x_241_cast_fp16)[name = string("x_243_cast_fp16")]; + tensor var_1901 = const()[name = string("op_1901"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_cast_fp16 = reshape(shape = var_1901, x = x_243_cast_fp16)[name = string("value_states_cast_fp16")]; + bool var_1904_transpose_x_1 = const()[name = string("op_1904_transpose_x_1"), val = bool(false)]; + bool var_1904_transpose_y_1 = const()[name = string("op_1904_transpose_y_1"), val = bool(true)]; + tensor var_1904_cast_fp16 = matmul(transpose_x = var_1904_transpose_x_1, transpose_y = var_1904_transpose_y_1, x = rotated_33_cast_fp16, y = key_states_cast_fp16)[name = string("op_1904_cast_fp16")]; + fp16 var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_33_cast_fp16 = mul(x = var_1904_cast_fp16, y = var_1905_to_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_245_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("x_245_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_245_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; + tensor x_247_cast_fp16 = sub(x = x_245_cast_fp16, y = reduce_max_8_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_247_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_1916_axes_0 = const()[name = string("op_1916_axes_0"), val = tensor([-1])]; + bool var_1916_keep_dims_0 = const()[name = string("op_1916_keep_dims_0"), val = bool(true)]; + tensor var_1916_cast_fp16 = reduce_sum(axes = var_1916_axes_0, keep_dims = var_1916_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1916_cast_fp16")]; + tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1916_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_1919_perm_0 = const()[name = string("op_1919_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1921 = const()[name = string("op_1921"), val = tensor([1, 1, 3072])]; + tensor var_1919_cast_fp16 = transpose(perm = var_1919_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_2")]; + tensor input_117_cast_fp16 = reshape(shape = var_1921, x = var_1919_cast_fp16)[name = string("input_117_cast_fp16")]; + tensor model_model_layers_26_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743560256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750638208))))[name = string("model_model_layers_26_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = linear_8_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_69_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_119_cast_fp16 = sub(x = hidden_states_69_cast_fp16, y = mean_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor var_1932_axes_0 = const()[name = string("op_1932_axes_0"), val = tensor([-1])]; + tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750687424)))]; + tensor var_1932_cast_fp16 = layer_norm(axes = var_1932_axes_0, epsilon = var_54_to_fp16, gamma = model_model_layers_26_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_1932_cast_fp16")]; + tensor var_1939 = const()[name = string("op_1939"), val = tensor([0, 2, 1])]; + tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; + tensor var_1940 = transpose(perm = var_1939, x = var_1932_cast_fp16)[name = string("transpose_1")]; + tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_1940)[name = string("input_121")]; + string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; + tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; + tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; + int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; + tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_26_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_26_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_states")]; + tensor gate_states = silu(x = input_123)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_26_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_1962_axes_0 = const()[name = string("op_1962_axes_0"), val = tensor([2])]; + tensor var_1962 = squeeze(axes = var_1962_axes_0, x = hidden_states_1)[name = string("op_1962")]; + tensor var_1963 = const()[name = string("op_1963"), val = tensor([0, 2, 1])]; + tensor var_1964 = transpose(perm = var_1963, x = var_1962)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_69_cast_fp16, y = var_1964)[name = string("op_1965_cast_fp16")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (output_hidden_states); + func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7078016))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7127232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9486592))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9503040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11862400))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11878848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30753280))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30884416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49758848))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49889984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68764416))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75891584))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75940800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78300160))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78316608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80675968))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80692416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99566848))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99697984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118572416))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137577984))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137627200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144705152))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144754368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147113728))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147130176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149489536))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149505984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168380416))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168511552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187385984))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206391552))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206440768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213518720))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213567936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215927296))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215943744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218303104))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218319552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237193984))))[name = string("model_model_layers_21_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237325120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256199552))))[name = string("model_model_layers_21_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256330688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275205120))))[name = string("model_model_layers_21_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275254336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282332288))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282381504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284740864))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284757312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287116672))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287133120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306007552))))[name = string("model_model_layers_22_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306138688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325013120))))[name = string("model_model_layers_22_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325144256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344018688))))[name = string("model_model_layers_22_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344067904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351145856))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353554432))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353570880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355930240))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355946688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374821120))))[name = string("model_model_layers_23_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374952256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393826688))))[name = string("model_model_layers_23_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393957824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412832256))))[name = string("model_model_layers_23_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412881472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419959424))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420008640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422368000))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422384448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424743808))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424760256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443634688))))[name = string("model_model_layers_24_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443765824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462640256))))[name = string("model_model_layers_24_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462771392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481645824))))[name = string("model_model_layers_24_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481695040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488772992))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488822208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491181568))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491198016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493557376))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493573824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512448256))))[name = string("model_model_layers_25_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512579392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531453824))))[name = string("model_model_layers_25_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531584960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550459392))))[name = string("model_model_layers_25_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550508608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557586560))))[name = string("model_model_layers_26_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557635776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559995136))))[name = string("model_model_layers_26_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560011584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562370944))))[name = string("model_model_layers_26_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562387392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581261824))))[name = string("model_model_layers_26_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581392960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600267392))))[name = string("model_model_layers_26_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600398528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619272960))))[name = string("model_model_layers_26_mlp_down_proj_weight_palettized")]; + int32 var_54 = const()[name = string("op_54"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_296_axis_0 = const()[name = string("op_296_axis_0"), val = int32(1)]; + int32 var_296_batch_dims_0 = const()[name = string("op_296_batch_dims_0"), val = int32(0)]; + bool var_296_validate_indices_0 = const()[name = string("op_296_validate_indices_0"), val = bool(false)]; + tensor var_65_to_fp16 = const()[name = string("op_65_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652876672)))]; + tensor var_296_cast_fp16 = gather(axis = var_296_axis_0, batch_dims = var_296_batch_dims_0, indices = select_0, validate_indices = var_296_validate_indices_0, x = var_65_to_fp16)[name = string("op_296_cast_fp16")]; + tensor var_297 = const()[name = string("op_297"), val = tensor([1, 64, 1, 128])]; + tensor cos_1_cast_fp16 = reshape(shape = var_297, x = var_296_cast_fp16)[name = string("cos_1_cast_fp16")]; + int32 var_301_axis_0 = const()[name = string("op_301_axis_0"), val = int32(1)]; + int32 var_301_batch_dims_0 = const()[name = string("op_301_batch_dims_0"), val = int32(0)]; + bool var_301_validate_indices_0 = const()[name = string("op_301_validate_indices_0"), val = bool(false)]; + tensor var_60_to_fp16 = const()[name = string("op_60_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619322176)))]; + tensor var_301_cast_fp16 = gather(axis = var_301_axis_0, batch_dims = var_301_batch_dims_0, indices = select_0, validate_indices = var_301_validate_indices_0, x = var_60_to_fp16)[name = string("op_301_cast_fp16")]; + tensor var_302 = const()[name = string("op_302"), val = tensor([1, 64, 1, 128])]; + tensor sin_1_cast_fp16 = reshape(shape = var_302, x = var_301_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_312_axes_0 = const()[name = string("op_312_axes_0"), val = tensor([-1])]; + tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686431168)))]; + fp16 var_56_to_fp16 = const()[name = string("op_56_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_312_cast_fp16 = layer_norm(axes = var_312_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_18_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_312_cast_fp16")]; + tensor var_316 = const()[name = string("op_316"), val = tensor([0, 2, 1])]; + tensor var_318_axes_0 = const()[name = string("op_318_axes_0"), val = tensor([2])]; + tensor var_317 = transpose(perm = var_316, x = var_312_cast_fp16)[name = string("transpose_64")]; + tensor var_318 = expand_dims(axes = var_318_axes_0, x = var_317)[name = string("op_318")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_318)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_318)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_318)[name = string("value_states_1")]; + tensor var_338 = const()[name = string("op_338"), val = tensor([1, 24, 128, 64])]; + tensor var_339 = reshape(shape = var_338, x = query_states_1)[name = string("op_339")]; + tensor var_340 = const()[name = string("op_340"), val = tensor([0, 1, 3, 2])]; + tensor var_342 = const()[name = string("op_342"), val = tensor([1, 8, 128, 64])]; + tensor var_343 = reshape(shape = var_342, x = key_states_1)[name = string("op_343")]; + tensor var_344 = const()[name = string("op_344"), val = tensor([0, 1, 3, 2])]; + tensor var_346 = const()[name = string("op_346"), val = tensor([1, 8, 128, 64])]; + tensor var_347 = reshape(shape = var_346, x = value_states_1)[name = string("op_347")]; + tensor var_348 = const()[name = string("op_348"), val = tensor([0, 1, 3, 2])]; + tensor var_350 = const()[name = string("op_350"), val = tensor([0, 2, 1, 3])]; + tensor var_352 = const()[name = string("op_352"), val = tensor([0, 2, 1, 3])]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_1 = transpose(perm = var_340, x = var_339)[name = string("transpose_63")]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; + tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 64, 64])]; + tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_5 = transpose(perm = var_350, x = cos_1_cast_fp16)[name = string("transpose_62")]; + tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; + tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 64, 64])]; + tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_5 = transpose(perm = var_352, x = sin_1_cast_fp16)[name = string("transpose_61")]; + tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; + tensor var_366 = mul(x = x1_1, y = cos_7)[name = string("op_366")]; + tensor var_367 = mul(x = x2_1, y = sin_7)[name = string("op_367")]; + tensor var_368 = sub(x = var_366, y = var_367)[name = string("op_368")]; + tensor var_369 = mul(x = x2_1, y = cos_7)[name = string("op_369")]; + tensor var_370 = mul(x = x1_1, y = sin_7)[name = string("op_370")]; + tensor var_371 = add(x = var_369, y = var_370)[name = string("op_371")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1 = concat(axis = var_54, interleave = rotated_1_interleave_0, values = (var_368, var_371))[name = string("rotated_1")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_5 = transpose(perm = var_344, x = var_343)[name = string("transpose_60")]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; + tensor var_387 = mul(x = x1_3, y = cos_7)[name = string("op_387")]; + tensor var_388 = mul(x = x2_3, y = sin_7)[name = string("op_388")]; + tensor var_389 = sub(x = var_387, y = var_388)[name = string("op_389")]; + tensor var_390 = mul(x = x2_3, y = cos_7)[name = string("op_390")]; + tensor var_391 = mul(x = x1_3, y = sin_7)[name = string("op_391")]; + tensor var_392 = add(x = var_390, y = var_391)[name = string("op_392")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3 = concat(axis = var_54, interleave = rotated_3_interleave_0, values = (var_389, var_392))[name = string("rotated_3")]; + tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; + tensor var_401 = add(x = current_pos, y = seq_length_1)[name = string("op_401")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([18])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([19])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_401, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([46])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([47])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_401, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3 = transpose(perm = var_348, x = var_347)[name = string("transpose_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; + tensor var_415_begin_0 = const()[name = string("op_415_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_415_end_0 = const()[name = string("op_415_end_0"), val = tensor([19, 8, 1024, 128])]; + tensor var_415_end_mask_0 = const()[name = string("op_415_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_415_cast_fp16 = slice_by_index(begin = var_415_begin_0, end = var_415_end_0, end_mask = var_415_end_mask_0, x = coreml_update_state_19)[name = string("op_415_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_415_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_417_begin_0 = const()[name = string("op_417_begin_0"), val = tensor([46, 0, 0, 0])]; + tensor var_417_end_0 = const()[name = string("op_417_end_0"), val = tensor([47, 8, 1024, 128])]; + tensor var_417_end_mask_0 = const()[name = string("op_417_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_417_cast_fp16 = slice_by_index(begin = var_417_begin_0, end = var_417_end_0, end_mask = var_417_end_mask_0, x = coreml_update_state_19)[name = string("op_417_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_417_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_426 = const()[name = string("op_426"), val = tensor([1, 3, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_426, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_430 = const()[name = string("op_430"), val = tensor([1, -1, 1024, 128])]; + tensor var_431_cast_fp16 = reshape(shape = var_430, x = x_13_cast_fp16)[name = string("op_431_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_433 = const()[name = string("op_433"), val = tensor([1, 3, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_433, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + bool var_440_transpose_x_0 = const()[name = string("op_440_transpose_x_0"), val = bool(false)]; + bool var_440_transpose_y_0 = const()[name = string("op_440_transpose_y_0"), val = bool(true)]; + tensor var_440_cast_fp16 = matmul(transpose_x = var_440_transpose_x_0, transpose_y = var_440_transpose_y_0, x = rotated_1, y = var_431_cast_fp16)[name = string("op_440_cast_fp16")]; + fp16 var_441_to_fp16 = const()[name = string("op_441_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_440_cast_fp16, y = var_441_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_452_axes_0 = const()[name = string("op_452_axes_0"), val = tensor([-1])]; + bool var_452_keep_dims_0 = const()[name = string("op_452_keep_dims_0"), val = bool(true)]; + tensor var_452_cast_fp16 = reduce_sum(axes = var_452_axes_0, keep_dims = var_452_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_452_cast_fp16")]; + tensor var_453_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_452_cast_fp16)[name = string("op_453_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([24, 64, 1024])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_453_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([24, 1024, 128])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 24, 64, 128])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_456_perm_0 = const()[name = string("op_456_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_458 = const()[name = string("op_458"), val = tensor([1, 64, 3072])]; + tensor var_456_cast_fp16 = transpose(perm = var_456_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_58")]; + tensor input_5_cast_fp16 = reshape(shape = var_458, x = var_456_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686437376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693515328))))[name = string("model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693564544)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_469_axes_0 = const()[name = string("op_469_axes_0"), val = tensor([-1])]; + tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693570752)))]; + tensor var_469_cast_fp16 = layer_norm(axes = var_469_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_18_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_469_cast_fp16")]; + tensor var_476 = const()[name = string("op_476"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_477 = transpose(perm = var_476, x = var_469_cast_fp16)[name = string("transpose_57")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_477)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_499_axes_0 = const()[name = string("op_499_axes_0"), val = tensor([2])]; + tensor var_499 = squeeze(axes = var_499_axes_0, x = hidden_states_7)[name = string("op_499")]; + tensor var_500 = const()[name = string("op_500"), val = tensor([0, 2, 1])]; + tensor var_501 = transpose(perm = var_500, x = var_499)[name = string("transpose_56")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_501)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_509_axes_0 = const()[name = string("op_509_axes_0"), val = tensor([-1])]; + tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693576960)))]; + tensor var_509_cast_fp16 = layer_norm(axes = var_509_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_19_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_509_cast_fp16")]; + tensor var_513 = const()[name = string("op_513"), val = tensor([0, 2, 1])]; + tensor var_515_axes_0 = const()[name = string("op_515_axes_0"), val = tensor([2])]; + tensor var_514 = transpose(perm = var_513, x = var_509_cast_fp16)[name = string("transpose_55")]; + tensor var_515 = expand_dims(axes = var_515_axes_0, x = var_514)[name = string("op_515")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_515)[name = string("query_states_5")]; + string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; + tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; + tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; + int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; + tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_515)[name = string("key_states_7")]; + string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; + tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; + tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; + int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; + tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_515)[name = string("value_states_7")]; + tensor var_535 = const()[name = string("op_535"), val = tensor([1, 24, 128, 64])]; + tensor var_536 = reshape(shape = var_535, x = query_states_5)[name = string("op_536")]; + tensor var_537 = const()[name = string("op_537"), val = tensor([0, 1, 3, 2])]; + tensor var_539 = const()[name = string("op_539"), val = tensor([1, 8, 128, 64])]; + tensor var_540 = reshape(shape = var_539, x = key_states_7)[name = string("op_540")]; + tensor var_541 = const()[name = string("op_541"), val = tensor([0, 1, 3, 2])]; + tensor var_543 = const()[name = string("op_543"), val = tensor([1, 8, 128, 64])]; + tensor var_544 = reshape(shape = var_543, x = value_states_7)[name = string("op_544")]; + tensor var_545 = const()[name = string("op_545"), val = tensor([0, 1, 3, 2])]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_29 = transpose(perm = var_537, x = var_536)[name = string("transpose_54")]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; + tensor var_563 = mul(x = x1_5, y = cos_7)[name = string("op_563")]; + tensor var_564 = mul(x = x2_5, y = sin_7)[name = string("op_564")]; + tensor var_565 = sub(x = var_563, y = var_564)[name = string("op_565")]; + tensor var_566 = mul(x = x2_5, y = cos_7)[name = string("op_566")]; + tensor var_567 = mul(x = x1_5, y = sin_7)[name = string("op_567")]; + tensor var_568 = add(x = var_566, y = var_567)[name = string("op_568")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5 = concat(axis = var_54, interleave = rotated_5_interleave_0, values = (var_565, var_568))[name = string("rotated_5")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_33 = transpose(perm = var_541, x = var_540)[name = string("transpose_53")]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; + tensor var_584 = mul(x = x1_7, y = cos_7)[name = string("op_584")]; + tensor var_585 = mul(x = x2_7, y = sin_7)[name = string("op_585")]; + tensor var_586 = sub(x = var_584, y = var_585)[name = string("op_586")]; + tensor var_587 = mul(x = x2_7, y = cos_7)[name = string("op_587")]; + tensor var_588 = mul(x = x1_7, y = sin_7)[name = string("op_588")]; + tensor var_589 = add(x = var_587, y = var_588)[name = string("op_589")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7 = concat(axis = var_54, interleave = rotated_7_interleave_0, values = (var_586, var_589))[name = string("rotated_7")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([19])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([20])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_401, concat_21_values3_0))[name = string("concat_21")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([47])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([48])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; + tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; + tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; + int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; + bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; + tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_401, concat_25_values3_0))[name = string("concat_25")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_9 = transpose(perm = var_545, x = var_544)[name = string("transpose_52")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; + tensor var_612_begin_0 = const()[name = string("op_612_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_612_end_0 = const()[name = string("op_612_end_0"), val = tensor([20, 8, 1024, 128])]; + tensor var_612_end_mask_0 = const()[name = string("op_612_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_612_cast_fp16 = slice_by_index(begin = var_612_begin_0, end = var_612_end_0, end_mask = var_612_end_mask_0, x = coreml_update_state_21)[name = string("op_612_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_612_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_614_begin_0 = const()[name = string("op_614_begin_0"), val = tensor([47, 0, 0, 0])]; + tensor var_614_end_0 = const()[name = string("op_614_end_0"), val = tensor([48, 8, 1024, 128])]; + tensor var_614_end_mask_0 = const()[name = string("op_614_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_614_cast_fp16 = slice_by_index(begin = var_614_begin_0, end = var_614_end_0, end_mask = var_614_end_mask_0, x = coreml_update_state_21)[name = string("op_614_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_614_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_623 = const()[name = string("op_623"), val = tensor([1, 3, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_623, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_627 = const()[name = string("op_627"), val = tensor([1, -1, 1024, 128])]; + tensor var_628_cast_fp16 = reshape(shape = var_627, x = x_41_cast_fp16)[name = string("op_628_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_630 = const()[name = string("op_630"), val = tensor([1, 3, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_630, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + bool var_637_transpose_x_0 = const()[name = string("op_637_transpose_x_0"), val = bool(false)]; + bool var_637_transpose_y_0 = const()[name = string("op_637_transpose_y_0"), val = bool(true)]; + tensor var_637_cast_fp16 = matmul(transpose_x = var_637_transpose_x_0, transpose_y = var_637_transpose_y_0, x = rotated_5, y = var_628_cast_fp16)[name = string("op_637_cast_fp16")]; + fp16 var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_3_cast_fp16 = mul(x = var_637_cast_fp16, y = var_638_to_fp16)[name = string("attn_weights_3_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_649_axes_0 = const()[name = string("op_649_axes_0"), val = tensor([-1])]; + bool var_649_keep_dims_0 = const()[name = string("op_649_keep_dims_0"), val = bool(true)]; + tensor var_649_cast_fp16 = reduce_sum(axes = var_649_axes_0, keep_dims = var_649_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_649_cast_fp16")]; + tensor var_650_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_649_cast_fp16)[name = string("op_650_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([24, 64, 1024])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_650_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([24, 1024, 128])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 24, 64, 128])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_653_perm_0 = const()[name = string("op_653_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_655 = const()[name = string("op_655"), val = tensor([1, 64, 3072])]; + tensor var_653_cast_fp16 = transpose(perm = var_653_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_51")]; + tensor input_19_cast_fp16 = reshape(shape = var_655, x = var_653_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693583168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700661120))))[name = string("model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_666_axes_0 = const()[name = string("op_666_axes_0"), val = tensor([-1])]; + tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700710336)))]; + tensor var_666_cast_fp16 = layer_norm(axes = var_666_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_19_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_666_cast_fp16")]; + tensor var_673 = const()[name = string("op_673"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_674 = transpose(perm = var_673, x = var_666_cast_fp16)[name = string("transpose_50")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_674)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_696_axes_0 = const()[name = string("op_696_axes_0"), val = tensor([2])]; + tensor var_696 = squeeze(axes = var_696_axes_0, x = hidden_states_15)[name = string("op_696")]; + tensor var_697 = const()[name = string("op_697"), val = tensor([0, 2, 1])]; + tensor var_698 = transpose(perm = var_697, x = var_696)[name = string("transpose_49")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_698)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_706_axes_0 = const()[name = string("op_706_axes_0"), val = tensor([-1])]; + tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700716544)))]; + tensor var_706_cast_fp16 = layer_norm(axes = var_706_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_20_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_706_cast_fp16")]; + tensor var_710 = const()[name = string("op_710"), val = tensor([0, 2, 1])]; + tensor var_712_axes_0 = const()[name = string("op_712_axes_0"), val = tensor([2])]; + tensor var_711 = transpose(perm = var_710, x = var_706_cast_fp16)[name = string("transpose_48")]; + tensor var_712 = expand_dims(axes = var_712_axes_0, x = var_711)[name = string("op_712")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_712)[name = string("query_states_9")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_712)[name = string("key_states_13")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_712)[name = string("value_states_13")]; + tensor var_732 = const()[name = string("op_732"), val = tensor([1, 24, 128, 64])]; + tensor var_733 = reshape(shape = var_732, x = query_states_9)[name = string("op_733")]; + tensor var_734 = const()[name = string("op_734"), val = tensor([0, 1, 3, 2])]; + tensor var_736 = const()[name = string("op_736"), val = tensor([1, 8, 128, 64])]; + tensor var_737 = reshape(shape = var_736, x = key_states_13)[name = string("op_737")]; + tensor var_738 = const()[name = string("op_738"), val = tensor([0, 1, 3, 2])]; + tensor var_740 = const()[name = string("op_740"), val = tensor([1, 8, 128, 64])]; + tensor var_741 = reshape(shape = var_740, x = value_states_13)[name = string("op_741")]; + tensor var_742 = const()[name = string("op_742"), val = tensor([0, 1, 3, 2])]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_57 = transpose(perm = var_734, x = var_733)[name = string("transpose_47")]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; + tensor var_760 = mul(x = x1_9, y = cos_7)[name = string("op_760")]; + tensor var_761 = mul(x = x2_9, y = sin_7)[name = string("op_761")]; + tensor var_762 = sub(x = var_760, y = var_761)[name = string("op_762")]; + tensor var_763 = mul(x = x2_9, y = cos_7)[name = string("op_763")]; + tensor var_764 = mul(x = x1_9, y = sin_7)[name = string("op_764")]; + tensor var_765 = add(x = var_763, y = var_764)[name = string("op_765")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9 = concat(axis = var_54, interleave = rotated_9_interleave_0, values = (var_762, var_765))[name = string("rotated_9")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_61 = transpose(perm = var_738, x = var_737)[name = string("transpose_46")]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; + tensor var_781 = mul(x = x1_11, y = cos_7)[name = string("op_781")]; + tensor var_782 = mul(x = x2_11, y = sin_7)[name = string("op_782")]; + tensor var_783 = sub(x = var_781, y = var_782)[name = string("op_783")]; + tensor var_784 = mul(x = x2_11, y = cos_7)[name = string("op_784")]; + tensor var_785 = mul(x = x1_11, y = sin_7)[name = string("op_785")]; + tensor var_786 = add(x = var_784, y = var_785)[name = string("op_786")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11 = concat(axis = var_54, interleave = rotated_11_interleave_0, values = (var_783, var_786))[name = string("rotated_11")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([20])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([21])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_401, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([48])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([49])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_401, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15 = transpose(perm = var_742, x = var_741)[name = string("transpose_45")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; + tensor var_809_begin_0 = const()[name = string("op_809_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_809_end_0 = const()[name = string("op_809_end_0"), val = tensor([21, 8, 1024, 128])]; + tensor var_809_end_mask_0 = const()[name = string("op_809_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_809_cast_fp16 = slice_by_index(begin = var_809_begin_0, end = var_809_end_0, end_mask = var_809_end_mask_0, x = coreml_update_state_23)[name = string("op_809_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_809_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_811_begin_0 = const()[name = string("op_811_begin_0"), val = tensor([48, 0, 0, 0])]; + tensor var_811_end_0 = const()[name = string("op_811_end_0"), val = tensor([49, 8, 1024, 128])]; + tensor var_811_end_mask_0 = const()[name = string("op_811_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_811_cast_fp16 = slice_by_index(begin = var_811_begin_0, end = var_811_end_0, end_mask = var_811_end_mask_0, x = coreml_update_state_23)[name = string("op_811_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_811_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_820 = const()[name = string("op_820"), val = tensor([1, 3, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_820, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_824 = const()[name = string("op_824"), val = tensor([1, -1, 1024, 128])]; + tensor var_825_cast_fp16 = reshape(shape = var_824, x = x_69_cast_fp16)[name = string("op_825_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_827 = const()[name = string("op_827"), val = tensor([1, 3, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_827, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_834_transpose_x_0 = const()[name = string("op_834_transpose_x_0"), val = bool(false)]; + bool var_834_transpose_y_0 = const()[name = string("op_834_transpose_y_0"), val = bool(true)]; + tensor var_834_cast_fp16 = matmul(transpose_x = var_834_transpose_x_0, transpose_y = var_834_transpose_y_0, x = rotated_9, y = var_825_cast_fp16)[name = string("op_834_cast_fp16")]; + fp16 var_835_to_fp16 = const()[name = string("op_835_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_834_cast_fp16, y = var_835_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_846_axes_0 = const()[name = string("op_846_axes_0"), val = tensor([-1])]; + bool var_846_keep_dims_0 = const()[name = string("op_846_keep_dims_0"), val = bool(true)]; + tensor var_846_cast_fp16 = reduce_sum(axes = var_846_axes_0, keep_dims = var_846_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_846_cast_fp16")]; + tensor var_847_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_846_cast_fp16)[name = string("op_847_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([24, 64, 1024])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_847_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([24, 1024, 128])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 24, 64, 128])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_850_perm_0 = const()[name = string("op_850_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_852 = const()[name = string("op_852"), val = tensor([1, 64, 3072])]; + tensor var_850_cast_fp16 = transpose(perm = var_850_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_44")]; + tensor input_33_cast_fp16 = reshape(shape = var_852, x = var_850_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700722752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707800704))))[name = string("model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_863_axes_0 = const()[name = string("op_863_axes_0"), val = tensor([-1])]; + tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707849920)))]; + tensor var_863_cast_fp16 = layer_norm(axes = var_863_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_20_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_863_cast_fp16")]; + tensor var_870 = const()[name = string("op_870"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_871 = transpose(perm = var_870, x = var_863_cast_fp16)[name = string("transpose_43")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_871)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_893_axes_0 = const()[name = string("op_893_axes_0"), val = tensor([2])]; + tensor var_893 = squeeze(axes = var_893_axes_0, x = hidden_states_23)[name = string("op_893")]; + tensor var_894 = const()[name = string("op_894"), val = tensor([0, 2, 1])]; + tensor var_895 = transpose(perm = var_894, x = var_893)[name = string("transpose_42")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_895)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_903_axes_0 = const()[name = string("op_903_axes_0"), val = tensor([-1])]; + tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707856128)))]; + tensor var_903_cast_fp16 = layer_norm(axes = var_903_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_21_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_903_cast_fp16")]; + tensor var_907 = const()[name = string("op_907"), val = tensor([0, 2, 1])]; + tensor var_909_axes_0 = const()[name = string("op_909_axes_0"), val = tensor([2])]; + tensor var_908 = transpose(perm = var_907, x = var_903_cast_fp16)[name = string("transpose_41")]; + tensor var_909 = expand_dims(axes = var_909_axes_0, x = var_908)[name = string("op_909")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_909)[name = string("query_states_13")]; + string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; + tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; + tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; + int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; + tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_909)[name = string("key_states_19")]; + string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; + tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; + tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; + int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; + tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_909)[name = string("value_states_19")]; + tensor var_929 = const()[name = string("op_929"), val = tensor([1, 24, 128, 64])]; + tensor var_930 = reshape(shape = var_929, x = query_states_13)[name = string("op_930")]; + tensor var_931 = const()[name = string("op_931"), val = tensor([0, 1, 3, 2])]; + tensor var_933 = const()[name = string("op_933"), val = tensor([1, 8, 128, 64])]; + tensor var_934 = reshape(shape = var_933, x = key_states_19)[name = string("op_934")]; + tensor var_935 = const()[name = string("op_935"), val = tensor([0, 1, 3, 2])]; + tensor var_937 = const()[name = string("op_937"), val = tensor([1, 8, 128, 64])]; + tensor var_938 = reshape(shape = var_937, x = value_states_19)[name = string("op_938")]; + tensor var_939 = const()[name = string("op_939"), val = tensor([0, 1, 3, 2])]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_85 = transpose(perm = var_931, x = var_930)[name = string("transpose_40")]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; + tensor var_957 = mul(x = x1_13, y = cos_7)[name = string("op_957")]; + tensor var_958 = mul(x = x2_13, y = sin_7)[name = string("op_958")]; + tensor var_959 = sub(x = var_957, y = var_958)[name = string("op_959")]; + tensor var_960 = mul(x = x2_13, y = cos_7)[name = string("op_960")]; + tensor var_961 = mul(x = x1_13, y = sin_7)[name = string("op_961")]; + tensor var_962 = add(x = var_960, y = var_961)[name = string("op_962")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13 = concat(axis = var_54, interleave = rotated_13_interleave_0, values = (var_959, var_962))[name = string("rotated_13")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_89 = transpose(perm = var_935, x = var_934)[name = string("transpose_39")]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")]; + tensor var_978 = mul(x = x1_15, y = cos_7)[name = string("op_978")]; + tensor var_979 = mul(x = x2_15, y = sin_7)[name = string("op_979")]; + tensor var_980 = sub(x = var_978, y = var_979)[name = string("op_980")]; + tensor var_981 = mul(x = x2_15, y = cos_7)[name = string("op_981")]; + tensor var_982 = mul(x = x1_15, y = sin_7)[name = string("op_982")]; + tensor var_983 = add(x = var_981, y = var_982)[name = string("op_983")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15 = concat(axis = var_54, interleave = rotated_15_interleave_0, values = (var_980, var_983))[name = string("rotated_15")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([21])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([22])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_401, concat_57_values3_0))[name = string("concat_57")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([49])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([50])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; + tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; + tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; + int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; + bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; + tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_401, concat_61_values3_0))[name = string("concat_61")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_21 = transpose(perm = var_939, x = var_938)[name = string("transpose_38")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; + tensor var_1006_begin_0 = const()[name = string("op_1006_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_1006_end_0 = const()[name = string("op_1006_end_0"), val = tensor([22, 8, 1024, 128])]; + tensor var_1006_end_mask_0 = const()[name = string("op_1006_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1006_cast_fp16 = slice_by_index(begin = var_1006_begin_0, end = var_1006_end_0, end_mask = var_1006_end_mask_0, x = coreml_update_state_25)[name = string("op_1006_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_1006_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = string("op_1008_begin_0"), val = tensor([49, 0, 0, 0])]; + tensor var_1008_end_0 = const()[name = string("op_1008_end_0"), val = tensor([50, 8, 1024, 128])]; + tensor var_1008_end_mask_0 = const()[name = string("op_1008_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = coreml_update_state_25)[name = string("op_1008_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_1008_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_1017 = const()[name = string("op_1017"), val = tensor([1, 3, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_1017, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_1021 = const()[name = string("op_1021"), val = tensor([1, -1, 1024, 128])]; + tensor var_1022_cast_fp16 = reshape(shape = var_1021, x = x_97_cast_fp16)[name = string("op_1022_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_1024 = const()[name = string("op_1024"), val = tensor([1, 3, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_1024, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + bool var_1031_transpose_x_0 = const()[name = string("op_1031_transpose_x_0"), val = bool(false)]; + bool var_1031_transpose_y_0 = const()[name = string("op_1031_transpose_y_0"), val = bool(true)]; + tensor var_1031_cast_fp16 = matmul(transpose_x = var_1031_transpose_x_0, transpose_y = var_1031_transpose_y_0, x = rotated_13, y = var_1022_cast_fp16)[name = string("op_1031_cast_fp16")]; + fp16 var_1032_to_fp16 = const()[name = string("op_1032_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_7_cast_fp16 = mul(x = var_1031_cast_fp16, y = var_1032_to_fp16)[name = string("attn_weights_7_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_1043_axes_0 = const()[name = string("op_1043_axes_0"), val = tensor([-1])]; + bool var_1043_keep_dims_0 = const()[name = string("op_1043_keep_dims_0"), val = bool(true)]; + tensor var_1043_cast_fp16 = reduce_sum(axes = var_1043_axes_0, keep_dims = var_1043_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_1043_cast_fp16")]; + tensor var_1044_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_1043_cast_fp16)[name = string("op_1044_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([24, 64, 1024])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_1044_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([24, 1024, 128])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 24, 64, 128])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_1047_perm_0 = const()[name = string("op_1047_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1049 = const()[name = string("op_1049"), val = tensor([1, 64, 3072])]; + tensor var_1047_cast_fp16 = transpose(perm = var_1047_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_37")]; + tensor input_47_cast_fp16 = reshape(shape = var_1049, x = var_1047_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_21_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707862336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714940288))))[name = string("model_model_layers_21_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_1060_axes_0 = const()[name = string("op_1060_axes_0"), val = tensor([-1])]; + tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714989504)))]; + tensor var_1060_cast_fp16 = layer_norm(axes = var_1060_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_21_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1060_cast_fp16")]; + tensor var_1067 = const()[name = string("op_1067"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_1068 = transpose(perm = var_1067, x = var_1060_cast_fp16)[name = string("transpose_36")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1068)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1090_axes_0 = const()[name = string("op_1090_axes_0"), val = tensor([2])]; + tensor var_1090 = squeeze(axes = var_1090_axes_0, x = hidden_states_31)[name = string("op_1090")]; + tensor var_1091 = const()[name = string("op_1091"), val = tensor([0, 2, 1])]; + tensor var_1092 = transpose(perm = var_1091, x = var_1090)[name = string("transpose_35")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1092)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1100_axes_0 = const()[name = string("op_1100_axes_0"), val = tensor([-1])]; + tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714995712)))]; + tensor var_1100_cast_fp16 = layer_norm(axes = var_1100_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_22_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1100_cast_fp16")]; + tensor var_1104 = const()[name = string("op_1104"), val = tensor([0, 2, 1])]; + tensor var_1106_axes_0 = const()[name = string("op_1106_axes_0"), val = tensor([2])]; + tensor var_1105 = transpose(perm = var_1104, x = var_1100_cast_fp16)[name = string("transpose_34")]; + tensor var_1106 = expand_dims(axes = var_1106_axes_0, x = var_1105)[name = string("op_1106")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_1106)[name = string("query_states_17")]; + string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; + tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; + tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; + int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; + tensor key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_1106)[name = string("key_states_25")]; + string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; + tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; + tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; + int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; + tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_1106)[name = string("value_states_25")]; + tensor var_1126 = const()[name = string("op_1126"), val = tensor([1, 24, 128, 64])]; + tensor var_1127 = reshape(shape = var_1126, x = query_states_17)[name = string("op_1127")]; + tensor var_1128 = const()[name = string("op_1128"), val = tensor([0, 1, 3, 2])]; + tensor var_1130 = const()[name = string("op_1130"), val = tensor([1, 8, 128, 64])]; + tensor var_1131 = reshape(shape = var_1130, x = key_states_25)[name = string("op_1131")]; + tensor var_1132 = const()[name = string("op_1132"), val = tensor([0, 1, 3, 2])]; + tensor var_1134 = const()[name = string("op_1134"), val = tensor([1, 8, 128, 64])]; + tensor var_1135 = reshape(shape = var_1134, x = value_states_25)[name = string("op_1135")]; + tensor var_1136 = const()[name = string("op_1136"), val = tensor([0, 1, 3, 2])]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_113 = transpose(perm = var_1128, x = var_1127)[name = string("transpose_33")]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")]; + tensor var_1154 = mul(x = x1_17, y = cos_7)[name = string("op_1154")]; + tensor var_1155 = mul(x = x2_17, y = sin_7)[name = string("op_1155")]; + tensor var_1156 = sub(x = var_1154, y = var_1155)[name = string("op_1156")]; + tensor var_1157 = mul(x = x2_17, y = cos_7)[name = string("op_1157")]; + tensor var_1158 = mul(x = x1_17, y = sin_7)[name = string("op_1158")]; + tensor var_1159 = add(x = var_1157, y = var_1158)[name = string("op_1159")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17 = concat(axis = var_54, interleave = rotated_17_interleave_0, values = (var_1156, var_1159))[name = string("rotated_17")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_117 = transpose(perm = var_1132, x = var_1131)[name = string("transpose_32")]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")]; + tensor var_1175 = mul(x = x1_19, y = cos_7)[name = string("op_1175")]; + tensor var_1176 = mul(x = x2_19, y = sin_7)[name = string("op_1176")]; + tensor var_1177 = sub(x = var_1175, y = var_1176)[name = string("op_1177")]; + tensor var_1178 = mul(x = x2_19, y = cos_7)[name = string("op_1178")]; + tensor var_1179 = mul(x = x1_19, y = sin_7)[name = string("op_1179")]; + tensor var_1180 = add(x = var_1178, y = var_1179)[name = string("op_1180")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19 = concat(axis = var_54, interleave = rotated_19_interleave_0, values = (var_1177, var_1180))[name = string("rotated_19")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([22])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([23])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_401, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([50])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([51])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_401, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_27 = transpose(perm = var_1136, x = var_1135)[name = string("transpose_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; + tensor var_1203_begin_0 = const()[name = string("op_1203_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_1203_end_0 = const()[name = string("op_1203_end_0"), val = tensor([23, 8, 1024, 128])]; + tensor var_1203_end_mask_0 = const()[name = string("op_1203_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1203_cast_fp16 = slice_by_index(begin = var_1203_begin_0, end = var_1203_end_0, end_mask = var_1203_end_mask_0, x = coreml_update_state_27)[name = string("op_1203_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1203_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1205_begin_0 = const()[name = string("op_1205_begin_0"), val = tensor([50, 0, 0, 0])]; + tensor var_1205_end_0 = const()[name = string("op_1205_end_0"), val = tensor([51, 8, 1024, 128])]; + tensor var_1205_end_mask_0 = const()[name = string("op_1205_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1205_cast_fp16 = slice_by_index(begin = var_1205_begin_0, end = var_1205_end_0, end_mask = var_1205_end_mask_0, x = coreml_update_state_27)[name = string("op_1205_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1205_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1214 = const()[name = string("op_1214"), val = tensor([1, 3, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1214, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1218 = const()[name = string("op_1218"), val = tensor([1, -1, 1024, 128])]; + tensor var_1219_cast_fp16 = reshape(shape = var_1218, x = x_125_cast_fp16)[name = string("op_1219_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1221 = const()[name = string("op_1221"), val = tensor([1, 3, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1221, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + bool var_1228_transpose_x_0 = const()[name = string("op_1228_transpose_x_0"), val = bool(false)]; + bool var_1228_transpose_y_0 = const()[name = string("op_1228_transpose_y_0"), val = bool(true)]; + tensor var_1228_cast_fp16 = matmul(transpose_x = var_1228_transpose_x_0, transpose_y = var_1228_transpose_y_0, x = rotated_17, y = var_1219_cast_fp16)[name = string("op_1228_cast_fp16")]; + fp16 var_1229_to_fp16 = const()[name = string("op_1229_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1240_axes_0 = const()[name = string("op_1240_axes_0"), val = tensor([-1])]; + bool var_1240_keep_dims_0 = const()[name = string("op_1240_keep_dims_0"), val = bool(true)]; + tensor var_1240_cast_fp16 = reduce_sum(axes = var_1240_axes_0, keep_dims = var_1240_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1240_cast_fp16")]; + tensor var_1241_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1240_cast_fp16)[name = string("op_1241_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([24, 64, 1024])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1241_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([24, 1024, 128])]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 24, 64, 128])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor var_1244_perm_0 = const()[name = string("op_1244_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1246 = const()[name = string("op_1246"), val = tensor([1, 64, 3072])]; + tensor var_1244_cast_fp16 = transpose(perm = var_1244_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_30")]; + tensor input_61_cast_fp16 = reshape(shape = var_1246, x = var_1244_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_22_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(715001920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722079872))))[name = string("model_model_layers_22_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1257_axes_0 = const()[name = string("op_1257_axes_0"), val = tensor([-1])]; + tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722129088)))]; + tensor var_1257_cast_fp16 = layer_norm(axes = var_1257_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_22_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1257_cast_fp16")]; + tensor var_1264 = const()[name = string("op_1264"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1265 = transpose(perm = var_1264, x = var_1257_cast_fp16)[name = string("transpose_29")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1265)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1287_axes_0 = const()[name = string("op_1287_axes_0"), val = tensor([2])]; + tensor var_1287 = squeeze(axes = var_1287_axes_0, x = hidden_states_39)[name = string("op_1287")]; + tensor var_1288 = const()[name = string("op_1288"), val = tensor([0, 2, 1])]; + tensor var_1289 = transpose(perm = var_1288, x = var_1287)[name = string("transpose_28")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1289)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1297_axes_0 = const()[name = string("op_1297_axes_0"), val = tensor([-1])]; + tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722135296)))]; + tensor var_1297_cast_fp16 = layer_norm(axes = var_1297_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_23_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1297_cast_fp16")]; + tensor var_1301 = const()[name = string("op_1301"), val = tensor([0, 2, 1])]; + tensor var_1303_axes_0 = const()[name = string("op_1303_axes_0"), val = tensor([2])]; + tensor var_1302 = transpose(perm = var_1301, x = var_1297_cast_fp16)[name = string("transpose_27")]; + tensor var_1303 = expand_dims(axes = var_1303_axes_0, x = var_1302)[name = string("op_1303")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_1303)[name = string("query_states_21")]; + string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; + tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; + tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; + int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; + tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_1303)[name = string("key_states_31")]; + string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")]; + tensor value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor([1, 1])]; + tensor value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor([1, 1])]; + int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)]; + tensor value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_1303)[name = string("value_states_31")]; + tensor var_1323 = const()[name = string("op_1323"), val = tensor([1, 24, 128, 64])]; + tensor var_1324 = reshape(shape = var_1323, x = query_states_21)[name = string("op_1324")]; + tensor var_1325 = const()[name = string("op_1325"), val = tensor([0, 1, 3, 2])]; + tensor var_1327 = const()[name = string("op_1327"), val = tensor([1, 8, 128, 64])]; + tensor var_1328 = reshape(shape = var_1327, x = key_states_31)[name = string("op_1328")]; + tensor var_1329 = const()[name = string("op_1329"), val = tensor([0, 1, 3, 2])]; + tensor var_1331 = const()[name = string("op_1331"), val = tensor([1, 8, 128, 64])]; + tensor var_1332 = reshape(shape = var_1331, x = value_states_31)[name = string("op_1332")]; + tensor var_1333 = const()[name = string("op_1333"), val = tensor([0, 1, 3, 2])]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_141 = transpose(perm = var_1325, x = var_1324)[name = string("transpose_26")]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")]; + tensor var_1351 = mul(x = x1_21, y = cos_7)[name = string("op_1351")]; + tensor var_1352 = mul(x = x2_21, y = sin_7)[name = string("op_1352")]; + tensor var_1353 = sub(x = var_1351, y = var_1352)[name = string("op_1353")]; + tensor var_1354 = mul(x = x2_21, y = cos_7)[name = string("op_1354")]; + tensor var_1355 = mul(x = x1_21, y = sin_7)[name = string("op_1355")]; + tensor var_1356 = add(x = var_1354, y = var_1355)[name = string("op_1356")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21 = concat(axis = var_54, interleave = rotated_21_interleave_0, values = (var_1353, var_1356))[name = string("rotated_21")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_145 = transpose(perm = var_1329, x = var_1328)[name = string("transpose_25")]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")]; + tensor var_1372 = mul(x = x1_23, y = cos_7)[name = string("op_1372")]; + tensor var_1373 = mul(x = x2_23, y = sin_7)[name = string("op_1373")]; + tensor var_1374 = sub(x = var_1372, y = var_1373)[name = string("op_1374")]; + tensor var_1375 = mul(x = x2_23, y = cos_7)[name = string("op_1375")]; + tensor var_1376 = mul(x = x1_23, y = sin_7)[name = string("op_1376")]; + tensor var_1377 = add(x = var_1375, y = var_1376)[name = string("op_1377")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23 = concat(axis = var_54, interleave = rotated_23_interleave_0, values = (var_1374, var_1377))[name = string("rotated_23")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([23])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([24])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_401, concat_93_values3_0))[name = string("concat_93")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([51])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([52])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; + tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; + tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; + int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; + bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; + tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_401, concat_97_values3_0))[name = string("concat_97")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_33 = transpose(perm = var_1333, x = var_1332)[name = string("transpose_24")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; + tensor var_1400_begin_0 = const()[name = string("op_1400_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_1400_end_0 = const()[name = string("op_1400_end_0"), val = tensor([24, 8, 1024, 128])]; + tensor var_1400_end_mask_0 = const()[name = string("op_1400_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1400_cast_fp16 = slice_by_index(begin = var_1400_begin_0, end = var_1400_end_0, end_mask = var_1400_end_mask_0, x = coreml_update_state_29)[name = string("op_1400_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1400_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = string("op_1402_begin_0"), val = tensor([51, 0, 0, 0])]; + tensor var_1402_end_0 = const()[name = string("op_1402_end_0"), val = tensor([52, 8, 1024, 128])]; + tensor var_1402_end_mask_0 = const()[name = string("op_1402_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = coreml_update_state_29)[name = string("op_1402_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1402_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1411 = const()[name = string("op_1411"), val = tensor([1, 3, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1411, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1415 = const()[name = string("op_1415"), val = tensor([1, -1, 1024, 128])]; + tensor var_1416_cast_fp16 = reshape(shape = var_1415, x = x_153_cast_fp16)[name = string("op_1416_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1418 = const()[name = string("op_1418"), val = tensor([1, 3, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1418, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + bool var_1425_transpose_x_0 = const()[name = string("op_1425_transpose_x_0"), val = bool(false)]; + bool var_1425_transpose_y_0 = const()[name = string("op_1425_transpose_y_0"), val = bool(true)]; + tensor var_1425_cast_fp16 = matmul(transpose_x = var_1425_transpose_x_0, transpose_y = var_1425_transpose_y_0, x = rotated_21, y = var_1416_cast_fp16)[name = string("op_1425_cast_fp16")]; + fp16 var_1426_to_fp16 = const()[name = string("op_1426_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_11_cast_fp16 = mul(x = var_1425_cast_fp16, y = var_1426_to_fp16)[name = string("attn_weights_11_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1437_axes_0 = const()[name = string("op_1437_axes_0"), val = tensor([-1])]; + bool var_1437_keep_dims_0 = const()[name = string("op_1437_keep_dims_0"), val = bool(true)]; + tensor var_1437_cast_fp16 = reduce_sum(axes = var_1437_axes_0, keep_dims = var_1437_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1437_cast_fp16")]; + tensor var_1438_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1437_cast_fp16)[name = string("op_1438_cast_fp16")]; + tensor concat_102 = const()[name = string("concat_102"), val = tensor([24, 64, 1024])]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1438_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor concat_103 = const()[name = string("concat_103"), val = tensor([24, 1024, 128])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 24, 64, 128])]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor var_1441_perm_0 = const()[name = string("op_1441_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1443 = const()[name = string("op_1443"), val = tensor([1, 64, 3072])]; + tensor var_1441_cast_fp16 = transpose(perm = var_1441_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_23")]; + tensor input_75_cast_fp16 = reshape(shape = var_1443, x = var_1441_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_23_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722141504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729219456))))[name = string("model_model_layers_23_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1454_axes_0 = const()[name = string("op_1454_axes_0"), val = tensor([-1])]; + tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729268672)))]; + tensor var_1454_cast_fp16 = layer_norm(axes = var_1454_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_23_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1454_cast_fp16")]; + tensor var_1461 = const()[name = string("op_1461"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1462 = transpose(perm = var_1461, x = var_1454_cast_fp16)[name = string("transpose_22")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1462)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1484_axes_0 = const()[name = string("op_1484_axes_0"), val = tensor([2])]; + tensor var_1484 = squeeze(axes = var_1484_axes_0, x = hidden_states_47)[name = string("op_1484")]; + tensor var_1485 = const()[name = string("op_1485"), val = tensor([0, 2, 1])]; + tensor var_1486 = transpose(perm = var_1485, x = var_1484)[name = string("transpose_21")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1486)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1494_axes_0 = const()[name = string("op_1494_axes_0"), val = tensor([-1])]; + tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729274880)))]; + tensor var_1494_cast_fp16 = layer_norm(axes = var_1494_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_24_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1494_cast_fp16")]; + tensor var_1498 = const()[name = string("op_1498"), val = tensor([0, 2, 1])]; + tensor var_1500_axes_0 = const()[name = string("op_1500_axes_0"), val = tensor([2])]; + tensor var_1499 = transpose(perm = var_1498, x = var_1494_cast_fp16)[name = string("transpose_20")]; + tensor var_1500 = expand_dims(axes = var_1500_axes_0, x = var_1499)[name = string("op_1500")]; + string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; + tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; + tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; + int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; + tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_1500)[name = string("query_states_25")]; + string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; + tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; + tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; + int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; + tensor key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_1500)[name = string("key_states_37")]; + string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; + tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; + tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; + int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; + tensor value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_1500)[name = string("value_states_37")]; + tensor var_1520 = const()[name = string("op_1520"), val = tensor([1, 24, 128, 64])]; + tensor var_1521 = reshape(shape = var_1520, x = query_states_25)[name = string("op_1521")]; + tensor var_1522 = const()[name = string("op_1522"), val = tensor([0, 1, 3, 2])]; + tensor var_1524 = const()[name = string("op_1524"), val = tensor([1, 8, 128, 64])]; + tensor var_1525 = reshape(shape = var_1524, x = key_states_37)[name = string("op_1525")]; + tensor var_1526 = const()[name = string("op_1526"), val = tensor([0, 1, 3, 2])]; + tensor var_1528 = const()[name = string("op_1528"), val = tensor([1, 8, 128, 64])]; + tensor var_1529 = reshape(shape = var_1528, x = value_states_37)[name = string("op_1529")]; + tensor var_1530 = const()[name = string("op_1530"), val = tensor([0, 1, 3, 2])]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_169 = transpose(perm = var_1522, x = var_1521)[name = string("transpose_19")]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")]; + tensor var_1548 = mul(x = x1_25, y = cos_7)[name = string("op_1548")]; + tensor var_1549 = mul(x = x2_25, y = sin_7)[name = string("op_1549")]; + tensor var_1550 = sub(x = var_1548, y = var_1549)[name = string("op_1550")]; + tensor var_1551 = mul(x = x2_25, y = cos_7)[name = string("op_1551")]; + tensor var_1552 = mul(x = x1_25, y = sin_7)[name = string("op_1552")]; + tensor var_1553 = add(x = var_1551, y = var_1552)[name = string("op_1553")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25 = concat(axis = var_54, interleave = rotated_25_interleave_0, values = (var_1550, var_1553))[name = string("rotated_25")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_173 = transpose(perm = var_1526, x = var_1525)[name = string("transpose_18")]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = x_173)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = x_173)[name = string("x2_27")]; + tensor var_1569 = mul(x = x1_27, y = cos_7)[name = string("op_1569")]; + tensor var_1570 = mul(x = x2_27, y = sin_7)[name = string("op_1570")]; + tensor var_1571 = sub(x = var_1569, y = var_1570)[name = string("op_1571")]; + tensor var_1572 = mul(x = x2_27, y = cos_7)[name = string("op_1572")]; + tensor var_1573 = mul(x = x1_27, y = sin_7)[name = string("op_1573")]; + tensor var_1574 = add(x = var_1572, y = var_1573)[name = string("op_1574")]; + bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; + tensor rotated_27 = concat(axis = var_54, interleave = rotated_27_interleave_0, values = (var_1571, var_1574))[name = string("rotated_27")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([24])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([25])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_401, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([52])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([53])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_401, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_39 = transpose(perm = var_1530, x = var_1529)[name = string("transpose_17")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; + tensor var_1597_begin_0 = const()[name = string("op_1597_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor var_1597_end_0 = const()[name = string("op_1597_end_0"), val = tensor([25, 8, 1024, 128])]; + tensor var_1597_end_mask_0 = const()[name = string("op_1597_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1597_cast_fp16 = slice_by_index(begin = var_1597_begin_0, end = var_1597_end_0, end_mask = var_1597_end_mask_0, x = coreml_update_state_31)[name = string("op_1597_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1597_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_1599_begin_0 = const()[name = string("op_1599_begin_0"), val = tensor([52, 0, 0, 0])]; + tensor var_1599_end_0 = const()[name = string("op_1599_end_0"), val = tensor([53, 8, 1024, 128])]; + tensor var_1599_end_mask_0 = const()[name = string("op_1599_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1599_cast_fp16 = slice_by_index(begin = var_1599_begin_0, end = var_1599_end_0, end_mask = var_1599_end_mask_0, x = coreml_update_state_31)[name = string("op_1599_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1599_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1608 = const()[name = string("op_1608"), val = tensor([1, 3, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1608, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1612 = const()[name = string("op_1612"), val = tensor([1, -1, 1024, 128])]; + tensor var_1613_cast_fp16 = reshape(shape = var_1612, x = x_181_cast_fp16)[name = string("op_1613_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1615 = const()[name = string("op_1615"), val = tensor([1, 3, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1615, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_1622_transpose_x_0 = const()[name = string("op_1622_transpose_x_0"), val = bool(false)]; + bool var_1622_transpose_y_0 = const()[name = string("op_1622_transpose_y_0"), val = bool(true)]; + tensor var_1622_cast_fp16 = matmul(transpose_x = var_1622_transpose_x_0, transpose_y = var_1622_transpose_y_0, x = rotated_25, y = var_1613_cast_fp16)[name = string("op_1622_cast_fp16")]; + fp16 var_1623_to_fp16 = const()[name = string("op_1623_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; + tensor var_1634_axes_0 = const()[name = string("op_1634_axes_0"), val = tensor([-1])]; + bool var_1634_keep_dims_0 = const()[name = string("op_1634_keep_dims_0"), val = bool(true)]; + tensor var_1634_cast_fp16 = reduce_sum(axes = var_1634_axes_0, keep_dims = var_1634_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1634_cast_fp16")]; + tensor var_1635_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1634_cast_fp16)[name = string("op_1635_cast_fp16")]; + tensor concat_120 = const()[name = string("concat_120"), val = tensor([24, 64, 1024])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1635_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor concat_121 = const()[name = string("concat_121"), val = tensor([24, 1024, 128])]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 24, 64, 128])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor var_1638_perm_0 = const()[name = string("op_1638_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1640 = const()[name = string("op_1640"), val = tensor([1, 64, 3072])]; + tensor var_1638_cast_fp16 = transpose(perm = var_1638_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_16")]; + tensor input_89_cast_fp16 = reshape(shape = var_1640, x = var_1638_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_24_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729281088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736359040))))[name = string("model_model_layers_24_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; + bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; + tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1651_axes_0 = const()[name = string("op_1651_axes_0"), val = tensor([-1])]; + tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736408256)))]; + tensor var_1651_cast_fp16 = layer_norm(axes = var_1651_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_24_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1651_cast_fp16")]; + tensor var_1658 = const()[name = string("op_1658"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1659 = transpose(perm = var_1658, x = var_1651_cast_fp16)[name = string("transpose_15")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1659)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; + tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; + tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; + int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; + tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; + tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; + tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; + string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; + tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; + tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; + tensor var_1681_axes_0 = const()[name = string("op_1681_axes_0"), val = tensor([2])]; + tensor var_1681 = squeeze(axes = var_1681_axes_0, x = hidden_states_55)[name = string("op_1681")]; + tensor var_1682 = const()[name = string("op_1682"), val = tensor([0, 2, 1])]; + tensor var_1683 = transpose(perm = var_1682, x = var_1681)[name = string("transpose_14")]; + tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1683)[name = string("hidden_states_57_cast_fp16")]; + tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; + bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; + tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; + tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor var_1691_axes_0 = const()[name = string("op_1691_axes_0"), val = tensor([-1])]; + tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736414464)))]; + tensor var_1691_cast_fp16 = layer_norm(axes = var_1691_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_25_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1691_cast_fp16")]; + tensor var_1695 = const()[name = string("op_1695"), val = tensor([0, 2, 1])]; + tensor var_1697_axes_0 = const()[name = string("op_1697_axes_0"), val = tensor([2])]; + tensor var_1696 = transpose(perm = var_1695, x = var_1691_cast_fp16)[name = string("transpose_13")]; + tensor var_1697 = expand_dims(axes = var_1697_axes_0, x = var_1696)[name = string("op_1697")]; + string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; + tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; + tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; + int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; + tensor query_states_29 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_1697)[name = string("query_states_29")]; + string key_states_43_pad_type_0 = const()[name = string("key_states_43_pad_type_0"), val = string("valid")]; + tensor key_states_43_strides_0 = const()[name = string("key_states_43_strides_0"), val = tensor([1, 1])]; + tensor key_states_43_pad_0 = const()[name = string("key_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_43_dilations_0 = const()[name = string("key_states_43_dilations_0"), val = tensor([1, 1])]; + int32 key_states_43_groups_0 = const()[name = string("key_states_43_groups_0"), val = int32(1)]; + tensor key_states_43 = conv(dilations = key_states_43_dilations_0, groups = key_states_43_groups_0, pad = key_states_43_pad_0, pad_type = key_states_43_pad_type_0, strides = key_states_43_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_1697)[name = string("key_states_43")]; + string value_states_43_pad_type_0 = const()[name = string("value_states_43_pad_type_0"), val = string("valid")]; + tensor value_states_43_strides_0 = const()[name = string("value_states_43_strides_0"), val = tensor([1, 1])]; + tensor value_states_43_pad_0 = const()[name = string("value_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_43_dilations_0 = const()[name = string("value_states_43_dilations_0"), val = tensor([1, 1])]; + int32 value_states_43_groups_0 = const()[name = string("value_states_43_groups_0"), val = int32(1)]; + tensor value_states_43 = conv(dilations = value_states_43_dilations_0, groups = value_states_43_groups_0, pad = value_states_43_pad_0, pad_type = value_states_43_pad_type_0, strides = value_states_43_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_1697)[name = string("value_states_43")]; + tensor var_1717 = const()[name = string("op_1717"), val = tensor([1, 24, 128, 64])]; + tensor var_1718 = reshape(shape = var_1717, x = query_states_29)[name = string("op_1718")]; + tensor var_1719 = const()[name = string("op_1719"), val = tensor([0, 1, 3, 2])]; + tensor var_1721 = const()[name = string("op_1721"), val = tensor([1, 8, 128, 64])]; + tensor var_1722 = reshape(shape = var_1721, x = key_states_43)[name = string("op_1722")]; + tensor var_1723 = const()[name = string("op_1723"), val = tensor([0, 1, 3, 2])]; + tensor var_1725 = const()[name = string("op_1725"), val = tensor([1, 8, 128, 64])]; + tensor var_1726 = reshape(shape = var_1725, x = value_states_43)[name = string("op_1726")]; + tensor var_1727 = const()[name = string("op_1727"), val = tensor([0, 1, 3, 2])]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_197 = transpose(perm = var_1719, x = var_1718)[name = string("transpose_12")]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = x_197)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = x_197)[name = string("x2_29")]; + tensor var_1745 = mul(x = x1_29, y = cos_7)[name = string("op_1745")]; + tensor var_1746 = mul(x = x2_29, y = sin_7)[name = string("op_1746")]; + tensor var_1747 = sub(x = var_1745, y = var_1746)[name = string("op_1747")]; + tensor var_1748 = mul(x = x2_29, y = cos_7)[name = string("op_1748")]; + tensor var_1749 = mul(x = x1_29, y = sin_7)[name = string("op_1749")]; + tensor var_1750 = add(x = var_1748, y = var_1749)[name = string("op_1750")]; + bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; + tensor rotated_29 = concat(axis = var_54, interleave = rotated_29_interleave_0, values = (var_1747, var_1750))[name = string("rotated_29")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_201 = transpose(perm = var_1723, x = var_1722)[name = string("transpose_11")]; + tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = x_201)[name = string("x1_31")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = x_201)[name = string("x2_31")]; + tensor var_1766 = mul(x = x1_31, y = cos_7)[name = string("op_1766")]; + tensor var_1767 = mul(x = x2_31, y = sin_7)[name = string("op_1767")]; + tensor var_1768 = sub(x = var_1766, y = var_1767)[name = string("op_1768")]; + tensor var_1769 = mul(x = x2_31, y = cos_7)[name = string("op_1769")]; + tensor var_1770 = mul(x = x1_31, y = sin_7)[name = string("op_1770")]; + tensor var_1771 = add(x = var_1769, y = var_1770)[name = string("op_1771")]; + bool rotated_31_interleave_0 = const()[name = string("rotated_31_interleave_0"), val = bool(false)]; + tensor rotated_31 = concat(axis = var_54, interleave = rotated_31_interleave_0, values = (var_1768, var_1771))[name = string("rotated_31")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([25])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([26])]; + int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; + bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; + tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_401, concat_129_values3_0))[name = string("concat_129")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_31, x = coreml_update_state_31)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_32 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_32")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([53])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([54])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; + tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; + tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; + int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; + bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; + tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_401, concat_133_values3_0))[name = string("concat_133")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_45 = transpose(perm = var_1727, x = var_1726)[name = string("transpose_10")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_45, x = coreml_update_state_32)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_33 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_33")]; + tensor var_1794_begin_0 = const()[name = string("op_1794_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor var_1794_end_0 = const()[name = string("op_1794_end_0"), val = tensor([26, 8, 1024, 128])]; + tensor var_1794_end_mask_0 = const()[name = string("op_1794_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1794_cast_fp16 = slice_by_index(begin = var_1794_begin_0, end = var_1794_end_0, end_mask = var_1794_end_mask_0, x = coreml_update_state_33)[name = string("op_1794_cast_fp16")]; + tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; + tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_1794_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; + tensor var_1796_begin_0 = const()[name = string("op_1796_begin_0"), val = tensor([53, 0, 0, 0])]; + tensor var_1796_end_0 = const()[name = string("op_1796_end_0"), val = tensor([54, 8, 1024, 128])]; + tensor var_1796_end_mask_0 = const()[name = string("op_1796_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1796_cast_fp16 = slice_by_index(begin = var_1796_begin_0, end = var_1796_end_0, end_mask = var_1796_end_mask_0, x = coreml_update_state_33)[name = string("op_1796_cast_fp16")]; + tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; + tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_1796_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; + tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; + tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_1805 = const()[name = string("op_1805"), val = tensor([1, 3, 1, 1])]; + tensor x_209_cast_fp16 = tile(reps = var_1805, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1809 = const()[name = string("op_1809"), val = tensor([1, -1, 1024, 128])]; + tensor var_1810_cast_fp16 = reshape(shape = var_1809, x = x_209_cast_fp16)[name = string("op_1810_cast_fp16")]; + tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; + tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_1812 = const()[name = string("op_1812"), val = tensor([1, 3, 1, 1])]; + tensor x_215_cast_fp16 = tile(reps = var_1812, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; + bool var_1819_transpose_x_0 = const()[name = string("op_1819_transpose_x_0"), val = bool(false)]; + bool var_1819_transpose_y_0 = const()[name = string("op_1819_transpose_y_0"), val = bool(true)]; + tensor var_1819_cast_fp16 = matmul(transpose_x = var_1819_transpose_x_0, transpose_y = var_1819_transpose_y_0, x = rotated_29, y = var_1810_cast_fp16)[name = string("op_1819_cast_fp16")]; + fp16 var_1820_to_fp16 = const()[name = string("op_1820_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_15_cast_fp16 = mul(x = var_1819_cast_fp16, y = var_1820_to_fp16)[name = string("attn_weights_15_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = attn_weights_15_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor exp_x_15_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_15_cast_fp16")]; + tensor var_1831_axes_0 = const()[name = string("op_1831_axes_0"), val = tensor([-1])]; + bool var_1831_keep_dims_0 = const()[name = string("op_1831_keep_dims_0"), val = bool(true)]; + tensor var_1831_cast_fp16 = reduce_sum(axes = var_1831_axes_0, keep_dims = var_1831_keep_dims_0, x = exp_x_15_cast_fp16)[name = string("op_1831_cast_fp16")]; + tensor var_1832_cast_fp16 = real_div(x = exp_x_15_cast_fp16, y = var_1831_cast_fp16)[name = string("op_1832_cast_fp16")]; + tensor concat_138 = const()[name = string("concat_138"), val = tensor([24, 64, 1024])]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_1832_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor concat_139 = const()[name = string("concat_139"), val = tensor([24, 1024, 128])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_215_cast_fp16)[name = string("reshape_22_cast_fp16")]; + bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; + bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 24, 64, 128])]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor var_1835_perm_0 = const()[name = string("op_1835_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1837 = const()[name = string("op_1837"), val = tensor([1, 64, 3072])]; + tensor var_1835_cast_fp16 = transpose(perm = var_1835_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_9")]; + tensor input_103_cast_fp16 = reshape(shape = var_1837, x = var_1835_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_25_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736420672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743498624))))[name = string("model_model_layers_25_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; + bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; + tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; + tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_1848_axes_0 = const()[name = string("op_1848_axes_0"), val = tensor([-1])]; + tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743547840)))]; + tensor var_1848_cast_fp16 = layer_norm(axes = var_1848_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_25_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1848_cast_fp16")]; + tensor var_1855 = const()[name = string("op_1855"), val = tensor([0, 2, 1])]; + tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; + tensor var_1856 = transpose(perm = var_1855, x = var_1848_cast_fp16)[name = string("transpose_8")]; + tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1856)[name = string("input_107")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; + string up_states_15_pad_type_0 = const()[name = string("up_states_15_pad_type_0"), val = string("valid")]; + tensor up_states_15_strides_0 = const()[name = string("up_states_15_strides_0"), val = tensor([1, 1])]; + tensor up_states_15_pad_0 = const()[name = string("up_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_15_dilations_0 = const()[name = string("up_states_15_dilations_0"), val = tensor([1, 1])]; + int32 up_states_15_groups_0 = const()[name = string("up_states_15_groups_0"), val = int32(1)]; + tensor up_states_15 = conv(dilations = up_states_15_dilations_0, groups = up_states_15_groups_0, pad = up_states_15_pad_0, pad_type = up_states_15_pad_type_0, strides = up_states_15_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states_15")]; + tensor gate_states_15 = silu(x = input_109)[name = string("gate_states_15")]; + tensor input_111 = mul(x = gate_states_15, y = up_states_15)[name = string("input_111")]; + string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; + tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; + tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; + tensor var_1878_axes_0 = const()[name = string("op_1878_axes_0"), val = tensor([2])]; + tensor var_1878 = squeeze(axes = var_1878_axes_0, x = hidden_states_63)[name = string("op_1878")]; + tensor var_1879 = const()[name = string("op_1879"), val = tensor([0, 2, 1])]; + tensor var_1880 = transpose(perm = var_1879, x = var_1878)[name = string("transpose_7")]; + tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_1880)[name = string("hidden_states_65_cast_fp16")]; + tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; + bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; + tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_33_cast_fp16")]; + tensor input_113_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_33_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor var_1888_axes_0 = const()[name = string("op_1888_axes_0"), val = tensor([-1])]; + tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743554048)))]; + tensor var_1888_cast_fp16 = layer_norm(axes = var_1888_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_26_input_layernorm_weight_to_fp16, x = input_113_cast_fp16)[name = string("op_1888_cast_fp16")]; + tensor var_1892 = const()[name = string("op_1892"), val = tensor([0, 2, 1])]; + tensor var_1894_axes_0 = const()[name = string("op_1894_axes_0"), val = tensor([2])]; + tensor var_1893 = transpose(perm = var_1892, x = var_1888_cast_fp16)[name = string("transpose_6")]; + tensor var_1894 = expand_dims(axes = var_1894_axes_0, x = var_1893)[name = string("op_1894")]; + string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; + tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; + tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; + int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; + tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_26_self_attn_q_proj_weight_palettized, x = var_1894)[name = string("query_states_33")]; + string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; + tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; + tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; + int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; + tensor key_states_49 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = model_model_layers_26_self_attn_k_proj_weight_palettized, x = var_1894)[name = string("key_states_49")]; + string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; + tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; + tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; + int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; + tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_26_self_attn_v_proj_weight_palettized, x = var_1894)[name = string("value_states_49")]; + tensor var_1914 = const()[name = string("op_1914"), val = tensor([1, 24, 128, 64])]; + tensor var_1915 = reshape(shape = var_1914, x = query_states_33)[name = string("op_1915")]; + tensor var_1916 = const()[name = string("op_1916"), val = tensor([0, 1, 3, 2])]; + tensor var_1918 = const()[name = string("op_1918"), val = tensor([1, 8, 128, 64])]; + tensor var_1919 = reshape(shape = var_1918, x = key_states_49)[name = string("op_1919")]; + tensor var_1920 = const()[name = string("op_1920"), val = tensor([0, 1, 3, 2])]; + tensor var_1922 = const()[name = string("op_1922"), val = tensor([1, 8, 128, 64])]; + tensor var_1923 = reshape(shape = var_1922, x = value_states_49)[name = string("op_1923")]; + tensor var_1924 = const()[name = string("op_1924"), val = tensor([0, 1, 3, 2])]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_225 = transpose(perm = var_1916, x = var_1915)[name = string("transpose_5")]; + tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = x_225)[name = string("x1_33")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = x_225)[name = string("x2_33")]; + tensor var_1942 = mul(x = x1_33, y = cos_7)[name = string("op_1942")]; + tensor var_1943 = mul(x = x2_33, y = sin_7)[name = string("op_1943")]; + tensor var_1944 = sub(x = var_1942, y = var_1943)[name = string("op_1944")]; + tensor var_1945 = mul(x = x2_33, y = cos_7)[name = string("op_1945")]; + tensor var_1946 = mul(x = x1_33, y = sin_7)[name = string("op_1946")]; + tensor var_1947 = add(x = var_1945, y = var_1946)[name = string("op_1947")]; + bool rotated_33_interleave_0 = const()[name = string("rotated_33_interleave_0"), val = bool(false)]; + tensor rotated_33 = concat(axis = var_54, interleave = rotated_33_interleave_0, values = (var_1944, var_1947))[name = string("rotated_33")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_229 = transpose(perm = var_1920, x = var_1919)[name = string("transpose_4")]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_229)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_229)[name = string("x2")]; + tensor var_1963 = mul(x = x1, y = cos_7)[name = string("op_1963")]; + tensor var_1964 = mul(x = x2, y = sin_7)[name = string("op_1964")]; + tensor var_1965 = sub(x = var_1963, y = var_1964)[name = string("op_1965")]; + tensor var_1966 = mul(x = x2, y = cos_7)[name = string("op_1966")]; + tensor var_1967 = mul(x = x1, y = sin_7)[name = string("op_1967")]; + tensor var_1968 = add(x = var_1966, y = var_1967)[name = string("op_1968")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated = concat(axis = var_54, interleave = rotated_interleave_0, values = (var_1965, var_1968))[name = string("rotated")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([26])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([27])]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, var_401, concat_147_values3_0))[name = string("concat_147")]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = rotated, x = coreml_update_state_33)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_34 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_34")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([54])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([55])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; + tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; + tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, var_401, concat_151_values3_0))[name = string("concat_151")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_51 = transpose(perm = var_1924, x = var_1923)[name = string("transpose_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = value_states_51, x = coreml_update_state_34)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_35 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_35")]; + tensor var_1991_begin_0 = const()[name = string("op_1991_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor var_1991_end_0 = const()[name = string("op_1991_end_0"), val = tensor([27, 8, 1024, 128])]; + tensor var_1991_end_mask_0 = const()[name = string("op_1991_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1991_cast_fp16 = slice_by_index(begin = var_1991_begin_0, end = var_1991_end_0, end_mask = var_1991_end_mask_0, x = coreml_update_state_35)[name = string("op_1991_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1991_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1993_begin_0 = const()[name = string("op_1993_begin_0"), val = tensor([54, 0, 0, 0])]; + tensor var_1993_end_0 = const()[name = string("op_1993_end_0"), val = tensor([55, 8, 1024, 128])]; + tensor var_1993_end_mask_0 = const()[name = string("op_1993_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1993_cast_fp16 = slice_by_index(begin = var_1993_begin_0, end = var_1993_end_0, end_mask = var_1993_end_mask_0, x = coreml_update_state_35)[name = string("op_1993_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1993_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_235_axes_0 = const()[name = string("x_235_axes_0"), val = tensor([1])]; + tensor x_235_cast_fp16 = expand_dims(axes = x_235_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_2002 = const()[name = string("op_2002"), val = tensor([1, 3, 1, 1])]; + tensor x_237_cast_fp16 = tile(reps = var_2002, x = x_235_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor var_2006 = const()[name = string("op_2006"), val = tensor([1, -1, 1024, 128])]; + tensor var_2007_cast_fp16 = reshape(shape = var_2006, x = x_237_cast_fp16)[name = string("op_2007_cast_fp16")]; + tensor x_241_axes_0 = const()[name = string("x_241_axes_0"), val = tensor([1])]; + tensor x_241_cast_fp16 = expand_dims(axes = x_241_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_2009 = const()[name = string("op_2009"), val = tensor([1, 3, 1, 1])]; + tensor x_243_cast_fp16 = tile(reps = var_2009, x = x_241_cast_fp16)[name = string("x_243_cast_fp16")]; + bool var_2016_transpose_x_0 = const()[name = string("op_2016_transpose_x_0"), val = bool(false)]; + bool var_2016_transpose_y_0 = const()[name = string("op_2016_transpose_y_0"), val = bool(true)]; + tensor var_2016_cast_fp16 = matmul(transpose_x = var_2016_transpose_x_0, transpose_y = var_2016_transpose_y_0, x = rotated_33, y = var_2007_cast_fp16)[name = string("op_2016_cast_fp16")]; + fp16 var_2017_to_fp16 = const()[name = string("op_2017_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_cast_fp16 = mul(x = var_2016_cast_fp16, y = var_2017_to_fp16)[name = string("attn_weights_cast_fp16")]; + tensor x_245_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_245_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_245_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; + tensor x_247_cast_fp16 = sub(x = x_245_cast_fp16, y = reduce_max_8_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_247_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_2028_axes_0 = const()[name = string("op_2028_axes_0"), val = tensor([-1])]; + bool var_2028_keep_dims_0 = const()[name = string("op_2028_keep_dims_0"), val = bool(true)]; + tensor var_2028_cast_fp16 = reduce_sum(axes = var_2028_axes_0, keep_dims = var_2028_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_2028_cast_fp16")]; + tensor var_2029_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_2028_cast_fp16)[name = string("op_2029_cast_fp16")]; + tensor concat_156 = const()[name = string("concat_156"), val = tensor([24, 64, 1024])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_2029_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor concat_157 = const()[name = string("concat_157"), val = tensor([24, 1024, 128])]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_243_cast_fp16)[name = string("reshape_25_cast_fp16")]; + bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; + bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; + tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; + tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 24, 64, 128])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor var_2032_perm_0 = const()[name = string("op_2032_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2034 = const()[name = string("op_2034"), val = tensor([1, 64, 3072])]; + tensor var_2032_cast_fp16 = transpose(perm = var_2032_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_2")]; + tensor input_117_cast_fp16 = reshape(shape = var_2034, x = var_2032_cast_fp16)[name = string("input_117_cast_fp16")]; + tensor model_model_layers_26_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743560256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750638208))))[name = string("model_model_layers_26_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = linear_8_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_69_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_119_cast_fp16 = sub(x = hidden_states_69_cast_fp16, y = mean_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor var_2045_axes_0 = const()[name = string("op_2045_axes_0"), val = tensor([-1])]; + tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750687424)))]; + tensor var_2045_cast_fp16 = layer_norm(axes = var_2045_axes_0, epsilon = var_56_to_fp16, gamma = model_model_layers_26_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_2045_cast_fp16")]; + tensor var_2052 = const()[name = string("op_2052"), val = tensor([0, 2, 1])]; + tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; + tensor var_2053 = transpose(perm = var_2052, x = var_2045_cast_fp16)[name = string("transpose_1")]; + tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_2053)[name = string("input_121")]; + string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; + tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; + tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; + int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; + tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_26_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_26_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_states")]; + tensor gate_states = silu(x = input_123)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_26_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_2075_axes_0 = const()[name = string("op_2075_axes_0"), val = tensor([2])]; + tensor var_2075 = squeeze(axes = var_2075_axes_0, x = hidden_states_1)[name = string("op_2075")]; + tensor var_2076 = const()[name = string("op_2076"), val = tensor([0, 2, 1])]; + tensor var_2077 = transpose(perm = var_2076, x = var_2075)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_69_cast_fp16, y = var_2077)[name = string("op_2078_cast_fp16")]; + } -> (output_hidden_states); +} \ No newline at end of file