diff --git "a/Hermes_FFN_PF_lut6_chunk_01of03.mlmodelc/model.mil" "b/Hermes_FFN_PF_lut6_chunk_01of03.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/Hermes_FFN_PF_lut6_chunk_01of03.mlmodelc/model.mil" @@ -0,0 +1,3697 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7078016))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7127232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9486592))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9503040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11862400))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11878848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30753280))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30884416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49758848))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49889984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68764416))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75891584))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75940800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78300160))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78316608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80675968))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80692416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99566848))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99697984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118572416))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137577984))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137627200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144705152))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144754368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147113728))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147130176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149489536))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149505984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168380416))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168511552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187385984))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206391552))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206440768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213518720))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213567936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215927296))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215943744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218303104))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218319552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237193984))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237325120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256199552))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256330688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275205120))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275254336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282332288))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282381504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284740864))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284757312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287116672))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287133120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306007552))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306138688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325013120))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325144256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344018688))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344067904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351145856))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353554432))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353570880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355930240))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355946688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374821120))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374952256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393826688))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393957824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412832256))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412881472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419959424))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420008640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422368000))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422384448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424743808))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424760256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443634688))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443765824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462640256))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462771392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481645824))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481695040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488772992))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488822208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491181568))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491198016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493557376))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493573824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512448256))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512579392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531453824))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531584960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550459392))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550508608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557586560))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557635776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559995136))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560011584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562370944))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562387392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581261824))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581392960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600267392))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600398528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619272960))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; + int32 var_54 = const()[name = string("op_54"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_288_axis_0 = const()[name = string("op_288_axis_0"), val = int32(1)]; + int32 var_288_batch_dims_0 = const()[name = string("op_288_batch_dims_0"), val = int32(0)]; + bool var_288_validate_indices_0 = const()[name = string("op_288_validate_indices_0"), val = bool(false)]; + tensor var_59_to_fp16 = const()[name = string("op_59_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619322176)))]; + tensor var_288_cast_fp16 = gather(axis = var_288_axis_0, batch_dims = var_288_batch_dims_0, indices = select_0, validate_indices = var_288_validate_indices_0, x = var_59_to_fp16)[name = string("op_288_cast_fp16")]; + tensor var_289 = const()[name = string("op_289"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_289, x = var_288_cast_fp16)[name = string("sin_1_cast_fp16")]; + int32 var_293_axis_0 = const()[name = string("op_293_axis_0"), val = int32(1)]; + int32 var_293_batch_dims_0 = const()[name = string("op_293_batch_dims_0"), val = int32(0)]; + bool var_293_validate_indices_0 = const()[name = string("op_293_validate_indices_0"), val = bool(false)]; + tensor var_53_to_fp16 = const()[name = string("op_53_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652876672)))]; + tensor var_293_cast_fp16 = gather(axis = var_293_axis_0, batch_dims = var_293_batch_dims_0, indices = select_0, validate_indices = var_293_validate_indices_0, x = var_53_to_fp16)[name = string("op_293_cast_fp16")]; + tensor var_294 = const()[name = string("op_294"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_294, x = var_293_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_302_axes_0 = const()[name = string("op_302_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686431168)))]; + fp16 var_49_to_fp16 = const()[name = string("op_49_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_302_cast_fp16 = layer_norm(axes = var_302_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_302_cast_fp16")]; + tensor var_305 = const()[name = string("op_305"), val = tensor([0, 2, 1])]; + tensor var_307_axes_0 = const()[name = string("op_307_axes_0"), val = tensor([2])]; + tensor var_306 = transpose(perm = var_305, x = var_302_cast_fp16)[name = string("transpose_35")]; + tensor var_307 = expand_dims(axes = var_307_axes_0, x = var_306)[name = string("op_307")]; + string var_314_pad_type_0 = const()[name = string("op_314_pad_type_0"), val = string("valid")]; + tensor var_314_strides_0 = const()[name = string("op_314_strides_0"), val = tensor([1, 1])]; + tensor var_314_pad_0 = const()[name = string("op_314_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_314_dilations_0 = const()[name = string("op_314_dilations_0"), val = tensor([1, 1])]; + int32 var_314_groups_0 = const()[name = string("op_314_groups_0"), val = int32(1)]; + tensor var_314 = conv(dilations = var_314_dilations_0, groups = var_314_groups_0, pad = var_314_pad_0, pad_type = var_314_pad_type_0, strides = var_314_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_307)[name = string("op_314")]; + tensor var_315 = const()[name = string("op_315"), val = tensor([1, 24, 1, 128])]; + tensor var_316 = reshape(shape = var_315, x = var_314)[name = string("op_316")]; + string var_323_pad_type_0 = const()[name = string("op_323_pad_type_0"), val = string("valid")]; + tensor var_323_strides_0 = const()[name = string("op_323_strides_0"), val = tensor([1, 1])]; + tensor var_323_pad_0 = const()[name = string("op_323_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_323_dilations_0 = const()[name = string("op_323_dilations_0"), val = tensor([1, 1])]; + int32 var_323_groups_0 = const()[name = string("op_323_groups_0"), val = int32(1)]; + tensor var_323 = conv(dilations = var_323_dilations_0, groups = var_323_groups_0, pad = var_323_pad_0, pad_type = var_323_pad_type_0, strides = var_323_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_307)[name = string("op_323")]; + tensor var_324 = const()[name = string("op_324"), val = tensor([1, 8, 1, 128])]; + tensor var_325 = reshape(shape = var_324, x = var_323)[name = string("op_325")]; + string var_332_pad_type_0 = const()[name = string("op_332_pad_type_0"), val = string("valid")]; + tensor var_332_strides_0 = const()[name = string("op_332_strides_0"), val = tensor([1, 1])]; + tensor var_332_pad_0 = const()[name = string("op_332_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_332_dilations_0 = const()[name = string("op_332_dilations_0"), val = tensor([1, 1])]; + int32 var_332_groups_0 = const()[name = string("op_332_groups_0"), val = int32(1)]; + tensor var_332 = conv(dilations = var_332_dilations_0, groups = var_332_groups_0, pad = var_332_pad_0, pad_type = var_332_pad_type_0, strides = var_332_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_307)[name = string("op_332")]; + tensor var_333 = const()[name = string("op_333"), val = tensor([1, 8, 1, 128])]; + tensor var_334 = reshape(shape = var_333, x = var_332)[name = string("op_334")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_316)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_316)[name = string("x2_1")]; + tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; + tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_349_cast_fp16")]; + tensor var_350_cast_fp16 = sub(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = string("op_350_cast_fp16")]; + tensor var_351_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_351_cast_fp16")]; + tensor var_352_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_352_cast_fp16")]; + tensor var_353_cast_fp16 = add(x = var_351_cast_fp16, y = var_352_cast_fp16)[name = string("op_353_cast_fp16")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_54, interleave = rotated_1_interleave_0, values = (var_350_cast_fp16, var_353_cast_fp16))[name = string("rotated_1_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_325)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_325)[name = string("x2_3")]; + tensor var_369_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_369_cast_fp16")]; + tensor var_370_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_370_cast_fp16")]; + tensor var_371_cast_fp16 = sub(x = var_369_cast_fp16, y = var_370_cast_fp16)[name = string("op_371_cast_fp16")]; + tensor var_372_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_372_cast_fp16")]; + tensor var_373_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_373_cast_fp16")]; + tensor var_374_cast_fp16 = add(x = var_372_cast_fp16, y = var_373_cast_fp16)[name = string("op_374_cast_fp16")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_54, interleave = rotated_3_interleave_0, values = (var_371_cast_fp16, var_374_cast_fp16))[name = string("rotated_3_cast_fp16")]; + int32 var_378 = const()[name = string("op_378"), val = int32(1)]; + tensor var_379 = add(x = current_pos, y = var_378)[name = string("op_379")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_379, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([28])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([29])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_379, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_334, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; + tensor var_394_begin_0 = const()[name = string("op_394_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_394_end_0 = const()[name = string("op_394_end_0"), val = tensor([1, 8, 1024, 128])]; + tensor var_394_end_mask_0 = const()[name = string("op_394_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_394_cast_fp16 = slice_by_index(begin = var_394_begin_0, end = var_394_end_0, end_mask = var_394_end_mask_0, x = coreml_update_state_19)[name = string("op_394_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_394_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_396_begin_0 = const()[name = string("op_396_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_396_end_0 = const()[name = string("op_396_end_0"), val = tensor([29, 8, 1024, 128])]; + tensor var_396_end_mask_0 = const()[name = string("op_396_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = coreml_update_state_19)[name = string("op_396_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_396_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_405 = const()[name = string("op_405"), val = tensor([1, 3, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_405, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_409 = const()[name = string("op_409"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_3_cast_fp16 = reshape(shape = var_409, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_412 = const()[name = string("op_412"), val = tensor([1, 3, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_412, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_416 = const()[name = string("op_416"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_3_cast_fp16 = reshape(shape = var_416, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; + bool var_419_transpose_x_1 = const()[name = string("op_419_transpose_x_1"), val = bool(false)]; + bool var_419_transpose_y_1 = const()[name = string("op_419_transpose_y_1"), val = bool(true)]; + tensor var_419_cast_fp16 = matmul(transpose_x = var_419_transpose_x_1, transpose_y = var_419_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_419_cast_fp16")]; + fp16 var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_419_cast_fp16, y = var_420_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_431_axes_0 = const()[name = string("op_431_axes_0"), val = tensor([-1])]; + bool var_431_keep_dims_0 = const()[name = string("op_431_keep_dims_0"), val = bool(true)]; + tensor var_431_cast_fp16 = reduce_sum(axes = var_431_axes_0, keep_dims = var_431_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_431_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_431_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_434_perm_0 = const()[name = string("op_434_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_436 = const()[name = string("op_436"), val = tensor([1, 1, 3072])]; + tensor var_434_cast_fp16 = transpose(perm = var_434_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_34")]; + tensor input_5_cast_fp16 = reshape(shape = var_436, x = var_434_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686437376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693515328))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693564544)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_447_axes_0 = const()[name = string("op_447_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693570752)))]; + tensor var_447_cast_fp16 = layer_norm(axes = var_447_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_447_cast_fp16")]; + tensor var_454 = const()[name = string("op_454"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_455 = transpose(perm = var_454, x = var_447_cast_fp16)[name = string("transpose_33")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_455)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_477_axes_0 = const()[name = string("op_477_axes_0"), val = tensor([2])]; + tensor var_477 = squeeze(axes = var_477_axes_0, x = hidden_states_7)[name = string("op_477")]; + tensor var_478 = const()[name = string("op_478"), val = tensor([0, 2, 1])]; + tensor var_479 = transpose(perm = var_478, x = var_477)[name = string("transpose_32")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_479)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_487_axes_0 = const()[name = string("op_487_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693576960)))]; + tensor var_487_cast_fp16 = layer_norm(axes = var_487_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_487_cast_fp16")]; + tensor var_490 = const()[name = string("op_490"), val = tensor([0, 2, 1])]; + tensor var_492_axes_0 = const()[name = string("op_492_axes_0"), val = tensor([2])]; + tensor var_491 = transpose(perm = var_490, x = var_487_cast_fp16)[name = string("transpose_31")]; + tensor var_492 = expand_dims(axes = var_492_axes_0, x = var_491)[name = string("op_492")]; + string var_499_pad_type_0 = const()[name = string("op_499_pad_type_0"), val = string("valid")]; + tensor var_499_strides_0 = const()[name = string("op_499_strides_0"), val = tensor([1, 1])]; + tensor var_499_pad_0 = const()[name = string("op_499_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_499_dilations_0 = const()[name = string("op_499_dilations_0"), val = tensor([1, 1])]; + int32 var_499_groups_0 = const()[name = string("op_499_groups_0"), val = int32(1)]; + tensor var_499 = conv(dilations = var_499_dilations_0, groups = var_499_groups_0, pad = var_499_pad_0, pad_type = var_499_pad_type_0, strides = var_499_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_492)[name = string("op_499")]; + tensor var_500 = const()[name = string("op_500"), val = tensor([1, 24, 1, 128])]; + tensor var_501 = reshape(shape = var_500, x = var_499)[name = string("op_501")]; + string var_508_pad_type_0 = const()[name = string("op_508_pad_type_0"), val = string("valid")]; + tensor var_508_strides_0 = const()[name = string("op_508_strides_0"), val = tensor([1, 1])]; + tensor var_508_pad_0 = const()[name = string("op_508_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_508_dilations_0 = const()[name = string("op_508_dilations_0"), val = tensor([1, 1])]; + int32 var_508_groups_0 = const()[name = string("op_508_groups_0"), val = int32(1)]; + tensor var_508 = conv(dilations = var_508_dilations_0, groups = var_508_groups_0, pad = var_508_pad_0, pad_type = var_508_pad_type_0, strides = var_508_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_492)[name = string("op_508")]; + tensor var_509 = const()[name = string("op_509"), val = tensor([1, 8, 1, 128])]; + tensor var_510 = reshape(shape = var_509, x = var_508)[name = string("op_510")]; + string var_517_pad_type_0 = const()[name = string("op_517_pad_type_0"), val = string("valid")]; + tensor var_517_strides_0 = const()[name = string("op_517_strides_0"), val = tensor([1, 1])]; + tensor var_517_pad_0 = const()[name = string("op_517_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_517_dilations_0 = const()[name = string("op_517_dilations_0"), val = tensor([1, 1])]; + int32 var_517_groups_0 = const()[name = string("op_517_groups_0"), val = int32(1)]; + tensor var_517 = conv(dilations = var_517_dilations_0, groups = var_517_groups_0, pad = var_517_pad_0, pad_type = var_517_pad_type_0, strides = var_517_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_492)[name = string("op_517")]; + tensor var_518 = const()[name = string("op_518"), val = tensor([1, 8, 1, 128])]; + tensor var_519 = reshape(shape = var_518, x = var_517)[name = string("op_519")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_501)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_501)[name = string("x2_5")]; + tensor var_533_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_533_cast_fp16")]; + tensor var_534_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_534_cast_fp16")]; + tensor var_535_cast_fp16 = sub(x = var_533_cast_fp16, y = var_534_cast_fp16)[name = string("op_535_cast_fp16")]; + tensor var_536_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_536_cast_fp16")]; + tensor var_537_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_537_cast_fp16")]; + tensor var_538_cast_fp16 = add(x = var_536_cast_fp16, y = var_537_cast_fp16)[name = string("op_538_cast_fp16")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_54, interleave = rotated_5_interleave_0, values = (var_535_cast_fp16, var_538_cast_fp16))[name = string("rotated_5_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_510)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_510)[name = string("x2_7")]; + tensor var_554_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_554_cast_fp16")]; + tensor var_555_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_555_cast_fp16")]; + tensor var_556_cast_fp16 = sub(x = var_554_cast_fp16, y = var_555_cast_fp16)[name = string("op_556_cast_fp16")]; + tensor var_557_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_557_cast_fp16")]; + tensor var_558_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_558_cast_fp16")]; + tensor var_559_cast_fp16 = add(x = var_557_cast_fp16, y = var_558_cast_fp16)[name = string("op_559_cast_fp16")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_54, interleave = rotated_7_interleave_0, values = (var_556_cast_fp16, var_559_cast_fp16))[name = string("rotated_7_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; + tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_379, concat_11_values3_0))[name = string("concat_11")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([29])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([30])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_379, concat_15_values3_0))[name = string("concat_15")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_519, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; + tensor var_579_begin_0 = const()[name = string("op_579_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_579_end_0 = const()[name = string("op_579_end_0"), val = tensor([2, 8, 1024, 128])]; + tensor var_579_end_mask_0 = const()[name = string("op_579_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_579_cast_fp16 = slice_by_index(begin = var_579_begin_0, end = var_579_end_0, end_mask = var_579_end_mask_0, x = coreml_update_state_21)[name = string("op_579_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_579_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_581_begin_0 = const()[name = string("op_581_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_581_end_0 = const()[name = string("op_581_end_0"), val = tensor([30, 8, 1024, 128])]; + tensor var_581_end_mask_0 = const()[name = string("op_581_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_581_cast_fp16 = slice_by_index(begin = var_581_begin_0, end = var_581_end_0, end_mask = var_581_end_mask_0, x = coreml_update_state_21)[name = string("op_581_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_581_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_590 = const()[name = string("op_590"), val = tensor([1, 3, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_590, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_594 = const()[name = string("op_594"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_7_cast_fp16 = reshape(shape = var_594, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_597 = const()[name = string("op_597"), val = tensor([1, 3, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_597, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_601 = const()[name = string("op_601"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_7_cast_fp16 = reshape(shape = var_601, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; + bool var_604_transpose_x_1 = const()[name = string("op_604_transpose_x_1"), val = bool(false)]; + bool var_604_transpose_y_1 = const()[name = string("op_604_transpose_y_1"), val = bool(true)]; + tensor var_604_cast_fp16 = matmul(transpose_x = var_604_transpose_x_1, transpose_y = var_604_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_604_cast_fp16")]; + fp16 var_605_to_fp16 = const()[name = string("op_605_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_604_cast_fp16, y = var_605_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_616_axes_0 = const()[name = string("op_616_axes_0"), val = tensor([-1])]; + bool var_616_keep_dims_0 = const()[name = string("op_616_keep_dims_0"), val = bool(true)]; + tensor var_616_cast_fp16 = reduce_sum(axes = var_616_axes_0, keep_dims = var_616_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_616_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_616_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_619_perm_0 = const()[name = string("op_619_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_621 = const()[name = string("op_621"), val = tensor([1, 1, 3072])]; + tensor var_619_cast_fp16 = transpose(perm = var_619_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_30")]; + tensor input_19_cast_fp16 = reshape(shape = var_621, x = var_619_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693583168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700661120))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_632_axes_0 = const()[name = string("op_632_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700710336)))]; + tensor var_632_cast_fp16 = layer_norm(axes = var_632_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_632_cast_fp16")]; + tensor var_639 = const()[name = string("op_639"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_640 = transpose(perm = var_639, x = var_632_cast_fp16)[name = string("transpose_29")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_640)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_662_axes_0 = const()[name = string("op_662_axes_0"), val = tensor([2])]; + tensor var_662 = squeeze(axes = var_662_axes_0, x = hidden_states_15)[name = string("op_662")]; + tensor var_663 = const()[name = string("op_663"), val = tensor([0, 2, 1])]; + tensor var_664 = transpose(perm = var_663, x = var_662)[name = string("transpose_28")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_664)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_672_axes_0 = const()[name = string("op_672_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700716544)))]; + tensor var_672_cast_fp16 = layer_norm(axes = var_672_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_672_cast_fp16")]; + tensor var_675 = const()[name = string("op_675"), val = tensor([0, 2, 1])]; + tensor var_677_axes_0 = const()[name = string("op_677_axes_0"), val = tensor([2])]; + tensor var_676 = transpose(perm = var_675, x = var_672_cast_fp16)[name = string("transpose_27")]; + tensor var_677 = expand_dims(axes = var_677_axes_0, x = var_676)[name = string("op_677")]; + string var_684_pad_type_0 = const()[name = string("op_684_pad_type_0"), val = string("valid")]; + tensor var_684_strides_0 = const()[name = string("op_684_strides_0"), val = tensor([1, 1])]; + tensor var_684_pad_0 = const()[name = string("op_684_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_684_dilations_0 = const()[name = string("op_684_dilations_0"), val = tensor([1, 1])]; + int32 var_684_groups_0 = const()[name = string("op_684_groups_0"), val = int32(1)]; + tensor var_684 = conv(dilations = var_684_dilations_0, groups = var_684_groups_0, pad = var_684_pad_0, pad_type = var_684_pad_type_0, strides = var_684_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_677)[name = string("op_684")]; + tensor var_685 = const()[name = string("op_685"), val = tensor([1, 24, 1, 128])]; + tensor var_686 = reshape(shape = var_685, x = var_684)[name = string("op_686")]; + string var_693_pad_type_0 = const()[name = string("op_693_pad_type_0"), val = string("valid")]; + tensor var_693_strides_0 = const()[name = string("op_693_strides_0"), val = tensor([1, 1])]; + tensor var_693_pad_0 = const()[name = string("op_693_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_693_dilations_0 = const()[name = string("op_693_dilations_0"), val = tensor([1, 1])]; + int32 var_693_groups_0 = const()[name = string("op_693_groups_0"), val = int32(1)]; + tensor var_693 = conv(dilations = var_693_dilations_0, groups = var_693_groups_0, pad = var_693_pad_0, pad_type = var_693_pad_type_0, strides = var_693_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_677)[name = string("op_693")]; + tensor var_694 = const()[name = string("op_694"), val = tensor([1, 8, 1, 128])]; + tensor var_695 = reshape(shape = var_694, x = var_693)[name = string("op_695")]; + string var_702_pad_type_0 = const()[name = string("op_702_pad_type_0"), val = string("valid")]; + tensor var_702_strides_0 = const()[name = string("op_702_strides_0"), val = tensor([1, 1])]; + tensor var_702_pad_0 = const()[name = string("op_702_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_702_dilations_0 = const()[name = string("op_702_dilations_0"), val = tensor([1, 1])]; + int32 var_702_groups_0 = const()[name = string("op_702_groups_0"), val = int32(1)]; + tensor var_702 = conv(dilations = var_702_dilations_0, groups = var_702_groups_0, pad = var_702_pad_0, pad_type = var_702_pad_type_0, strides = var_702_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_677)[name = string("op_702")]; + tensor var_703 = const()[name = string("op_703"), val = tensor([1, 8, 1, 128])]; + tensor var_704 = reshape(shape = var_703, x = var_702)[name = string("op_704")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_686)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_686)[name = string("x2_9")]; + tensor var_718_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_718_cast_fp16")]; + tensor var_719_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_719_cast_fp16")]; + tensor var_720_cast_fp16 = sub(x = var_718_cast_fp16, y = var_719_cast_fp16)[name = string("op_720_cast_fp16")]; + tensor var_721_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_721_cast_fp16")]; + tensor var_722_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_722_cast_fp16")]; + tensor var_723_cast_fp16 = add(x = var_721_cast_fp16, y = var_722_cast_fp16)[name = string("op_723_cast_fp16")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_54, interleave = rotated_9_interleave_0, values = (var_720_cast_fp16, var_723_cast_fp16))[name = string("rotated_9_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_695)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_695)[name = string("x2_11")]; + tensor var_739_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_739_cast_fp16")]; + tensor var_740_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_740_cast_fp16")]; + tensor var_741_cast_fp16 = sub(x = var_739_cast_fp16, y = var_740_cast_fp16)[name = string("op_741_cast_fp16")]; + tensor var_742_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_742_cast_fp16")]; + tensor var_743_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_743_cast_fp16")]; + tensor var_744_cast_fp16 = add(x = var_742_cast_fp16, y = var_743_cast_fp16)[name = string("op_744_cast_fp16")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11_cast_fp16 = concat(axis = var_54, interleave = rotated_11_interleave_0, values = (var_741_cast_fp16, var_744_cast_fp16))[name = string("rotated_11_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; + tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_379, concat_19_values3_0))[name = string("concat_19")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([30])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([31])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_379, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_704, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; + tensor var_764_begin_0 = const()[name = string("op_764_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_764_end_0 = const()[name = string("op_764_end_0"), val = tensor([3, 8, 1024, 128])]; + tensor var_764_end_mask_0 = const()[name = string("op_764_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_764_cast_fp16 = slice_by_index(begin = var_764_begin_0, end = var_764_end_0, end_mask = var_764_end_mask_0, x = coreml_update_state_23)[name = string("op_764_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_764_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_766_begin_0 = const()[name = string("op_766_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_766_end_0 = const()[name = string("op_766_end_0"), val = tensor([31, 8, 1024, 128])]; + tensor var_766_end_mask_0 = const()[name = string("op_766_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_766_cast_fp16 = slice_by_index(begin = var_766_begin_0, end = var_766_end_0, end_mask = var_766_end_mask_0, x = coreml_update_state_23)[name = string("op_766_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_766_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_775 = const()[name = string("op_775"), val = tensor([1, 3, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_775, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_779 = const()[name = string("op_779"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_11_cast_fp16 = reshape(shape = var_779, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_782 = const()[name = string("op_782"), val = tensor([1, 3, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_782, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor var_786 = const()[name = string("op_786"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_11_cast_fp16 = reshape(shape = var_786, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + bool var_789_transpose_x_1 = const()[name = string("op_789_transpose_x_1"), val = bool(false)]; + bool var_789_transpose_y_1 = const()[name = string("op_789_transpose_y_1"), val = bool(true)]; + tensor var_789_cast_fp16 = matmul(transpose_x = var_789_transpose_x_1, transpose_y = var_789_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_789_cast_fp16")]; + fp16 var_790_to_fp16 = const()[name = string("op_790_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_789_cast_fp16, y = var_790_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_801_axes_0 = const()[name = string("op_801_axes_0"), val = tensor([-1])]; + bool var_801_keep_dims_0 = const()[name = string("op_801_keep_dims_0"), val = bool(true)]; + tensor var_801_cast_fp16 = reduce_sum(axes = var_801_axes_0, keep_dims = var_801_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_801_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_801_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_804_perm_0 = const()[name = string("op_804_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_806 = const()[name = string("op_806"), val = tensor([1, 1, 3072])]; + tensor var_804_cast_fp16 = transpose(perm = var_804_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_26")]; + tensor input_33_cast_fp16 = reshape(shape = var_806, x = var_804_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700722752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707800704))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_817_axes_0 = const()[name = string("op_817_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707849920)))]; + tensor var_817_cast_fp16 = layer_norm(axes = var_817_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_817_cast_fp16")]; + tensor var_824 = const()[name = string("op_824"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_825 = transpose(perm = var_824, x = var_817_cast_fp16)[name = string("transpose_25")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_825)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_847_axes_0 = const()[name = string("op_847_axes_0"), val = tensor([2])]; + tensor var_847 = squeeze(axes = var_847_axes_0, x = hidden_states_23)[name = string("op_847")]; + tensor var_848 = const()[name = string("op_848"), val = tensor([0, 2, 1])]; + tensor var_849 = transpose(perm = var_848, x = var_847)[name = string("transpose_24")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_849)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_857_axes_0 = const()[name = string("op_857_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707856128)))]; + tensor var_857_cast_fp16 = layer_norm(axes = var_857_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_857_cast_fp16")]; + tensor var_860 = const()[name = string("op_860"), val = tensor([0, 2, 1])]; + tensor var_862_axes_0 = const()[name = string("op_862_axes_0"), val = tensor([2])]; + tensor var_861 = transpose(perm = var_860, x = var_857_cast_fp16)[name = string("transpose_23")]; + tensor var_862 = expand_dims(axes = var_862_axes_0, x = var_861)[name = string("op_862")]; + string var_869_pad_type_0 = const()[name = string("op_869_pad_type_0"), val = string("valid")]; + tensor var_869_strides_0 = const()[name = string("op_869_strides_0"), val = tensor([1, 1])]; + tensor var_869_pad_0 = const()[name = string("op_869_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_869_dilations_0 = const()[name = string("op_869_dilations_0"), val = tensor([1, 1])]; + int32 var_869_groups_0 = const()[name = string("op_869_groups_0"), val = int32(1)]; + tensor var_869 = conv(dilations = var_869_dilations_0, groups = var_869_groups_0, pad = var_869_pad_0, pad_type = var_869_pad_type_0, strides = var_869_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_862)[name = string("op_869")]; + tensor var_870 = const()[name = string("op_870"), val = tensor([1, 24, 1, 128])]; + tensor var_871 = reshape(shape = var_870, x = var_869)[name = string("op_871")]; + string var_878_pad_type_0 = const()[name = string("op_878_pad_type_0"), val = string("valid")]; + tensor var_878_strides_0 = const()[name = string("op_878_strides_0"), val = tensor([1, 1])]; + tensor var_878_pad_0 = const()[name = string("op_878_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_878_dilations_0 = const()[name = string("op_878_dilations_0"), val = tensor([1, 1])]; + int32 var_878_groups_0 = const()[name = string("op_878_groups_0"), val = int32(1)]; + tensor var_878 = conv(dilations = var_878_dilations_0, groups = var_878_groups_0, pad = var_878_pad_0, pad_type = var_878_pad_type_0, strides = var_878_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_862)[name = string("op_878")]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 8, 1, 128])]; + tensor var_880 = reshape(shape = var_879, x = var_878)[name = string("op_880")]; + string var_887_pad_type_0 = const()[name = string("op_887_pad_type_0"), val = string("valid")]; + tensor var_887_strides_0 = const()[name = string("op_887_strides_0"), val = tensor([1, 1])]; + tensor var_887_pad_0 = const()[name = string("op_887_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_887_dilations_0 = const()[name = string("op_887_dilations_0"), val = tensor([1, 1])]; + int32 var_887_groups_0 = const()[name = string("op_887_groups_0"), val = int32(1)]; + tensor var_887 = conv(dilations = var_887_dilations_0, groups = var_887_groups_0, pad = var_887_pad_0, pad_type = var_887_pad_type_0, strides = var_887_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_862)[name = string("op_887")]; + tensor var_888 = const()[name = string("op_888"), val = tensor([1, 8, 1, 128])]; + tensor var_889 = reshape(shape = var_888, x = var_887)[name = string("op_889")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_871)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_871)[name = string("x2_13")]; + tensor var_903_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_903_cast_fp16")]; + tensor var_904_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_904_cast_fp16")]; + tensor var_905_cast_fp16 = sub(x = var_903_cast_fp16, y = var_904_cast_fp16)[name = string("op_905_cast_fp16")]; + tensor var_906_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_906_cast_fp16")]; + tensor var_907_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_907_cast_fp16")]; + tensor var_908_cast_fp16 = add(x = var_906_cast_fp16, y = var_907_cast_fp16)[name = string("op_908_cast_fp16")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13_cast_fp16 = concat(axis = var_54, interleave = rotated_13_interleave_0, values = (var_905_cast_fp16, var_908_cast_fp16))[name = string("rotated_13_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_880)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_880)[name = string("x2_15")]; + tensor var_924_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_924_cast_fp16")]; + tensor var_925_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_925_cast_fp16")]; + tensor var_926_cast_fp16 = sub(x = var_924_cast_fp16, y = var_925_cast_fp16)[name = string("op_926_cast_fp16")]; + tensor var_927_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_927_cast_fp16")]; + tensor var_928_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_928_cast_fp16")]; + tensor var_929_cast_fp16 = add(x = var_927_cast_fp16, y = var_928_cast_fp16)[name = string("op_929_cast_fp16")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15_cast_fp16 = concat(axis = var_54, interleave = rotated_15_interleave_0, values = (var_926_cast_fp16, var_929_cast_fp16))[name = string("rotated_15_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_379, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([31])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([32])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; + tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; + tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; + int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; + bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; + tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_379, concat_31_values3_0))[name = string("concat_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_889, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; + tensor var_949_begin_0 = const()[name = string("op_949_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_949_end_0 = const()[name = string("op_949_end_0"), val = tensor([4, 8, 1024, 128])]; + tensor var_949_end_mask_0 = const()[name = string("op_949_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_949_cast_fp16 = slice_by_index(begin = var_949_begin_0, end = var_949_end_0, end_mask = var_949_end_mask_0, x = coreml_update_state_25)[name = string("op_949_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_949_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_951_begin_0 = const()[name = string("op_951_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_951_end_0 = const()[name = string("op_951_end_0"), val = tensor([32, 8, 1024, 128])]; + tensor var_951_end_mask_0 = const()[name = string("op_951_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_951_cast_fp16 = slice_by_index(begin = var_951_begin_0, end = var_951_end_0, end_mask = var_951_end_mask_0, x = coreml_update_state_25)[name = string("op_951_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_951_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_960 = const()[name = string("op_960"), val = tensor([1, 3, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_960, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_964 = const()[name = string("op_964"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_15_cast_fp16 = reshape(shape = var_964, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_967 = const()[name = string("op_967"), val = tensor([1, 3, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_967, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_971 = const()[name = string("op_971"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_15_cast_fp16 = reshape(shape = var_971, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")]; + bool var_974_transpose_x_1 = const()[name = string("op_974_transpose_x_1"), val = bool(false)]; + bool var_974_transpose_y_1 = const()[name = string("op_974_transpose_y_1"), val = bool(true)]; + tensor var_974_cast_fp16 = matmul(transpose_x = var_974_transpose_x_1, transpose_y = var_974_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_974_cast_fp16")]; + fp16 var_975_to_fp16 = const()[name = string("op_975_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_974_cast_fp16, y = var_975_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_986_axes_0 = const()[name = string("op_986_axes_0"), val = tensor([-1])]; + bool var_986_keep_dims_0 = const()[name = string("op_986_keep_dims_0"), val = bool(true)]; + tensor var_986_cast_fp16 = reduce_sum(axes = var_986_axes_0, keep_dims = var_986_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_986_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_986_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_989_perm_0 = const()[name = string("op_989_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_991 = const()[name = string("op_991"), val = tensor([1, 1, 3072])]; + tensor var_989_cast_fp16 = transpose(perm = var_989_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_22")]; + tensor input_47_cast_fp16 = reshape(shape = var_991, x = var_989_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707862336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714940288))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_1002_axes_0 = const()[name = string("op_1002_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714989504)))]; + tensor var_1002_cast_fp16 = layer_norm(axes = var_1002_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1002_cast_fp16")]; + tensor var_1009 = const()[name = string("op_1009"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_1010 = transpose(perm = var_1009, x = var_1002_cast_fp16)[name = string("transpose_21")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1010)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1032_axes_0 = const()[name = string("op_1032_axes_0"), val = tensor([2])]; + tensor var_1032 = squeeze(axes = var_1032_axes_0, x = hidden_states_31)[name = string("op_1032")]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([0, 2, 1])]; + tensor var_1034 = transpose(perm = var_1033, x = var_1032)[name = string("transpose_20")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1034)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1042_axes_0 = const()[name = string("op_1042_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714995712)))]; + tensor var_1042_cast_fp16 = layer_norm(axes = var_1042_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1042_cast_fp16")]; + tensor var_1045 = const()[name = string("op_1045"), val = tensor([0, 2, 1])]; + tensor var_1047_axes_0 = const()[name = string("op_1047_axes_0"), val = tensor([2])]; + tensor var_1046 = transpose(perm = var_1045, x = var_1042_cast_fp16)[name = string("transpose_19")]; + tensor var_1047 = expand_dims(axes = var_1047_axes_0, x = var_1046)[name = string("op_1047")]; + string var_1054_pad_type_0 = const()[name = string("op_1054_pad_type_0"), val = string("valid")]; + tensor var_1054_strides_0 = const()[name = string("op_1054_strides_0"), val = tensor([1, 1])]; + tensor var_1054_pad_0 = const()[name = string("op_1054_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1054_dilations_0 = const()[name = string("op_1054_dilations_0"), val = tensor([1, 1])]; + int32 var_1054_groups_0 = const()[name = string("op_1054_groups_0"), val = int32(1)]; + tensor var_1054 = conv(dilations = var_1054_dilations_0, groups = var_1054_groups_0, pad = var_1054_pad_0, pad_type = var_1054_pad_type_0, strides = var_1054_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1047)[name = string("op_1054")]; + tensor var_1055 = const()[name = string("op_1055"), val = tensor([1, 24, 1, 128])]; + tensor var_1056 = reshape(shape = var_1055, x = var_1054)[name = string("op_1056")]; + string var_1063_pad_type_0 = const()[name = string("op_1063_pad_type_0"), val = string("valid")]; + tensor var_1063_strides_0 = const()[name = string("op_1063_strides_0"), val = tensor([1, 1])]; + tensor var_1063_pad_0 = const()[name = string("op_1063_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1063_dilations_0 = const()[name = string("op_1063_dilations_0"), val = tensor([1, 1])]; + int32 var_1063_groups_0 = const()[name = string("op_1063_groups_0"), val = int32(1)]; + tensor var_1063 = conv(dilations = var_1063_dilations_0, groups = var_1063_groups_0, pad = var_1063_pad_0, pad_type = var_1063_pad_type_0, strides = var_1063_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1047)[name = string("op_1063")]; + tensor var_1064 = const()[name = string("op_1064"), val = tensor([1, 8, 1, 128])]; + tensor var_1065 = reshape(shape = var_1064, x = var_1063)[name = string("op_1065")]; + string var_1072_pad_type_0 = const()[name = string("op_1072_pad_type_0"), val = string("valid")]; + tensor var_1072_strides_0 = const()[name = string("op_1072_strides_0"), val = tensor([1, 1])]; + tensor var_1072_pad_0 = const()[name = string("op_1072_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1072_dilations_0 = const()[name = string("op_1072_dilations_0"), val = tensor([1, 1])]; + int32 var_1072_groups_0 = const()[name = string("op_1072_groups_0"), val = int32(1)]; + tensor var_1072 = conv(dilations = var_1072_dilations_0, groups = var_1072_groups_0, pad = var_1072_pad_0, pad_type = var_1072_pad_type_0, strides = var_1072_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1047)[name = string("op_1072")]; + tensor var_1073 = const()[name = string("op_1073"), val = tensor([1, 8, 1, 128])]; + tensor var_1074 = reshape(shape = var_1073, x = var_1072)[name = string("op_1074")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1056)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1056)[name = string("x2_17")]; + tensor var_1088_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1088_cast_fp16")]; + tensor var_1089_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1089_cast_fp16")]; + tensor var_1090_cast_fp16 = sub(x = var_1088_cast_fp16, y = var_1089_cast_fp16)[name = string("op_1090_cast_fp16")]; + tensor var_1091_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1091_cast_fp16")]; + tensor var_1092_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1092_cast_fp16")]; + tensor var_1093_cast_fp16 = add(x = var_1091_cast_fp16, y = var_1092_cast_fp16)[name = string("op_1093_cast_fp16")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17_cast_fp16 = concat(axis = var_54, interleave = rotated_17_interleave_0, values = (var_1090_cast_fp16, var_1093_cast_fp16))[name = string("rotated_17_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1065)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1065)[name = string("x2_19")]; + tensor var_1109_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1109_cast_fp16")]; + tensor var_1110_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1110_cast_fp16")]; + tensor var_1111_cast_fp16 = sub(x = var_1109_cast_fp16, y = var_1110_cast_fp16)[name = string("op_1111_cast_fp16")]; + tensor var_1112_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1112_cast_fp16")]; + tensor var_1113_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1113_cast_fp16")]; + tensor var_1114_cast_fp16 = add(x = var_1112_cast_fp16, y = var_1113_cast_fp16)[name = string("op_1114_cast_fp16")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19_cast_fp16 = concat(axis = var_54, interleave = rotated_19_interleave_0, values = (var_1111_cast_fp16, var_1114_cast_fp16))[name = string("rotated_19_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; + tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_379, concat_35_values3_0))[name = string("concat_35")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([32])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([33])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_379, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1074, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; + tensor var_1134_begin_0 = const()[name = string("op_1134_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1134_end_0 = const()[name = string("op_1134_end_0"), val = tensor([5, 8, 1024, 128])]; + tensor var_1134_end_mask_0 = const()[name = string("op_1134_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = coreml_update_state_27)[name = string("op_1134_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1134_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = string("op_1136_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_1136_end_0 = const()[name = string("op_1136_end_0"), val = tensor([33, 8, 1024, 128])]; + tensor var_1136_end_mask_0 = const()[name = string("op_1136_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = coreml_update_state_27)[name = string("op_1136_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1136_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1145 = const()[name = string("op_1145"), val = tensor([1, 3, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1145, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1149 = const()[name = string("op_1149"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_19_cast_fp16 = reshape(shape = var_1149, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1152 = const()[name = string("op_1152"), val = tensor([1, 3, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1152, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1156 = const()[name = string("op_1156"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_19_cast_fp16 = reshape(shape = var_1156, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")]; + bool var_1159_transpose_x_1 = const()[name = string("op_1159_transpose_x_1"), val = bool(false)]; + bool var_1159_transpose_y_1 = const()[name = string("op_1159_transpose_y_1"), val = bool(true)]; + tensor var_1159_cast_fp16 = matmul(transpose_x = var_1159_transpose_x_1, transpose_y = var_1159_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1159_cast_fp16")]; + fp16 var_1160_to_fp16 = const()[name = string("op_1160_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_17_cast_fp16 = mul(x = var_1159_cast_fp16, y = var_1160_to_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1171_axes_0 = const()[name = string("op_1171_axes_0"), val = tensor([-1])]; + bool var_1171_keep_dims_0 = const()[name = string("op_1171_keep_dims_0"), val = bool(true)]; + tensor var_1171_cast_fp16 = reduce_sum(axes = var_1171_axes_0, keep_dims = var_1171_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1171_cast_fp16")]; + tensor attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1171_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1174_perm_0 = const()[name = string("op_1174_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1176 = const()[name = string("op_1176"), val = tensor([1, 1, 3072])]; + tensor var_1174_cast_fp16 = transpose(perm = var_1174_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_18")]; + tensor input_61_cast_fp16 = reshape(shape = var_1176, x = var_1174_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(715001920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722079872))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1187_axes_0 = const()[name = string("op_1187_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722129088)))]; + tensor var_1187_cast_fp16 = layer_norm(axes = var_1187_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1187_cast_fp16")]; + tensor var_1194 = const()[name = string("op_1194"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1195 = transpose(perm = var_1194, x = var_1187_cast_fp16)[name = string("transpose_17")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1195)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1217_axes_0 = const()[name = string("op_1217_axes_0"), val = tensor([2])]; + tensor var_1217 = squeeze(axes = var_1217_axes_0, x = hidden_states_39)[name = string("op_1217")]; + tensor var_1218 = const()[name = string("op_1218"), val = tensor([0, 2, 1])]; + tensor var_1219 = transpose(perm = var_1218, x = var_1217)[name = string("transpose_16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1219)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1227_axes_0 = const()[name = string("op_1227_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722135296)))]; + tensor var_1227_cast_fp16 = layer_norm(axes = var_1227_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1227_cast_fp16")]; + tensor var_1230 = const()[name = string("op_1230"), val = tensor([0, 2, 1])]; + tensor var_1232_axes_0 = const()[name = string("op_1232_axes_0"), val = tensor([2])]; + tensor var_1231 = transpose(perm = var_1230, x = var_1227_cast_fp16)[name = string("transpose_15")]; + tensor var_1232 = expand_dims(axes = var_1232_axes_0, x = var_1231)[name = string("op_1232")]; + string var_1239_pad_type_0 = const()[name = string("op_1239_pad_type_0"), val = string("valid")]; + tensor var_1239_strides_0 = const()[name = string("op_1239_strides_0"), val = tensor([1, 1])]; + tensor var_1239_pad_0 = const()[name = string("op_1239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1239_dilations_0 = const()[name = string("op_1239_dilations_0"), val = tensor([1, 1])]; + int32 var_1239_groups_0 = const()[name = string("op_1239_groups_0"), val = int32(1)]; + tensor var_1239 = conv(dilations = var_1239_dilations_0, groups = var_1239_groups_0, pad = var_1239_pad_0, pad_type = var_1239_pad_type_0, strides = var_1239_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1232)[name = string("op_1239")]; + tensor var_1240 = const()[name = string("op_1240"), val = tensor([1, 24, 1, 128])]; + tensor var_1241 = reshape(shape = var_1240, x = var_1239)[name = string("op_1241")]; + string var_1248_pad_type_0 = const()[name = string("op_1248_pad_type_0"), val = string("valid")]; + tensor var_1248_strides_0 = const()[name = string("op_1248_strides_0"), val = tensor([1, 1])]; + tensor var_1248_pad_0 = const()[name = string("op_1248_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1248_dilations_0 = const()[name = string("op_1248_dilations_0"), val = tensor([1, 1])]; + int32 var_1248_groups_0 = const()[name = string("op_1248_groups_0"), val = int32(1)]; + tensor var_1248 = conv(dilations = var_1248_dilations_0, groups = var_1248_groups_0, pad = var_1248_pad_0, pad_type = var_1248_pad_type_0, strides = var_1248_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1232)[name = string("op_1248")]; + tensor var_1249 = const()[name = string("op_1249"), val = tensor([1, 8, 1, 128])]; + tensor var_1250 = reshape(shape = var_1249, x = var_1248)[name = string("op_1250")]; + string var_1257_pad_type_0 = const()[name = string("op_1257_pad_type_0"), val = string("valid")]; + tensor var_1257_strides_0 = const()[name = string("op_1257_strides_0"), val = tensor([1, 1])]; + tensor var_1257_pad_0 = const()[name = string("op_1257_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1257_dilations_0 = const()[name = string("op_1257_dilations_0"), val = tensor([1, 1])]; + int32 var_1257_groups_0 = const()[name = string("op_1257_groups_0"), val = int32(1)]; + tensor var_1257 = conv(dilations = var_1257_dilations_0, groups = var_1257_groups_0, pad = var_1257_pad_0, pad_type = var_1257_pad_type_0, strides = var_1257_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1232)[name = string("op_1257")]; + tensor var_1258 = const()[name = string("op_1258"), val = tensor([1, 8, 1, 128])]; + tensor var_1259 = reshape(shape = var_1258, x = var_1257)[name = string("op_1259")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1241)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1241)[name = string("x2_21")]; + tensor var_1273_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1273_cast_fp16")]; + tensor var_1274_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1274_cast_fp16")]; + tensor var_1275_cast_fp16 = sub(x = var_1273_cast_fp16, y = var_1274_cast_fp16)[name = string("op_1275_cast_fp16")]; + tensor var_1276_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1276_cast_fp16")]; + tensor var_1277_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1277_cast_fp16")]; + tensor var_1278_cast_fp16 = add(x = var_1276_cast_fp16, y = var_1277_cast_fp16)[name = string("op_1278_cast_fp16")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21_cast_fp16 = concat(axis = var_54, interleave = rotated_21_interleave_0, values = (var_1275_cast_fp16, var_1278_cast_fp16))[name = string("rotated_21_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1250)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1250)[name = string("x2_23")]; + tensor var_1294_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1294_cast_fp16")]; + tensor var_1295_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1295_cast_fp16")]; + tensor var_1296_cast_fp16 = sub(x = var_1294_cast_fp16, y = var_1295_cast_fp16)[name = string("op_1296_cast_fp16")]; + tensor var_1297_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1297_cast_fp16")]; + tensor var_1298_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor var_1299_cast_fp16 = add(x = var_1297_cast_fp16, y = var_1298_cast_fp16)[name = string("op_1299_cast_fp16")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23_cast_fp16 = concat(axis = var_54, interleave = rotated_23_interleave_0, values = (var_1296_cast_fp16, var_1299_cast_fp16))[name = string("rotated_23_cast_fp16")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_379, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([33])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([34])]; + int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; + bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; + tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; + tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; + tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; + int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; + bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; + tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_379, concat_47_values3_0))[name = string("concat_47")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1259, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; + tensor var_1319_begin_0 = const()[name = string("op_1319_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1319_end_0 = const()[name = string("op_1319_end_0"), val = tensor([6, 8, 1024, 128])]; + tensor var_1319_end_mask_0 = const()[name = string("op_1319_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1319_cast_fp16 = slice_by_index(begin = var_1319_begin_0, end = var_1319_end_0, end_mask = var_1319_end_mask_0, x = coreml_update_state_29)[name = string("op_1319_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1319_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1321_begin_0 = const()[name = string("op_1321_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_1321_end_0 = const()[name = string("op_1321_end_0"), val = tensor([34, 8, 1024, 128])]; + tensor var_1321_end_mask_0 = const()[name = string("op_1321_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1321_cast_fp16 = slice_by_index(begin = var_1321_begin_0, end = var_1321_end_0, end_mask = var_1321_end_mask_0, x = coreml_update_state_29)[name = string("op_1321_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1321_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, 3, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1330, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1334 = const()[name = string("op_1334"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_23_cast_fp16 = reshape(shape = var_1334, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1337 = const()[name = string("op_1337"), val = tensor([1, 3, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1337, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_1341 = const()[name = string("op_1341"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_23_cast_fp16 = reshape(shape = var_1341, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")]; + bool var_1344_transpose_x_1 = const()[name = string("op_1344_transpose_x_1"), val = bool(false)]; + bool var_1344_transpose_y_1 = const()[name = string("op_1344_transpose_y_1"), val = bool(true)]; + tensor var_1344_cast_fp16 = matmul(transpose_x = var_1344_transpose_x_1, transpose_y = var_1344_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1344_cast_fp16")]; + fp16 var_1345_to_fp16 = const()[name = string("op_1345_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_21_cast_fp16 = mul(x = var_1344_cast_fp16, y = var_1345_to_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1356_axes_0 = const()[name = string("op_1356_axes_0"), val = tensor([-1])]; + bool var_1356_keep_dims_0 = const()[name = string("op_1356_keep_dims_0"), val = bool(true)]; + tensor var_1356_cast_fp16 = reduce_sum(axes = var_1356_axes_0, keep_dims = var_1356_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1356_cast_fp16")]; + tensor attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1356_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_1359_perm_0 = const()[name = string("op_1359_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1361 = const()[name = string("op_1361"), val = tensor([1, 1, 3072])]; + tensor var_1359_cast_fp16 = transpose(perm = var_1359_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_14")]; + tensor input_75_cast_fp16 = reshape(shape = var_1361, x = var_1359_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722141504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729219456))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1372_axes_0 = const()[name = string("op_1372_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729268672)))]; + tensor var_1372_cast_fp16 = layer_norm(axes = var_1372_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1372_cast_fp16")]; + tensor var_1379 = const()[name = string("op_1379"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1380 = transpose(perm = var_1379, x = var_1372_cast_fp16)[name = string("transpose_13")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1380)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1402_axes_0 = const()[name = string("op_1402_axes_0"), val = tensor([2])]; + tensor var_1402 = squeeze(axes = var_1402_axes_0, x = hidden_states_47)[name = string("op_1402")]; + tensor var_1403 = const()[name = string("op_1403"), val = tensor([0, 2, 1])]; + tensor var_1404 = transpose(perm = var_1403, x = var_1402)[name = string("transpose_12")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1404)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1412_axes_0 = const()[name = string("op_1412_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729274880)))]; + tensor var_1412_cast_fp16 = layer_norm(axes = var_1412_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1412_cast_fp16")]; + tensor var_1415 = const()[name = string("op_1415"), val = tensor([0, 2, 1])]; + tensor var_1417_axes_0 = const()[name = string("op_1417_axes_0"), val = tensor([2])]; + tensor var_1416 = transpose(perm = var_1415, x = var_1412_cast_fp16)[name = string("transpose_11")]; + tensor var_1417 = expand_dims(axes = var_1417_axes_0, x = var_1416)[name = string("op_1417")]; + string var_1424_pad_type_0 = const()[name = string("op_1424_pad_type_0"), val = string("valid")]; + tensor var_1424_strides_0 = const()[name = string("op_1424_strides_0"), val = tensor([1, 1])]; + tensor var_1424_pad_0 = const()[name = string("op_1424_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1424_dilations_0 = const()[name = string("op_1424_dilations_0"), val = tensor([1, 1])]; + int32 var_1424_groups_0 = const()[name = string("op_1424_groups_0"), val = int32(1)]; + tensor var_1424 = conv(dilations = var_1424_dilations_0, groups = var_1424_groups_0, pad = var_1424_pad_0, pad_type = var_1424_pad_type_0, strides = var_1424_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1417)[name = string("op_1424")]; + tensor var_1425 = const()[name = string("op_1425"), val = tensor([1, 24, 1, 128])]; + tensor var_1426 = reshape(shape = var_1425, x = var_1424)[name = string("op_1426")]; + string var_1433_pad_type_0 = const()[name = string("op_1433_pad_type_0"), val = string("valid")]; + tensor var_1433_strides_0 = const()[name = string("op_1433_strides_0"), val = tensor([1, 1])]; + tensor var_1433_pad_0 = const()[name = string("op_1433_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1433_dilations_0 = const()[name = string("op_1433_dilations_0"), val = tensor([1, 1])]; + int32 var_1433_groups_0 = const()[name = string("op_1433_groups_0"), val = int32(1)]; + tensor var_1433 = conv(dilations = var_1433_dilations_0, groups = var_1433_groups_0, pad = var_1433_pad_0, pad_type = var_1433_pad_type_0, strides = var_1433_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1417)[name = string("op_1433")]; + tensor var_1434 = const()[name = string("op_1434"), val = tensor([1, 8, 1, 128])]; + tensor var_1435 = reshape(shape = var_1434, x = var_1433)[name = string("op_1435")]; + string var_1442_pad_type_0 = const()[name = string("op_1442_pad_type_0"), val = string("valid")]; + tensor var_1442_strides_0 = const()[name = string("op_1442_strides_0"), val = tensor([1, 1])]; + tensor var_1442_pad_0 = const()[name = string("op_1442_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1442_dilations_0 = const()[name = string("op_1442_dilations_0"), val = tensor([1, 1])]; + int32 var_1442_groups_0 = const()[name = string("op_1442_groups_0"), val = int32(1)]; + tensor var_1442 = conv(dilations = var_1442_dilations_0, groups = var_1442_groups_0, pad = var_1442_pad_0, pad_type = var_1442_pad_type_0, strides = var_1442_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1417)[name = string("op_1442")]; + tensor var_1443 = const()[name = string("op_1443"), val = tensor([1, 8, 1, 128])]; + tensor var_1444 = reshape(shape = var_1443, x = var_1442)[name = string("op_1444")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1426)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1426)[name = string("x2_25")]; + tensor var_1458_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1458_cast_fp16")]; + tensor var_1459_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1459_cast_fp16")]; + tensor var_1460_cast_fp16 = sub(x = var_1458_cast_fp16, y = var_1459_cast_fp16)[name = string("op_1460_cast_fp16")]; + tensor var_1461_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1461_cast_fp16")]; + tensor var_1462_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1462_cast_fp16")]; + tensor var_1463_cast_fp16 = add(x = var_1461_cast_fp16, y = var_1462_cast_fp16)[name = string("op_1463_cast_fp16")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25_cast_fp16 = concat(axis = var_54, interleave = rotated_25_interleave_0, values = (var_1460_cast_fp16, var_1463_cast_fp16))[name = string("rotated_25_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1435)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1435)[name = string("x2_27")]; + tensor var_1479_cast_fp16 = mul(x = x1_27, y = cos_3_cast_fp16)[name = string("op_1479_cast_fp16")]; + tensor var_1480_cast_fp16 = mul(x = x2_27, y = sin_3_cast_fp16)[name = string("op_1480_cast_fp16")]; + tensor var_1481_cast_fp16 = sub(x = var_1479_cast_fp16, y = var_1480_cast_fp16)[name = string("op_1481_cast_fp16")]; + tensor var_1482_cast_fp16 = mul(x = x2_27, y = cos_3_cast_fp16)[name = string("op_1482_cast_fp16")]; + tensor var_1483_cast_fp16 = mul(x = x1_27, y = sin_3_cast_fp16)[name = string("op_1483_cast_fp16")]; + tensor var_1484_cast_fp16 = add(x = var_1482_cast_fp16, y = var_1483_cast_fp16)[name = string("op_1484_cast_fp16")]; + bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; + tensor rotated_27_cast_fp16 = concat(axis = var_54, interleave = rotated_27_interleave_0, values = (var_1481_cast_fp16, var_1484_cast_fp16))[name = string("rotated_27_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_379, concat_51_values3_0))[name = string("concat_51")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27_cast_fp16, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([34])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([35])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; + tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; + tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_379, concat_55_values3_0))[name = string("concat_55")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1444, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; + tensor var_1504_begin_0 = const()[name = string("op_1504_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_1504_end_0 = const()[name = string("op_1504_end_0"), val = tensor([7, 8, 1024, 128])]; + tensor var_1504_end_mask_0 = const()[name = string("op_1504_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1504_cast_fp16 = slice_by_index(begin = var_1504_begin_0, end = var_1504_end_0, end_mask = var_1504_end_mask_0, x = coreml_update_state_31)[name = string("op_1504_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1504_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_1506_begin_0 = const()[name = string("op_1506_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_1506_end_0 = const()[name = string("op_1506_end_0"), val = tensor([35, 8, 1024, 128])]; + tensor var_1506_end_mask_0 = const()[name = string("op_1506_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1506_cast_fp16 = slice_by_index(begin = var_1506_begin_0, end = var_1506_end_0, end_mask = var_1506_end_mask_0, x = coreml_update_state_31)[name = string("op_1506_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1506_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1515 = const()[name = string("op_1515"), val = tensor([1, 3, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1515, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1519 = const()[name = string("op_1519"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_27_cast_fp16 = reshape(shape = var_1519, x = x_181_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1522 = const()[name = string("op_1522"), val = tensor([1, 3, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1522, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_1526 = const()[name = string("op_1526"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_27_cast_fp16 = reshape(shape = var_1526, x = x_187_cast_fp16)[name = string("value_states_27_cast_fp16")]; + bool var_1529_transpose_x_1 = const()[name = string("op_1529_transpose_x_1"), val = bool(false)]; + bool var_1529_transpose_y_1 = const()[name = string("op_1529_transpose_y_1"), val = bool(true)]; + tensor var_1529_cast_fp16 = matmul(transpose_x = var_1529_transpose_x_1, transpose_y = var_1529_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_27_cast_fp16)[name = string("op_1529_cast_fp16")]; + fp16 var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_1529_cast_fp16, y = var_1530_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; + tensor var_1541_axes_0 = const()[name = string("op_1541_axes_0"), val = tensor([-1])]; + bool var_1541_keep_dims_0 = const()[name = string("op_1541_keep_dims_0"), val = bool(true)]; + tensor var_1541_cast_fp16 = reduce_sum(axes = var_1541_axes_0, keep_dims = var_1541_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1541_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1541_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_27_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1544_perm_0 = const()[name = string("op_1544_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1546 = const()[name = string("op_1546"), val = tensor([1, 1, 3072])]; + tensor var_1544_cast_fp16 = transpose(perm = var_1544_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_10")]; + tensor input_89_cast_fp16 = reshape(shape = var_1546, x = var_1544_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729281088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736359040))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; + bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; + tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1557_axes_0 = const()[name = string("op_1557_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736408256)))]; + tensor var_1557_cast_fp16 = layer_norm(axes = var_1557_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1557_cast_fp16")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1565 = transpose(perm = var_1564, x = var_1557_cast_fp16)[name = string("transpose_9")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1565)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; + tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; + tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; + int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; + tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; + tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; + tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; + string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; + tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; + tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; + tensor var_1587_axes_0 = const()[name = string("op_1587_axes_0"), val = tensor([2])]; + tensor var_1587 = squeeze(axes = var_1587_axes_0, x = hidden_states_55)[name = string("op_1587")]; + tensor var_1588 = const()[name = string("op_1588"), val = tensor([0, 2, 1])]; + tensor var_1589 = transpose(perm = var_1588, x = var_1587)[name = string("transpose_8")]; + tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1589)[name = string("hidden_states_57_cast_fp16")]; + tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; + bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; + tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; + tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor var_1597_axes_0 = const()[name = string("op_1597_axes_0"), val = tensor([-1])]; + tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736414464)))]; + tensor var_1597_cast_fp16 = layer_norm(axes = var_1597_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor var_1600 = const()[name = string("op_1600"), val = tensor([0, 2, 1])]; + tensor var_1602_axes_0 = const()[name = string("op_1602_axes_0"), val = tensor([2])]; + tensor var_1601 = transpose(perm = var_1600, x = var_1597_cast_fp16)[name = string("transpose_7")]; + tensor var_1602 = expand_dims(axes = var_1602_axes_0, x = var_1601)[name = string("op_1602")]; + string var_1609_pad_type_0 = const()[name = string("op_1609_pad_type_0"), val = string("valid")]; + tensor var_1609_strides_0 = const()[name = string("op_1609_strides_0"), val = tensor([1, 1])]; + tensor var_1609_pad_0 = const()[name = string("op_1609_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1609_dilations_0 = const()[name = string("op_1609_dilations_0"), val = tensor([1, 1])]; + int32 var_1609_groups_0 = const()[name = string("op_1609_groups_0"), val = int32(1)]; + tensor var_1609 = conv(dilations = var_1609_dilations_0, groups = var_1609_groups_0, pad = var_1609_pad_0, pad_type = var_1609_pad_type_0, strides = var_1609_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_1602)[name = string("op_1609")]; + tensor var_1610 = const()[name = string("op_1610"), val = tensor([1, 24, 1, 128])]; + tensor var_1611 = reshape(shape = var_1610, x = var_1609)[name = string("op_1611")]; + string var_1618_pad_type_0 = const()[name = string("op_1618_pad_type_0"), val = string("valid")]; + tensor var_1618_strides_0 = const()[name = string("op_1618_strides_0"), val = tensor([1, 1])]; + tensor var_1618_pad_0 = const()[name = string("op_1618_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1618_dilations_0 = const()[name = string("op_1618_dilations_0"), val = tensor([1, 1])]; + int32 var_1618_groups_0 = const()[name = string("op_1618_groups_0"), val = int32(1)]; + tensor var_1618 = conv(dilations = var_1618_dilations_0, groups = var_1618_groups_0, pad = var_1618_pad_0, pad_type = var_1618_pad_type_0, strides = var_1618_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_1602)[name = string("op_1618")]; + tensor var_1619 = const()[name = string("op_1619"), val = tensor([1, 8, 1, 128])]; + tensor var_1620 = reshape(shape = var_1619, x = var_1618)[name = string("op_1620")]; + string var_1627_pad_type_0 = const()[name = string("op_1627_pad_type_0"), val = string("valid")]; + tensor var_1627_strides_0 = const()[name = string("op_1627_strides_0"), val = tensor([1, 1])]; + tensor var_1627_pad_0 = const()[name = string("op_1627_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1627_dilations_0 = const()[name = string("op_1627_dilations_0"), val = tensor([1, 1])]; + int32 var_1627_groups_0 = const()[name = string("op_1627_groups_0"), val = int32(1)]; + tensor var_1627 = conv(dilations = var_1627_dilations_0, groups = var_1627_groups_0, pad = var_1627_pad_0, pad_type = var_1627_pad_type_0, strides = var_1627_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_1602)[name = string("op_1627")]; + tensor var_1628 = const()[name = string("op_1628"), val = tensor([1, 8, 1, 128])]; + tensor var_1629 = reshape(shape = var_1628, x = var_1627)[name = string("op_1629")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1611)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1611)[name = string("x2_29")]; + tensor var_1643_cast_fp16 = mul(x = x1_29, y = cos_3_cast_fp16)[name = string("op_1643_cast_fp16")]; + tensor var_1644_cast_fp16 = mul(x = x2_29, y = sin_3_cast_fp16)[name = string("op_1644_cast_fp16")]; + tensor var_1645_cast_fp16 = sub(x = var_1643_cast_fp16, y = var_1644_cast_fp16)[name = string("op_1645_cast_fp16")]; + tensor var_1646_cast_fp16 = mul(x = x2_29, y = cos_3_cast_fp16)[name = string("op_1646_cast_fp16")]; + tensor var_1647_cast_fp16 = mul(x = x1_29, y = sin_3_cast_fp16)[name = string("op_1647_cast_fp16")]; + tensor var_1648_cast_fp16 = add(x = var_1646_cast_fp16, y = var_1647_cast_fp16)[name = string("op_1648_cast_fp16")]; + bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; + tensor rotated_29_cast_fp16 = concat(axis = var_54, interleave = rotated_29_interleave_0, values = (var_1645_cast_fp16, var_1648_cast_fp16))[name = string("rotated_29_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_1620)[name = string("x1_31")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_1620)[name = string("x2_31")]; + tensor var_1664_cast_fp16 = mul(x = x1_31, y = cos_3_cast_fp16)[name = string("op_1664_cast_fp16")]; + tensor var_1665_cast_fp16 = mul(x = x2_31, y = sin_3_cast_fp16)[name = string("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = sub(x = var_1664_cast_fp16, y = var_1665_cast_fp16)[name = string("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = mul(x = x2_31, y = cos_3_cast_fp16)[name = string("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = mul(x = x1_31, y = sin_3_cast_fp16)[name = string("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = add(x = var_1667_cast_fp16, y = var_1668_cast_fp16)[name = string("op_1669_cast_fp16")]; + bool rotated_31_interleave_0 = const()[name = string("rotated_31_interleave_0"), val = bool(false)]; + tensor rotated_31_cast_fp16 = concat(axis = var_54, interleave = rotated_31_interleave_0, values = (var_1666_cast_fp16, var_1669_cast_fp16))[name = string("rotated_31_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; + int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; + bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; + tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; + tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; + tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_379, concat_59_values3_0))[name = string("concat_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_31_cast_fp16, x = coreml_update_state_31)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_32 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([35])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([36])]; + int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; + bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; + tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_379, concat_63_values3_0))[name = string("concat_63")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_1629, x = coreml_update_state_32)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_33 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; + tensor var_1689_begin_0 = const()[name = string("op_1689_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_1689_end_0 = const()[name = string("op_1689_end_0"), val = tensor([8, 8, 1024, 128])]; + tensor var_1689_end_mask_0 = const()[name = string("op_1689_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1689_cast_fp16 = slice_by_index(begin = var_1689_begin_0, end = var_1689_end_0, end_mask = var_1689_end_mask_0, x = coreml_update_state_33)[name = string("op_1689_cast_fp16")]; + tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; + tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_1689_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; + tensor var_1691_begin_0 = const()[name = string("op_1691_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_1691_end_0 = const()[name = string("op_1691_end_0"), val = tensor([36, 8, 1024, 128])]; + tensor var_1691_end_mask_0 = const()[name = string("op_1691_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1691_cast_fp16 = slice_by_index(begin = var_1691_begin_0, end = var_1691_end_0, end_mask = var_1691_end_mask_0, x = coreml_update_state_33)[name = string("op_1691_cast_fp16")]; + tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; + tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_1691_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; + tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; + tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_1700 = const()[name = string("op_1700"), val = tensor([1, 3, 1, 1])]; + tensor x_209_cast_fp16 = tile(reps = var_1700, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1704 = const()[name = string("op_1704"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_31_cast_fp16 = reshape(shape = var_1704, x = x_209_cast_fp16)[name = string("key_states_31_cast_fp16")]; + tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; + tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_1707 = const()[name = string("op_1707"), val = tensor([1, 3, 1, 1])]; + tensor x_215_cast_fp16 = tile(reps = var_1707, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; + tensor var_1711 = const()[name = string("op_1711"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_31_cast_fp16 = reshape(shape = var_1711, x = x_215_cast_fp16)[name = string("value_states_31_cast_fp16")]; + bool var_1714_transpose_x_1 = const()[name = string("op_1714_transpose_x_1"), val = bool(false)]; + bool var_1714_transpose_y_1 = const()[name = string("op_1714_transpose_y_1"), val = bool(true)]; + tensor var_1714_cast_fp16 = matmul(transpose_x = var_1714_transpose_x_1, transpose_y = var_1714_transpose_y_1, x = rotated_29_cast_fp16, y = key_states_31_cast_fp16)[name = string("op_1714_cast_fp16")]; + fp16 var_1715_to_fp16 = const()[name = string("op_1715_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_29_cast_fp16 = mul(x = var_1714_cast_fp16, y = var_1715_to_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor exp_x_15_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_15_cast_fp16")]; + tensor var_1726_axes_0 = const()[name = string("op_1726_axes_0"), val = tensor([-1])]; + bool var_1726_keep_dims_0 = const()[name = string("op_1726_keep_dims_0"), val = bool(true)]; + tensor var_1726_cast_fp16 = reduce_sum(axes = var_1726_axes_0, keep_dims = var_1726_keep_dims_0, x = exp_x_15_cast_fp16)[name = string("op_1726_cast_fp16")]; + tensor attn_weights_31_cast_fp16 = real_div(x = exp_x_15_cast_fp16, y = var_1726_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = attn_weights_31_cast_fp16, y = value_states_31_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_1729_perm_0 = const()[name = string("op_1729_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1731 = const()[name = string("op_1731"), val = tensor([1, 1, 3072])]; + tensor var_1729_cast_fp16 = transpose(perm = var_1729_perm_0, x = attn_output_43_cast_fp16)[name = string("transpose_6")]; + tensor input_103_cast_fp16 = reshape(shape = var_1731, x = var_1729_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736420672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743498624))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; + bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; + tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; + tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_1742_axes_0 = const()[name = string("op_1742_axes_0"), val = tensor([-1])]; + tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743547840)))]; + tensor var_1742_cast_fp16 = layer_norm(axes = var_1742_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1742_cast_fp16")]; + tensor var_1749 = const()[name = string("op_1749"), val = tensor([0, 2, 1])]; + tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; + tensor var_1750 = transpose(perm = var_1749, x = var_1742_cast_fp16)[name = string("transpose_5")]; + tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1750)[name = string("input_107")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; + string up_states_15_pad_type_0 = const()[name = string("up_states_15_pad_type_0"), val = string("valid")]; + tensor up_states_15_strides_0 = const()[name = string("up_states_15_strides_0"), val = tensor([1, 1])]; + tensor up_states_15_pad_0 = const()[name = string("up_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_15_dilations_0 = const()[name = string("up_states_15_dilations_0"), val = tensor([1, 1])]; + int32 up_states_15_groups_0 = const()[name = string("up_states_15_groups_0"), val = int32(1)]; + tensor up_states_15 = conv(dilations = up_states_15_dilations_0, groups = up_states_15_groups_0, pad = up_states_15_pad_0, pad_type = up_states_15_pad_type_0, strides = up_states_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states_15")]; + tensor gate_states_15 = silu(x = input_109)[name = string("gate_states_15")]; + tensor input_111 = mul(x = gate_states_15, y = up_states_15)[name = string("input_111")]; + string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; + tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; + tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; + tensor var_1772_axes_0 = const()[name = string("op_1772_axes_0"), val = tensor([2])]; + tensor var_1772 = squeeze(axes = var_1772_axes_0, x = hidden_states_63)[name = string("op_1772")]; + tensor var_1773 = const()[name = string("op_1773"), val = tensor([0, 2, 1])]; + tensor var_1774 = transpose(perm = var_1773, x = var_1772)[name = string("transpose_4")]; + tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_1774)[name = string("hidden_states_65_cast_fp16")]; + tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; + bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; + tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_33_cast_fp16")]; + tensor input_113_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_33_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor var_1782_axes_0 = const()[name = string("op_1782_axes_0"), val = tensor([-1])]; + tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743554048)))]; + tensor var_1782_cast_fp16 = layer_norm(axes = var_1782_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_113_cast_fp16)[name = string("op_1782_cast_fp16")]; + tensor var_1785 = const()[name = string("op_1785"), val = tensor([0, 2, 1])]; + tensor var_1787_axes_0 = const()[name = string("op_1787_axes_0"), val = tensor([2])]; + tensor var_1786 = transpose(perm = var_1785, x = var_1782_cast_fp16)[name = string("transpose_3")]; + tensor var_1787 = expand_dims(axes = var_1787_axes_0, x = var_1786)[name = string("op_1787")]; + string var_1794_pad_type_0 = const()[name = string("op_1794_pad_type_0"), val = string("valid")]; + tensor var_1794_strides_0 = const()[name = string("op_1794_strides_0"), val = tensor([1, 1])]; + tensor var_1794_pad_0 = const()[name = string("op_1794_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1794_dilations_0 = const()[name = string("op_1794_dilations_0"), val = tensor([1, 1])]; + int32 var_1794_groups_0 = const()[name = string("op_1794_groups_0"), val = int32(1)]; + tensor var_1794 = conv(dilations = var_1794_dilations_0, groups = var_1794_groups_0, pad = var_1794_pad_0, pad_type = var_1794_pad_type_0, strides = var_1794_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_1787)[name = string("op_1794")]; + tensor var_1795 = const()[name = string("op_1795"), val = tensor([1, 24, 1, 128])]; + tensor var_1796 = reshape(shape = var_1795, x = var_1794)[name = string("op_1796")]; + string var_1803_pad_type_0 = const()[name = string("op_1803_pad_type_0"), val = string("valid")]; + tensor var_1803_strides_0 = const()[name = string("op_1803_strides_0"), val = tensor([1, 1])]; + tensor var_1803_pad_0 = const()[name = string("op_1803_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1803_dilations_0 = const()[name = string("op_1803_dilations_0"), val = tensor([1, 1])]; + int32 var_1803_groups_0 = const()[name = string("op_1803_groups_0"), val = int32(1)]; + tensor var_1803 = conv(dilations = var_1803_dilations_0, groups = var_1803_groups_0, pad = var_1803_pad_0, pad_type = var_1803_pad_type_0, strides = var_1803_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_1787)[name = string("op_1803")]; + tensor var_1804 = const()[name = string("op_1804"), val = tensor([1, 8, 1, 128])]; + tensor var_1805 = reshape(shape = var_1804, x = var_1803)[name = string("op_1805")]; + string var_1812_pad_type_0 = const()[name = string("op_1812_pad_type_0"), val = string("valid")]; + tensor var_1812_strides_0 = const()[name = string("op_1812_strides_0"), val = tensor([1, 1])]; + tensor var_1812_pad_0 = const()[name = string("op_1812_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1812_dilations_0 = const()[name = string("op_1812_dilations_0"), val = tensor([1, 1])]; + int32 var_1812_groups_0 = const()[name = string("op_1812_groups_0"), val = int32(1)]; + tensor var_1812 = conv(dilations = var_1812_dilations_0, groups = var_1812_groups_0, pad = var_1812_pad_0, pad_type = var_1812_pad_type_0, strides = var_1812_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_1787)[name = string("op_1812")]; + tensor var_1813 = const()[name = string("op_1813"), val = tensor([1, 8, 1, 128])]; + tensor var_1814 = reshape(shape = var_1813, x = var_1812)[name = string("op_1814")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_1796)[name = string("x1_33")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_1796)[name = string("x2_33")]; + tensor var_1828_cast_fp16 = mul(x = x1_33, y = cos_3_cast_fp16)[name = string("op_1828_cast_fp16")]; + tensor var_1829_cast_fp16 = mul(x = x2_33, y = sin_3_cast_fp16)[name = string("op_1829_cast_fp16")]; + tensor var_1830_cast_fp16 = sub(x = var_1828_cast_fp16, y = var_1829_cast_fp16)[name = string("op_1830_cast_fp16")]; + tensor var_1831_cast_fp16 = mul(x = x2_33, y = cos_3_cast_fp16)[name = string("op_1831_cast_fp16")]; + tensor var_1832_cast_fp16 = mul(x = x1_33, y = sin_3_cast_fp16)[name = string("op_1832_cast_fp16")]; + tensor var_1833_cast_fp16 = add(x = var_1831_cast_fp16, y = var_1832_cast_fp16)[name = string("op_1833_cast_fp16")]; + bool rotated_33_interleave_0 = const()[name = string("rotated_33_interleave_0"), val = bool(false)]; + tensor rotated_33_cast_fp16 = concat(axis = var_54, interleave = rotated_33_interleave_0, values = (var_1830_cast_fp16, var_1833_cast_fp16))[name = string("rotated_33_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1805)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1805)[name = string("x2")]; + tensor var_1849_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1849_cast_fp16")]; + tensor var_1850_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1850_cast_fp16")]; + tensor var_1851_cast_fp16 = sub(x = var_1849_cast_fp16, y = var_1850_cast_fp16)[name = string("op_1851_cast_fp16")]; + tensor var_1852_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1852_cast_fp16")]; + tensor var_1853_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1853_cast_fp16")]; + tensor var_1854_cast_fp16 = add(x = var_1852_cast_fp16, y = var_1853_cast_fp16)[name = string("op_1854_cast_fp16")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated_cast_fp16 = concat(axis = var_54, interleave = rotated_interleave_0, values = (var_1851_cast_fp16, var_1854_cast_fp16))[name = string("rotated_cast_fp16")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; + tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; + tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; + int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; + bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; + tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_379, concat_67_values3_0))[name = string("concat_67")]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = rotated_cast_fp16, x = coreml_update_state_33)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_34 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([36])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([37])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_379, concat_71_values3_0))[name = string("concat_71")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = var_1814, x = coreml_update_state_34)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_35 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; + tensor var_1874_begin_0 = const()[name = string("op_1874_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_1874_end_0 = const()[name = string("op_1874_end_0"), val = tensor([9, 8, 1024, 128])]; + tensor var_1874_end_mask_0 = const()[name = string("op_1874_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1874_cast_fp16 = slice_by_index(begin = var_1874_begin_0, end = var_1874_end_0, end_mask = var_1874_end_mask_0, x = coreml_update_state_35)[name = string("op_1874_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1874_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1876_begin_0 = const()[name = string("op_1876_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor var_1876_end_0 = const()[name = string("op_1876_end_0"), val = tensor([37, 8, 1024, 128])]; + tensor var_1876_end_mask_0 = const()[name = string("op_1876_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1876_cast_fp16 = slice_by_index(begin = var_1876_begin_0, end = var_1876_end_0, end_mask = var_1876_end_mask_0, x = coreml_update_state_35)[name = string("op_1876_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1876_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_235_axes_0 = const()[name = string("x_235_axes_0"), val = tensor([1])]; + tensor x_235_cast_fp16 = expand_dims(axes = x_235_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_1885 = const()[name = string("op_1885"), val = tensor([1, 3, 1, 1])]; + tensor x_237_cast_fp16 = tile(reps = var_1885, x = x_235_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor var_1889 = const()[name = string("op_1889"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_cast_fp16 = reshape(shape = var_1889, x = x_237_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor x_241_axes_0 = const()[name = string("x_241_axes_0"), val = tensor([1])]; + tensor x_241_cast_fp16 = expand_dims(axes = x_241_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_1892 = const()[name = string("op_1892"), val = tensor([1, 3, 1, 1])]; + tensor x_243_cast_fp16 = tile(reps = var_1892, x = x_241_cast_fp16)[name = string("x_243_cast_fp16")]; + tensor var_1896 = const()[name = string("op_1896"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_cast_fp16 = reshape(shape = var_1896, x = x_243_cast_fp16)[name = string("value_states_cast_fp16")]; + bool var_1899_transpose_x_1 = const()[name = string("op_1899_transpose_x_1"), val = bool(false)]; + bool var_1899_transpose_y_1 = const()[name = string("op_1899_transpose_y_1"), val = bool(true)]; + tensor var_1899_cast_fp16 = matmul(transpose_x = var_1899_transpose_x_1, transpose_y = var_1899_transpose_y_1, x = rotated_33_cast_fp16, y = key_states_cast_fp16)[name = string("op_1899_cast_fp16")]; + fp16 var_1900_to_fp16 = const()[name = string("op_1900_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_33_cast_fp16 = mul(x = var_1899_cast_fp16, y = var_1900_to_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_245_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("x_245_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_245_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; + tensor x_247_cast_fp16 = sub(x = x_245_cast_fp16, y = reduce_max_8_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_247_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_1911_axes_0 = const()[name = string("op_1911_axes_0"), val = tensor([-1])]; + bool var_1911_keep_dims_0 = const()[name = string("op_1911_keep_dims_0"), val = bool(true)]; + tensor var_1911_cast_fp16 = reduce_sum(axes = var_1911_axes_0, keep_dims = var_1911_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1911_cast_fp16")]; + tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1911_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_1914_perm_0 = const()[name = string("op_1914_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1916 = const()[name = string("op_1916"), val = tensor([1, 1, 3072])]; + tensor var_1914_cast_fp16 = transpose(perm = var_1914_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_2")]; + tensor input_117_cast_fp16 = reshape(shape = var_1916, x = var_1914_cast_fp16)[name = string("input_117_cast_fp16")]; + tensor model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743560256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750638208))))[name = string("model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = linear_8_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_69_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_119_cast_fp16 = sub(x = hidden_states_69_cast_fp16, y = mean_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor var_1927_axes_0 = const()[name = string("op_1927_axes_0"), val = tensor([-1])]; + tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750687424)))]; + tensor var_1927_cast_fp16 = layer_norm(axes = var_1927_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_1927_cast_fp16")]; + tensor var_1934 = const()[name = string("op_1934"), val = tensor([0, 2, 1])]; + tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; + tensor var_1935 = transpose(perm = var_1934, x = var_1927_cast_fp16)[name = string("transpose_1")]; + tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_1935)[name = string("input_121")]; + string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; + tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; + tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; + int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; + tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_states")]; + tensor gate_states = silu(x = input_123)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_1957_axes_0 = const()[name = string("op_1957_axes_0"), val = tensor([2])]; + tensor var_1957 = squeeze(axes = var_1957_axes_0, x = hidden_states_1)[name = string("op_1957")]; + tensor var_1958 = const()[name = string("op_1958"), val = tensor([0, 2, 1])]; + tensor var_1959 = transpose(perm = var_1958, x = var_1957)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_69_cast_fp16, y = var_1959)[name = string("op_1960_cast_fp16")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (output_hidden_states); + func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7078016))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7127232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9486592))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9503040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11862400))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11878848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30753280))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30884416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49758848))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49889984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68764416))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75891584))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75940800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78300160))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78316608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80675968))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80692416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99566848))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99697984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118572416))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137577984))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137627200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144705152))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144754368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147113728))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147130176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149489536))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149505984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168380416))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168511552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187385984))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206391552))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206440768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213518720))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213567936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215927296))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215943744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218303104))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218319552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237193984))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237325120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256199552))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256330688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275205120))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275254336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282332288))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282381504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284740864))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284757312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287116672))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287133120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306007552))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306138688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325013120))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325144256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344018688))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344067904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351145856))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353554432))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353570880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355930240))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355946688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374821120))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374952256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393826688))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393957824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412832256))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412881472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419959424))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420008640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422368000))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422384448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424743808))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424760256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443634688))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443765824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462640256))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462771392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481645824))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481695040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488772992))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488822208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491181568))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491198016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493557376))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493573824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512448256))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512579392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531453824))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531584960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550459392))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550508608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557586560))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557635776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559995136))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560011584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562370944))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562387392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581261824))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581392960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600267392))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600398528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619272960))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; + int32 var_49 = const()[name = string("op_49"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_291_axis_0 = const()[name = string("op_291_axis_0"), val = int32(1)]; + int32 var_291_batch_dims_0 = const()[name = string("op_291_batch_dims_0"), val = int32(0)]; + bool var_291_validate_indices_0 = const()[name = string("op_291_validate_indices_0"), val = bool(false)]; + tensor var_60_to_fp16 = const()[name = string("op_60_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652876672)))]; + tensor var_291_cast_fp16 = gather(axis = var_291_axis_0, batch_dims = var_291_batch_dims_0, indices = select_0, validate_indices = var_291_validate_indices_0, x = var_60_to_fp16)[name = string("op_291_cast_fp16")]; + tensor var_292 = const()[name = string("op_292"), val = tensor([1, 64, 1, 128])]; + tensor cos_1_cast_fp16 = reshape(shape = var_292, x = var_291_cast_fp16)[name = string("cos_1_cast_fp16")]; + int32 var_296_axis_0 = const()[name = string("op_296_axis_0"), val = int32(1)]; + int32 var_296_batch_dims_0 = const()[name = string("op_296_batch_dims_0"), val = int32(0)]; + bool var_296_validate_indices_0 = const()[name = string("op_296_validate_indices_0"), val = bool(false)]; + tensor var_55_to_fp16 = const()[name = string("op_55_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619322176)))]; + tensor var_296_cast_fp16 = gather(axis = var_296_axis_0, batch_dims = var_296_batch_dims_0, indices = select_0, validate_indices = var_296_validate_indices_0, x = var_55_to_fp16)[name = string("op_296_cast_fp16")]; + tensor var_297 = const()[name = string("op_297"), val = tensor([1, 64, 1, 128])]; + tensor sin_1_cast_fp16 = reshape(shape = var_297, x = var_296_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_307_axes_0 = const()[name = string("op_307_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686431168)))]; + fp16 var_51_to_fp16 = const()[name = string("op_51_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_307_cast_fp16 = layer_norm(axes = var_307_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_307_cast_fp16")]; + tensor var_311 = const()[name = string("op_311"), val = tensor([0, 2, 1])]; + tensor var_313_axes_0 = const()[name = string("op_313_axes_0"), val = tensor([2])]; + tensor var_312 = transpose(perm = var_311, x = var_307_cast_fp16)[name = string("transpose_64")]; + tensor var_313 = expand_dims(axes = var_313_axes_0, x = var_312)[name = string("op_313")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_313)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_313)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_313)[name = string("value_states_1")]; + tensor var_333 = const()[name = string("op_333"), val = tensor([1, 24, 128, 64])]; + tensor var_334 = reshape(shape = var_333, x = query_states_1)[name = string("op_334")]; + tensor var_335 = const()[name = string("op_335"), val = tensor([0, 1, 3, 2])]; + tensor var_337 = const()[name = string("op_337"), val = tensor([1, 8, 128, 64])]; + tensor var_338 = reshape(shape = var_337, x = key_states_1)[name = string("op_338")]; + tensor var_339 = const()[name = string("op_339"), val = tensor([0, 1, 3, 2])]; + tensor var_341 = const()[name = string("op_341"), val = tensor([1, 8, 128, 64])]; + tensor var_342 = reshape(shape = var_341, x = value_states_1)[name = string("op_342")]; + tensor var_343 = const()[name = string("op_343"), val = tensor([0, 1, 3, 2])]; + tensor var_345 = const()[name = string("op_345"), val = tensor([0, 2, 1, 3])]; + tensor var_347 = const()[name = string("op_347"), val = tensor([0, 2, 1, 3])]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_1 = transpose(perm = var_335, x = var_334)[name = string("transpose_63")]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; + tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 64, 64])]; + tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_5 = transpose(perm = var_345, x = cos_1_cast_fp16)[name = string("transpose_62")]; + tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; + tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 64, 64])]; + tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_5 = transpose(perm = var_347, x = sin_1_cast_fp16)[name = string("transpose_61")]; + tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; + tensor var_361 = mul(x = x1_1, y = cos_7)[name = string("op_361")]; + tensor var_362 = mul(x = x2_1, y = sin_7)[name = string("op_362")]; + tensor var_363 = sub(x = var_361, y = var_362)[name = string("op_363")]; + tensor var_364 = mul(x = x2_1, y = cos_7)[name = string("op_364")]; + tensor var_365 = mul(x = x1_1, y = sin_7)[name = string("op_365")]; + tensor var_366 = add(x = var_364, y = var_365)[name = string("op_366")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1 = concat(axis = var_49, interleave = rotated_1_interleave_0, values = (var_363, var_366))[name = string("rotated_1")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_5 = transpose(perm = var_339, x = var_338)[name = string("transpose_60")]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; + tensor var_382 = mul(x = x1_3, y = cos_7)[name = string("op_382")]; + tensor var_383 = mul(x = x2_3, y = sin_7)[name = string("op_383")]; + tensor var_384 = sub(x = var_382, y = var_383)[name = string("op_384")]; + tensor var_385 = mul(x = x2_3, y = cos_7)[name = string("op_385")]; + tensor var_386 = mul(x = x1_3, y = sin_7)[name = string("op_386")]; + tensor var_387 = add(x = var_385, y = var_386)[name = string("op_387")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3 = concat(axis = var_49, interleave = rotated_3_interleave_0, values = (var_384, var_387))[name = string("rotated_3")]; + tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; + tensor var_396 = add(x = current_pos, y = seq_length_1)[name = string("op_396")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_396, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([28])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([29])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_396, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3 = transpose(perm = var_343, x = var_342)[name = string("transpose_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; + tensor var_410_begin_0 = const()[name = string("op_410_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_410_end_0 = const()[name = string("op_410_end_0"), val = tensor([1, 8, 1024, 128])]; + tensor var_410_end_mask_0 = const()[name = string("op_410_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_410_cast_fp16 = slice_by_index(begin = var_410_begin_0, end = var_410_end_0, end_mask = var_410_end_mask_0, x = coreml_update_state_19)[name = string("op_410_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_410_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_412_begin_0 = const()[name = string("op_412_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_412_end_0 = const()[name = string("op_412_end_0"), val = tensor([29, 8, 1024, 128])]; + tensor var_412_end_mask_0 = const()[name = string("op_412_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = coreml_update_state_19)[name = string("op_412_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_412_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_421 = const()[name = string("op_421"), val = tensor([1, 3, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_421, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_425 = const()[name = string("op_425"), val = tensor([1, -1, 1024, 128])]; + tensor var_426_cast_fp16 = reshape(shape = var_425, x = x_13_cast_fp16)[name = string("op_426_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_428 = const()[name = string("op_428"), val = tensor([1, 3, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_428, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + bool var_435_transpose_x_0 = const()[name = string("op_435_transpose_x_0"), val = bool(false)]; + bool var_435_transpose_y_0 = const()[name = string("op_435_transpose_y_0"), val = bool(true)]; + tensor var_435_cast_fp16 = matmul(transpose_x = var_435_transpose_x_0, transpose_y = var_435_transpose_y_0, x = rotated_1, y = var_426_cast_fp16)[name = string("op_435_cast_fp16")]; + fp16 var_436_to_fp16 = const()[name = string("op_436_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_435_cast_fp16, y = var_436_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_447_axes_0 = const()[name = string("op_447_axes_0"), val = tensor([-1])]; + bool var_447_keep_dims_0 = const()[name = string("op_447_keep_dims_0"), val = bool(true)]; + tensor var_447_cast_fp16 = reduce_sum(axes = var_447_axes_0, keep_dims = var_447_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_447_cast_fp16")]; + tensor var_448_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_447_cast_fp16)[name = string("op_448_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([24, 64, 1024])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_448_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([24, 1024, 128])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 24, 64, 128])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_451_perm_0 = const()[name = string("op_451_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_453 = const()[name = string("op_453"), val = tensor([1, 64, 3072])]; + tensor var_451_cast_fp16 = transpose(perm = var_451_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_58")]; + tensor input_5_cast_fp16 = reshape(shape = var_453, x = var_451_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686437376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693515328))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693564544)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_464_axes_0 = const()[name = string("op_464_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693570752)))]; + tensor var_464_cast_fp16 = layer_norm(axes = var_464_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_464_cast_fp16")]; + tensor var_471 = const()[name = string("op_471"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_472 = transpose(perm = var_471, x = var_464_cast_fp16)[name = string("transpose_57")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_472)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_494_axes_0 = const()[name = string("op_494_axes_0"), val = tensor([2])]; + tensor var_494 = squeeze(axes = var_494_axes_0, x = hidden_states_7)[name = string("op_494")]; + tensor var_495 = const()[name = string("op_495"), val = tensor([0, 2, 1])]; + tensor var_496 = transpose(perm = var_495, x = var_494)[name = string("transpose_56")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_496)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_504_axes_0 = const()[name = string("op_504_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693576960)))]; + tensor var_504_cast_fp16 = layer_norm(axes = var_504_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_504_cast_fp16")]; + tensor var_508 = const()[name = string("op_508"), val = tensor([0, 2, 1])]; + tensor var_510_axes_0 = const()[name = string("op_510_axes_0"), val = tensor([2])]; + tensor var_509 = transpose(perm = var_508, x = var_504_cast_fp16)[name = string("transpose_55")]; + tensor var_510 = expand_dims(axes = var_510_axes_0, x = var_509)[name = string("op_510")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_510)[name = string("query_states_5")]; + string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; + tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; + tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; + int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; + tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_510)[name = string("key_states_7")]; + string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; + tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; + tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; + int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; + tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_510)[name = string("value_states_7")]; + tensor var_530 = const()[name = string("op_530"), val = tensor([1, 24, 128, 64])]; + tensor var_531 = reshape(shape = var_530, x = query_states_5)[name = string("op_531")]; + tensor var_532 = const()[name = string("op_532"), val = tensor([0, 1, 3, 2])]; + tensor var_534 = const()[name = string("op_534"), val = tensor([1, 8, 128, 64])]; + tensor var_535 = reshape(shape = var_534, x = key_states_7)[name = string("op_535")]; + tensor var_536 = const()[name = string("op_536"), val = tensor([0, 1, 3, 2])]; + tensor var_538 = const()[name = string("op_538"), val = tensor([1, 8, 128, 64])]; + tensor var_539 = reshape(shape = var_538, x = value_states_7)[name = string("op_539")]; + tensor var_540 = const()[name = string("op_540"), val = tensor([0, 1, 3, 2])]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_29 = transpose(perm = var_532, x = var_531)[name = string("transpose_54")]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; + tensor var_558 = mul(x = x1_5, y = cos_7)[name = string("op_558")]; + tensor var_559 = mul(x = x2_5, y = sin_7)[name = string("op_559")]; + tensor var_560 = sub(x = var_558, y = var_559)[name = string("op_560")]; + tensor var_561 = mul(x = x2_5, y = cos_7)[name = string("op_561")]; + tensor var_562 = mul(x = x1_5, y = sin_7)[name = string("op_562")]; + tensor var_563 = add(x = var_561, y = var_562)[name = string("op_563")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5 = concat(axis = var_49, interleave = rotated_5_interleave_0, values = (var_560, var_563))[name = string("rotated_5")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_33 = transpose(perm = var_536, x = var_535)[name = string("transpose_53")]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; + tensor var_579 = mul(x = x1_7, y = cos_7)[name = string("op_579")]; + tensor var_580 = mul(x = x2_7, y = sin_7)[name = string("op_580")]; + tensor var_581 = sub(x = var_579, y = var_580)[name = string("op_581")]; + tensor var_582 = mul(x = x2_7, y = cos_7)[name = string("op_582")]; + tensor var_583 = mul(x = x1_7, y = sin_7)[name = string("op_583")]; + tensor var_584 = add(x = var_582, y = var_583)[name = string("op_584")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7 = concat(axis = var_49, interleave = rotated_7_interleave_0, values = (var_581, var_584))[name = string("rotated_7")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_396, concat_21_values3_0))[name = string("concat_21")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([29])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([30])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; + tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; + tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; + int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; + bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; + tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_396, concat_25_values3_0))[name = string("concat_25")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_9 = transpose(perm = var_540, x = var_539)[name = string("transpose_52")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; + tensor var_607_begin_0 = const()[name = string("op_607_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_607_end_0 = const()[name = string("op_607_end_0"), val = tensor([2, 8, 1024, 128])]; + tensor var_607_end_mask_0 = const()[name = string("op_607_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_607_cast_fp16 = slice_by_index(begin = var_607_begin_0, end = var_607_end_0, end_mask = var_607_end_mask_0, x = coreml_update_state_21)[name = string("op_607_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_607_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_609_begin_0 = const()[name = string("op_609_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_609_end_0 = const()[name = string("op_609_end_0"), val = tensor([30, 8, 1024, 128])]; + tensor var_609_end_mask_0 = const()[name = string("op_609_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_609_cast_fp16 = slice_by_index(begin = var_609_begin_0, end = var_609_end_0, end_mask = var_609_end_mask_0, x = coreml_update_state_21)[name = string("op_609_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_609_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_618 = const()[name = string("op_618"), val = tensor([1, 3, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_618, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_622 = const()[name = string("op_622"), val = tensor([1, -1, 1024, 128])]; + tensor var_623_cast_fp16 = reshape(shape = var_622, x = x_41_cast_fp16)[name = string("op_623_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_625 = const()[name = string("op_625"), val = tensor([1, 3, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_625, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + bool var_632_transpose_x_0 = const()[name = string("op_632_transpose_x_0"), val = bool(false)]; + bool var_632_transpose_y_0 = const()[name = string("op_632_transpose_y_0"), val = bool(true)]; + tensor var_632_cast_fp16 = matmul(transpose_x = var_632_transpose_x_0, transpose_y = var_632_transpose_y_0, x = rotated_5, y = var_623_cast_fp16)[name = string("op_632_cast_fp16")]; + fp16 var_633_to_fp16 = const()[name = string("op_633_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_3_cast_fp16 = mul(x = var_632_cast_fp16, y = var_633_to_fp16)[name = string("attn_weights_3_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_644_axes_0 = const()[name = string("op_644_axes_0"), val = tensor([-1])]; + bool var_644_keep_dims_0 = const()[name = string("op_644_keep_dims_0"), val = bool(true)]; + tensor var_644_cast_fp16 = reduce_sum(axes = var_644_axes_0, keep_dims = var_644_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_644_cast_fp16")]; + tensor var_645_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_644_cast_fp16)[name = string("op_645_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([24, 64, 1024])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_645_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([24, 1024, 128])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 24, 64, 128])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_648_perm_0 = const()[name = string("op_648_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_650 = const()[name = string("op_650"), val = tensor([1, 64, 3072])]; + tensor var_648_cast_fp16 = transpose(perm = var_648_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_51")]; + tensor input_19_cast_fp16 = reshape(shape = var_650, x = var_648_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693583168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700661120))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_661_axes_0 = const()[name = string("op_661_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700710336)))]; + tensor var_661_cast_fp16 = layer_norm(axes = var_661_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_661_cast_fp16")]; + tensor var_668 = const()[name = string("op_668"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_669 = transpose(perm = var_668, x = var_661_cast_fp16)[name = string("transpose_50")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_669)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_691_axes_0 = const()[name = string("op_691_axes_0"), val = tensor([2])]; + tensor var_691 = squeeze(axes = var_691_axes_0, x = hidden_states_15)[name = string("op_691")]; + tensor var_692 = const()[name = string("op_692"), val = tensor([0, 2, 1])]; + tensor var_693 = transpose(perm = var_692, x = var_691)[name = string("transpose_49")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_693)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_701_axes_0 = const()[name = string("op_701_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700716544)))]; + tensor var_701_cast_fp16 = layer_norm(axes = var_701_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_701_cast_fp16")]; + tensor var_705 = const()[name = string("op_705"), val = tensor([0, 2, 1])]; + tensor var_707_axes_0 = const()[name = string("op_707_axes_0"), val = tensor([2])]; + tensor var_706 = transpose(perm = var_705, x = var_701_cast_fp16)[name = string("transpose_48")]; + tensor var_707 = expand_dims(axes = var_707_axes_0, x = var_706)[name = string("op_707")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_707)[name = string("query_states_9")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_707)[name = string("key_states_13")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_707)[name = string("value_states_13")]; + tensor var_727 = const()[name = string("op_727"), val = tensor([1, 24, 128, 64])]; + tensor var_728 = reshape(shape = var_727, x = query_states_9)[name = string("op_728")]; + tensor var_729 = const()[name = string("op_729"), val = tensor([0, 1, 3, 2])]; + tensor var_731 = const()[name = string("op_731"), val = tensor([1, 8, 128, 64])]; + tensor var_732 = reshape(shape = var_731, x = key_states_13)[name = string("op_732")]; + tensor var_733 = const()[name = string("op_733"), val = tensor([0, 1, 3, 2])]; + tensor var_735 = const()[name = string("op_735"), val = tensor([1, 8, 128, 64])]; + tensor var_736 = reshape(shape = var_735, x = value_states_13)[name = string("op_736")]; + tensor var_737 = const()[name = string("op_737"), val = tensor([0, 1, 3, 2])]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_57 = transpose(perm = var_729, x = var_728)[name = string("transpose_47")]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; + tensor var_755 = mul(x = x1_9, y = cos_7)[name = string("op_755")]; + tensor var_756 = mul(x = x2_9, y = sin_7)[name = string("op_756")]; + tensor var_757 = sub(x = var_755, y = var_756)[name = string("op_757")]; + tensor var_758 = mul(x = x2_9, y = cos_7)[name = string("op_758")]; + tensor var_759 = mul(x = x1_9, y = sin_7)[name = string("op_759")]; + tensor var_760 = add(x = var_758, y = var_759)[name = string("op_760")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9 = concat(axis = var_49, interleave = rotated_9_interleave_0, values = (var_757, var_760))[name = string("rotated_9")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_61 = transpose(perm = var_733, x = var_732)[name = string("transpose_46")]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; + tensor var_776 = mul(x = x1_11, y = cos_7)[name = string("op_776")]; + tensor var_777 = mul(x = x2_11, y = sin_7)[name = string("op_777")]; + tensor var_778 = sub(x = var_776, y = var_777)[name = string("op_778")]; + tensor var_779 = mul(x = x2_11, y = cos_7)[name = string("op_779")]; + tensor var_780 = mul(x = x1_11, y = sin_7)[name = string("op_780")]; + tensor var_781 = add(x = var_779, y = var_780)[name = string("op_781")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11 = concat(axis = var_49, interleave = rotated_11_interleave_0, values = (var_778, var_781))[name = string("rotated_11")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_396, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([30])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([31])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_396, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15 = transpose(perm = var_737, x = var_736)[name = string("transpose_45")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; + tensor var_804_begin_0 = const()[name = string("op_804_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_804_end_0 = const()[name = string("op_804_end_0"), val = tensor([3, 8, 1024, 128])]; + tensor var_804_end_mask_0 = const()[name = string("op_804_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_804_cast_fp16 = slice_by_index(begin = var_804_begin_0, end = var_804_end_0, end_mask = var_804_end_mask_0, x = coreml_update_state_23)[name = string("op_804_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_804_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_806_begin_0 = const()[name = string("op_806_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_806_end_0 = const()[name = string("op_806_end_0"), val = tensor([31, 8, 1024, 128])]; + tensor var_806_end_mask_0 = const()[name = string("op_806_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_806_cast_fp16 = slice_by_index(begin = var_806_begin_0, end = var_806_end_0, end_mask = var_806_end_mask_0, x = coreml_update_state_23)[name = string("op_806_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_806_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_815 = const()[name = string("op_815"), val = tensor([1, 3, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_815, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_819 = const()[name = string("op_819"), val = tensor([1, -1, 1024, 128])]; + tensor var_820_cast_fp16 = reshape(shape = var_819, x = x_69_cast_fp16)[name = string("op_820_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_822 = const()[name = string("op_822"), val = tensor([1, 3, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_822, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_829_transpose_x_0 = const()[name = string("op_829_transpose_x_0"), val = bool(false)]; + bool var_829_transpose_y_0 = const()[name = string("op_829_transpose_y_0"), val = bool(true)]; + tensor var_829_cast_fp16 = matmul(transpose_x = var_829_transpose_x_0, transpose_y = var_829_transpose_y_0, x = rotated_9, y = var_820_cast_fp16)[name = string("op_829_cast_fp16")]; + fp16 var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_829_cast_fp16, y = var_830_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_841_axes_0 = const()[name = string("op_841_axes_0"), val = tensor([-1])]; + bool var_841_keep_dims_0 = const()[name = string("op_841_keep_dims_0"), val = bool(true)]; + tensor var_841_cast_fp16 = reduce_sum(axes = var_841_axes_0, keep_dims = var_841_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_841_cast_fp16")]; + tensor var_842_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_841_cast_fp16)[name = string("op_842_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([24, 64, 1024])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_842_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([24, 1024, 128])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 24, 64, 128])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_845_perm_0 = const()[name = string("op_845_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_847 = const()[name = string("op_847"), val = tensor([1, 64, 3072])]; + tensor var_845_cast_fp16 = transpose(perm = var_845_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_44")]; + tensor input_33_cast_fp16 = reshape(shape = var_847, x = var_845_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700722752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707800704))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_858_axes_0 = const()[name = string("op_858_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707849920)))]; + tensor var_858_cast_fp16 = layer_norm(axes = var_858_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_858_cast_fp16")]; + tensor var_865 = const()[name = string("op_865"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_866 = transpose(perm = var_865, x = var_858_cast_fp16)[name = string("transpose_43")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_866)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_888_axes_0 = const()[name = string("op_888_axes_0"), val = tensor([2])]; + tensor var_888 = squeeze(axes = var_888_axes_0, x = hidden_states_23)[name = string("op_888")]; + tensor var_889 = const()[name = string("op_889"), val = tensor([0, 2, 1])]; + tensor var_890 = transpose(perm = var_889, x = var_888)[name = string("transpose_42")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_890)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_898_axes_0 = const()[name = string("op_898_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707856128)))]; + tensor var_898_cast_fp16 = layer_norm(axes = var_898_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_898_cast_fp16")]; + tensor var_902 = const()[name = string("op_902"), val = tensor([0, 2, 1])]; + tensor var_904_axes_0 = const()[name = string("op_904_axes_0"), val = tensor([2])]; + tensor var_903 = transpose(perm = var_902, x = var_898_cast_fp16)[name = string("transpose_41")]; + tensor var_904 = expand_dims(axes = var_904_axes_0, x = var_903)[name = string("op_904")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_904)[name = string("query_states_13")]; + string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; + tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; + tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; + int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; + tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_904)[name = string("key_states_19")]; + string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; + tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; + tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; + int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; + tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_904)[name = string("value_states_19")]; + tensor var_924 = const()[name = string("op_924"), val = tensor([1, 24, 128, 64])]; + tensor var_925 = reshape(shape = var_924, x = query_states_13)[name = string("op_925")]; + tensor var_926 = const()[name = string("op_926"), val = tensor([0, 1, 3, 2])]; + tensor var_928 = const()[name = string("op_928"), val = tensor([1, 8, 128, 64])]; + tensor var_929 = reshape(shape = var_928, x = key_states_19)[name = string("op_929")]; + tensor var_930 = const()[name = string("op_930"), val = tensor([0, 1, 3, 2])]; + tensor var_932 = const()[name = string("op_932"), val = tensor([1, 8, 128, 64])]; + tensor var_933 = reshape(shape = var_932, x = value_states_19)[name = string("op_933")]; + tensor var_934 = const()[name = string("op_934"), val = tensor([0, 1, 3, 2])]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_85 = transpose(perm = var_926, x = var_925)[name = string("transpose_40")]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; + tensor var_952 = mul(x = x1_13, y = cos_7)[name = string("op_952")]; + tensor var_953 = mul(x = x2_13, y = sin_7)[name = string("op_953")]; + tensor var_954 = sub(x = var_952, y = var_953)[name = string("op_954")]; + tensor var_955 = mul(x = x2_13, y = cos_7)[name = string("op_955")]; + tensor var_956 = mul(x = x1_13, y = sin_7)[name = string("op_956")]; + tensor var_957 = add(x = var_955, y = var_956)[name = string("op_957")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13 = concat(axis = var_49, interleave = rotated_13_interleave_0, values = (var_954, var_957))[name = string("rotated_13")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_89 = transpose(perm = var_930, x = var_929)[name = string("transpose_39")]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")]; + tensor var_973 = mul(x = x1_15, y = cos_7)[name = string("op_973")]; + tensor var_974 = mul(x = x2_15, y = sin_7)[name = string("op_974")]; + tensor var_975 = sub(x = var_973, y = var_974)[name = string("op_975")]; + tensor var_976 = mul(x = x2_15, y = cos_7)[name = string("op_976")]; + tensor var_977 = mul(x = x1_15, y = sin_7)[name = string("op_977")]; + tensor var_978 = add(x = var_976, y = var_977)[name = string("op_978")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15 = concat(axis = var_49, interleave = rotated_15_interleave_0, values = (var_975, var_978))[name = string("rotated_15")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_396, concat_57_values3_0))[name = string("concat_57")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([31])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([32])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; + tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; + tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; + int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; + bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; + tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_396, concat_61_values3_0))[name = string("concat_61")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_21 = transpose(perm = var_934, x = var_933)[name = string("transpose_38")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; + tensor var_1001_begin_0 = const()[name = string("op_1001_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1001_end_0 = const()[name = string("op_1001_end_0"), val = tensor([4, 8, 1024, 128])]; + tensor var_1001_end_mask_0 = const()[name = string("op_1001_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1001_cast_fp16 = slice_by_index(begin = var_1001_begin_0, end = var_1001_end_0, end_mask = var_1001_end_mask_0, x = coreml_update_state_25)[name = string("op_1001_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_1001_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_1003_begin_0 = const()[name = string("op_1003_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_1003_end_0 = const()[name = string("op_1003_end_0"), val = tensor([32, 8, 1024, 128])]; + tensor var_1003_end_mask_0 = const()[name = string("op_1003_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1003_cast_fp16 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = coreml_update_state_25)[name = string("op_1003_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_1003_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_1012 = const()[name = string("op_1012"), val = tensor([1, 3, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_1012, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_1016 = const()[name = string("op_1016"), val = tensor([1, -1, 1024, 128])]; + tensor var_1017_cast_fp16 = reshape(shape = var_1016, x = x_97_cast_fp16)[name = string("op_1017_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_1019 = const()[name = string("op_1019"), val = tensor([1, 3, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_1019, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + bool var_1026_transpose_x_0 = const()[name = string("op_1026_transpose_x_0"), val = bool(false)]; + bool var_1026_transpose_y_0 = const()[name = string("op_1026_transpose_y_0"), val = bool(true)]; + tensor var_1026_cast_fp16 = matmul(transpose_x = var_1026_transpose_x_0, transpose_y = var_1026_transpose_y_0, x = rotated_13, y = var_1017_cast_fp16)[name = string("op_1026_cast_fp16")]; + fp16 var_1027_to_fp16 = const()[name = string("op_1027_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_7_cast_fp16 = mul(x = var_1026_cast_fp16, y = var_1027_to_fp16)[name = string("attn_weights_7_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_1038_axes_0 = const()[name = string("op_1038_axes_0"), val = tensor([-1])]; + bool var_1038_keep_dims_0 = const()[name = string("op_1038_keep_dims_0"), val = bool(true)]; + tensor var_1038_cast_fp16 = reduce_sum(axes = var_1038_axes_0, keep_dims = var_1038_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_1038_cast_fp16)[name = string("op_1039_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([24, 64, 1024])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_1039_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([24, 1024, 128])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 24, 64, 128])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_1042_perm_0 = const()[name = string("op_1042_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1044 = const()[name = string("op_1044"), val = tensor([1, 64, 3072])]; + tensor var_1042_cast_fp16 = transpose(perm = var_1042_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_37")]; + tensor input_47_cast_fp16 = reshape(shape = var_1044, x = var_1042_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707862336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714940288))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_1055_axes_0 = const()[name = string("op_1055_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714989504)))]; + tensor var_1055_cast_fp16 = layer_norm(axes = var_1055_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1055_cast_fp16")]; + tensor var_1062 = const()[name = string("op_1062"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_1063 = transpose(perm = var_1062, x = var_1055_cast_fp16)[name = string("transpose_36")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1063)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1085_axes_0 = const()[name = string("op_1085_axes_0"), val = tensor([2])]; + tensor var_1085 = squeeze(axes = var_1085_axes_0, x = hidden_states_31)[name = string("op_1085")]; + tensor var_1086 = const()[name = string("op_1086"), val = tensor([0, 2, 1])]; + tensor var_1087 = transpose(perm = var_1086, x = var_1085)[name = string("transpose_35")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1087)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1095_axes_0 = const()[name = string("op_1095_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714995712)))]; + tensor var_1095_cast_fp16 = layer_norm(axes = var_1095_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1095_cast_fp16")]; + tensor var_1099 = const()[name = string("op_1099"), val = tensor([0, 2, 1])]; + tensor var_1101_axes_0 = const()[name = string("op_1101_axes_0"), val = tensor([2])]; + tensor var_1100 = transpose(perm = var_1099, x = var_1095_cast_fp16)[name = string("transpose_34")]; + tensor var_1101 = expand_dims(axes = var_1101_axes_0, x = var_1100)[name = string("op_1101")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1101)[name = string("query_states_17")]; + string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; + tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; + tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; + int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; + tensor key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1101)[name = string("key_states_25")]; + string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; + tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; + tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; + int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; + tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1101)[name = string("value_states_25")]; + tensor var_1121 = const()[name = string("op_1121"), val = tensor([1, 24, 128, 64])]; + tensor var_1122 = reshape(shape = var_1121, x = query_states_17)[name = string("op_1122")]; + tensor var_1123 = const()[name = string("op_1123"), val = tensor([0, 1, 3, 2])]; + tensor var_1125 = const()[name = string("op_1125"), val = tensor([1, 8, 128, 64])]; + tensor var_1126 = reshape(shape = var_1125, x = key_states_25)[name = string("op_1126")]; + tensor var_1127 = const()[name = string("op_1127"), val = tensor([0, 1, 3, 2])]; + tensor var_1129 = const()[name = string("op_1129"), val = tensor([1, 8, 128, 64])]; + tensor var_1130 = reshape(shape = var_1129, x = value_states_25)[name = string("op_1130")]; + tensor var_1131 = const()[name = string("op_1131"), val = tensor([0, 1, 3, 2])]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_113 = transpose(perm = var_1123, x = var_1122)[name = string("transpose_33")]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")]; + tensor var_1149 = mul(x = x1_17, y = cos_7)[name = string("op_1149")]; + tensor var_1150 = mul(x = x2_17, y = sin_7)[name = string("op_1150")]; + tensor var_1151 = sub(x = var_1149, y = var_1150)[name = string("op_1151")]; + tensor var_1152 = mul(x = x2_17, y = cos_7)[name = string("op_1152")]; + tensor var_1153 = mul(x = x1_17, y = sin_7)[name = string("op_1153")]; + tensor var_1154 = add(x = var_1152, y = var_1153)[name = string("op_1154")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17 = concat(axis = var_49, interleave = rotated_17_interleave_0, values = (var_1151, var_1154))[name = string("rotated_17")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_117 = transpose(perm = var_1127, x = var_1126)[name = string("transpose_32")]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")]; + tensor var_1170 = mul(x = x1_19, y = cos_7)[name = string("op_1170")]; + tensor var_1171 = mul(x = x2_19, y = sin_7)[name = string("op_1171")]; + tensor var_1172 = sub(x = var_1170, y = var_1171)[name = string("op_1172")]; + tensor var_1173 = mul(x = x2_19, y = cos_7)[name = string("op_1173")]; + tensor var_1174 = mul(x = x1_19, y = sin_7)[name = string("op_1174")]; + tensor var_1175 = add(x = var_1173, y = var_1174)[name = string("op_1175")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19 = concat(axis = var_49, interleave = rotated_19_interleave_0, values = (var_1172, var_1175))[name = string("rotated_19")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_396, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([32])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([33])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_396, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_27 = transpose(perm = var_1131, x = var_1130)[name = string("transpose_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; + tensor var_1198_begin_0 = const()[name = string("op_1198_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1198_end_0 = const()[name = string("op_1198_end_0"), val = tensor([5, 8, 1024, 128])]; + tensor var_1198_end_mask_0 = const()[name = string("op_1198_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = coreml_update_state_27)[name = string("op_1198_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1198_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1200_begin_0 = const()[name = string("op_1200_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_1200_end_0 = const()[name = string("op_1200_end_0"), val = tensor([33, 8, 1024, 128])]; + tensor var_1200_end_mask_0 = const()[name = string("op_1200_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1200_cast_fp16 = slice_by_index(begin = var_1200_begin_0, end = var_1200_end_0, end_mask = var_1200_end_mask_0, x = coreml_update_state_27)[name = string("op_1200_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1200_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1209 = const()[name = string("op_1209"), val = tensor([1, 3, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1209, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1213 = const()[name = string("op_1213"), val = tensor([1, -1, 1024, 128])]; + tensor var_1214_cast_fp16 = reshape(shape = var_1213, x = x_125_cast_fp16)[name = string("op_1214_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1216 = const()[name = string("op_1216"), val = tensor([1, 3, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1216, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + bool var_1223_transpose_x_0 = const()[name = string("op_1223_transpose_x_0"), val = bool(false)]; + bool var_1223_transpose_y_0 = const()[name = string("op_1223_transpose_y_0"), val = bool(true)]; + tensor var_1223_cast_fp16 = matmul(transpose_x = var_1223_transpose_x_0, transpose_y = var_1223_transpose_y_0, x = rotated_17, y = var_1214_cast_fp16)[name = string("op_1223_cast_fp16")]; + fp16 var_1224_to_fp16 = const()[name = string("op_1224_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_1223_cast_fp16, y = var_1224_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1235_axes_0 = const()[name = string("op_1235_axes_0"), val = tensor([-1])]; + bool var_1235_keep_dims_0 = const()[name = string("op_1235_keep_dims_0"), val = bool(true)]; + tensor var_1235_cast_fp16 = reduce_sum(axes = var_1235_axes_0, keep_dims = var_1235_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1235_cast_fp16")]; + tensor var_1236_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1235_cast_fp16)[name = string("op_1236_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([24, 64, 1024])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1236_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([24, 1024, 128])]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 24, 64, 128])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor var_1239_perm_0 = const()[name = string("op_1239_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1241 = const()[name = string("op_1241"), val = tensor([1, 64, 3072])]; + tensor var_1239_cast_fp16 = transpose(perm = var_1239_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_30")]; + tensor input_61_cast_fp16 = reshape(shape = var_1241, x = var_1239_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(715001920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722079872))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1252_axes_0 = const()[name = string("op_1252_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722129088)))]; + tensor var_1252_cast_fp16 = layer_norm(axes = var_1252_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1252_cast_fp16")]; + tensor var_1259 = const()[name = string("op_1259"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1260 = transpose(perm = var_1259, x = var_1252_cast_fp16)[name = string("transpose_29")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1260)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1282_axes_0 = const()[name = string("op_1282_axes_0"), val = tensor([2])]; + tensor var_1282 = squeeze(axes = var_1282_axes_0, x = hidden_states_39)[name = string("op_1282")]; + tensor var_1283 = const()[name = string("op_1283"), val = tensor([0, 2, 1])]; + tensor var_1284 = transpose(perm = var_1283, x = var_1282)[name = string("transpose_28")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1284)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1292_axes_0 = const()[name = string("op_1292_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722135296)))]; + tensor var_1292_cast_fp16 = layer_norm(axes = var_1292_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1292_cast_fp16")]; + tensor var_1296 = const()[name = string("op_1296"), val = tensor([0, 2, 1])]; + tensor var_1298_axes_0 = const()[name = string("op_1298_axes_0"), val = tensor([2])]; + tensor var_1297 = transpose(perm = var_1296, x = var_1292_cast_fp16)[name = string("transpose_27")]; + tensor var_1298 = expand_dims(axes = var_1298_axes_0, x = var_1297)[name = string("op_1298")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1298)[name = string("query_states_21")]; + string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; + tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; + tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; + int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; + tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1298)[name = string("key_states_31")]; + string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")]; + tensor value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor([1, 1])]; + tensor value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor([1, 1])]; + int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)]; + tensor value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1298)[name = string("value_states_31")]; + tensor var_1318 = const()[name = string("op_1318"), val = tensor([1, 24, 128, 64])]; + tensor var_1319 = reshape(shape = var_1318, x = query_states_21)[name = string("op_1319")]; + tensor var_1320 = const()[name = string("op_1320"), val = tensor([0, 1, 3, 2])]; + tensor var_1322 = const()[name = string("op_1322"), val = tensor([1, 8, 128, 64])]; + tensor var_1323 = reshape(shape = var_1322, x = key_states_31)[name = string("op_1323")]; + tensor var_1324 = const()[name = string("op_1324"), val = tensor([0, 1, 3, 2])]; + tensor var_1326 = const()[name = string("op_1326"), val = tensor([1, 8, 128, 64])]; + tensor var_1327 = reshape(shape = var_1326, x = value_states_31)[name = string("op_1327")]; + tensor var_1328 = const()[name = string("op_1328"), val = tensor([0, 1, 3, 2])]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_141 = transpose(perm = var_1320, x = var_1319)[name = string("transpose_26")]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")]; + tensor var_1346 = mul(x = x1_21, y = cos_7)[name = string("op_1346")]; + tensor var_1347 = mul(x = x2_21, y = sin_7)[name = string("op_1347")]; + tensor var_1348 = sub(x = var_1346, y = var_1347)[name = string("op_1348")]; + tensor var_1349 = mul(x = x2_21, y = cos_7)[name = string("op_1349")]; + tensor var_1350 = mul(x = x1_21, y = sin_7)[name = string("op_1350")]; + tensor var_1351 = add(x = var_1349, y = var_1350)[name = string("op_1351")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21 = concat(axis = var_49, interleave = rotated_21_interleave_0, values = (var_1348, var_1351))[name = string("rotated_21")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_145 = transpose(perm = var_1324, x = var_1323)[name = string("transpose_25")]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")]; + tensor var_1367 = mul(x = x1_23, y = cos_7)[name = string("op_1367")]; + tensor var_1368 = mul(x = x2_23, y = sin_7)[name = string("op_1368")]; + tensor var_1369 = sub(x = var_1367, y = var_1368)[name = string("op_1369")]; + tensor var_1370 = mul(x = x2_23, y = cos_7)[name = string("op_1370")]; + tensor var_1371 = mul(x = x1_23, y = sin_7)[name = string("op_1371")]; + tensor var_1372 = add(x = var_1370, y = var_1371)[name = string("op_1372")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23 = concat(axis = var_49, interleave = rotated_23_interleave_0, values = (var_1369, var_1372))[name = string("rotated_23")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_396, concat_93_values3_0))[name = string("concat_93")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([33])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([34])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; + tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; + tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; + int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; + bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; + tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_396, concat_97_values3_0))[name = string("concat_97")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_33 = transpose(perm = var_1328, x = var_1327)[name = string("transpose_24")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; + tensor var_1395_begin_0 = const()[name = string("op_1395_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1395_end_0 = const()[name = string("op_1395_end_0"), val = tensor([6, 8, 1024, 128])]; + tensor var_1395_end_mask_0 = const()[name = string("op_1395_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1395_cast_fp16 = slice_by_index(begin = var_1395_begin_0, end = var_1395_end_0, end_mask = var_1395_end_mask_0, x = coreml_update_state_29)[name = string("op_1395_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1395_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1397_begin_0 = const()[name = string("op_1397_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_1397_end_0 = const()[name = string("op_1397_end_0"), val = tensor([34, 8, 1024, 128])]; + tensor var_1397_end_mask_0 = const()[name = string("op_1397_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1397_cast_fp16 = slice_by_index(begin = var_1397_begin_0, end = var_1397_end_0, end_mask = var_1397_end_mask_0, x = coreml_update_state_29)[name = string("op_1397_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1397_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1406 = const()[name = string("op_1406"), val = tensor([1, 3, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1406, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1410 = const()[name = string("op_1410"), val = tensor([1, -1, 1024, 128])]; + tensor var_1411_cast_fp16 = reshape(shape = var_1410, x = x_153_cast_fp16)[name = string("op_1411_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1413 = const()[name = string("op_1413"), val = tensor([1, 3, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1413, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + bool var_1420_transpose_x_0 = const()[name = string("op_1420_transpose_x_0"), val = bool(false)]; + bool var_1420_transpose_y_0 = const()[name = string("op_1420_transpose_y_0"), val = bool(true)]; + tensor var_1420_cast_fp16 = matmul(transpose_x = var_1420_transpose_x_0, transpose_y = var_1420_transpose_y_0, x = rotated_21, y = var_1411_cast_fp16)[name = string("op_1420_cast_fp16")]; + fp16 var_1421_to_fp16 = const()[name = string("op_1421_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_11_cast_fp16 = mul(x = var_1420_cast_fp16, y = var_1421_to_fp16)[name = string("attn_weights_11_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1432_axes_0 = const()[name = string("op_1432_axes_0"), val = tensor([-1])]; + bool var_1432_keep_dims_0 = const()[name = string("op_1432_keep_dims_0"), val = bool(true)]; + tensor var_1432_cast_fp16 = reduce_sum(axes = var_1432_axes_0, keep_dims = var_1432_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1432_cast_fp16")]; + tensor var_1433_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1432_cast_fp16)[name = string("op_1433_cast_fp16")]; + tensor concat_102 = const()[name = string("concat_102"), val = tensor([24, 64, 1024])]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1433_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor concat_103 = const()[name = string("concat_103"), val = tensor([24, 1024, 128])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 24, 64, 128])]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor var_1436_perm_0 = const()[name = string("op_1436_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1438 = const()[name = string("op_1438"), val = tensor([1, 64, 3072])]; + tensor var_1436_cast_fp16 = transpose(perm = var_1436_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_23")]; + tensor input_75_cast_fp16 = reshape(shape = var_1438, x = var_1436_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722141504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729219456))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1449_axes_0 = const()[name = string("op_1449_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729268672)))]; + tensor var_1449_cast_fp16 = layer_norm(axes = var_1449_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1449_cast_fp16")]; + tensor var_1456 = const()[name = string("op_1456"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1457 = transpose(perm = var_1456, x = var_1449_cast_fp16)[name = string("transpose_22")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1457)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1479_axes_0 = const()[name = string("op_1479_axes_0"), val = tensor([2])]; + tensor var_1479 = squeeze(axes = var_1479_axes_0, x = hidden_states_47)[name = string("op_1479")]; + tensor var_1480 = const()[name = string("op_1480"), val = tensor([0, 2, 1])]; + tensor var_1481 = transpose(perm = var_1480, x = var_1479)[name = string("transpose_21")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1481)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1489_axes_0 = const()[name = string("op_1489_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729274880)))]; + tensor var_1489_cast_fp16 = layer_norm(axes = var_1489_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1489_cast_fp16")]; + tensor var_1493 = const()[name = string("op_1493"), val = tensor([0, 2, 1])]; + tensor var_1495_axes_0 = const()[name = string("op_1495_axes_0"), val = tensor([2])]; + tensor var_1494 = transpose(perm = var_1493, x = var_1489_cast_fp16)[name = string("transpose_20")]; + tensor var_1495 = expand_dims(axes = var_1495_axes_0, x = var_1494)[name = string("op_1495")]; + string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; + tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; + tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; + int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; + tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1495)[name = string("query_states_25")]; + string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; + tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; + tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; + int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; + tensor key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1495)[name = string("key_states_37")]; + string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; + tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; + tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; + int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; + tensor value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1495)[name = string("value_states_37")]; + tensor var_1515 = const()[name = string("op_1515"), val = tensor([1, 24, 128, 64])]; + tensor var_1516 = reshape(shape = var_1515, x = query_states_25)[name = string("op_1516")]; + tensor var_1517 = const()[name = string("op_1517"), val = tensor([0, 1, 3, 2])]; + tensor var_1519 = const()[name = string("op_1519"), val = tensor([1, 8, 128, 64])]; + tensor var_1520 = reshape(shape = var_1519, x = key_states_37)[name = string("op_1520")]; + tensor var_1521 = const()[name = string("op_1521"), val = tensor([0, 1, 3, 2])]; + tensor var_1523 = const()[name = string("op_1523"), val = tensor([1, 8, 128, 64])]; + tensor var_1524 = reshape(shape = var_1523, x = value_states_37)[name = string("op_1524")]; + tensor var_1525 = const()[name = string("op_1525"), val = tensor([0, 1, 3, 2])]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_169 = transpose(perm = var_1517, x = var_1516)[name = string("transpose_19")]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")]; + tensor var_1543 = mul(x = x1_25, y = cos_7)[name = string("op_1543")]; + tensor var_1544 = mul(x = x2_25, y = sin_7)[name = string("op_1544")]; + tensor var_1545 = sub(x = var_1543, y = var_1544)[name = string("op_1545")]; + tensor var_1546 = mul(x = x2_25, y = cos_7)[name = string("op_1546")]; + tensor var_1547 = mul(x = x1_25, y = sin_7)[name = string("op_1547")]; + tensor var_1548 = add(x = var_1546, y = var_1547)[name = string("op_1548")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25 = concat(axis = var_49, interleave = rotated_25_interleave_0, values = (var_1545, var_1548))[name = string("rotated_25")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_173 = transpose(perm = var_1521, x = var_1520)[name = string("transpose_18")]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = x_173)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = x_173)[name = string("x2_27")]; + tensor var_1564 = mul(x = x1_27, y = cos_7)[name = string("op_1564")]; + tensor var_1565 = mul(x = x2_27, y = sin_7)[name = string("op_1565")]; + tensor var_1566 = sub(x = var_1564, y = var_1565)[name = string("op_1566")]; + tensor var_1567 = mul(x = x2_27, y = cos_7)[name = string("op_1567")]; + tensor var_1568 = mul(x = x1_27, y = sin_7)[name = string("op_1568")]; + tensor var_1569 = add(x = var_1567, y = var_1568)[name = string("op_1569")]; + bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; + tensor rotated_27 = concat(axis = var_49, interleave = rotated_27_interleave_0, values = (var_1566, var_1569))[name = string("rotated_27")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_396, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([34])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([35])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_396, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_39 = transpose(perm = var_1525, x = var_1524)[name = string("transpose_17")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; + tensor var_1592_begin_0 = const()[name = string("op_1592_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_1592_end_0 = const()[name = string("op_1592_end_0"), val = tensor([7, 8, 1024, 128])]; + tensor var_1592_end_mask_0 = const()[name = string("op_1592_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1592_cast_fp16 = slice_by_index(begin = var_1592_begin_0, end = var_1592_end_0, end_mask = var_1592_end_mask_0, x = coreml_update_state_31)[name = string("op_1592_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1592_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_1594_begin_0 = const()[name = string("op_1594_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_1594_end_0 = const()[name = string("op_1594_end_0"), val = tensor([35, 8, 1024, 128])]; + tensor var_1594_end_mask_0 = const()[name = string("op_1594_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1594_cast_fp16 = slice_by_index(begin = var_1594_begin_0, end = var_1594_end_0, end_mask = var_1594_end_mask_0, x = coreml_update_state_31)[name = string("op_1594_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1594_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1603 = const()[name = string("op_1603"), val = tensor([1, 3, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1603, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1607 = const()[name = string("op_1607"), val = tensor([1, -1, 1024, 128])]; + tensor var_1608_cast_fp16 = reshape(shape = var_1607, x = x_181_cast_fp16)[name = string("op_1608_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1610 = const()[name = string("op_1610"), val = tensor([1, 3, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1610, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_1617_transpose_x_0 = const()[name = string("op_1617_transpose_x_0"), val = bool(false)]; + bool var_1617_transpose_y_0 = const()[name = string("op_1617_transpose_y_0"), val = bool(true)]; + tensor var_1617_cast_fp16 = matmul(transpose_x = var_1617_transpose_x_0, transpose_y = var_1617_transpose_y_0, x = rotated_25, y = var_1608_cast_fp16)[name = string("op_1617_cast_fp16")]; + fp16 var_1618_to_fp16 = const()[name = string("op_1618_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_1617_cast_fp16, y = var_1618_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; + tensor var_1629_axes_0 = const()[name = string("op_1629_axes_0"), val = tensor([-1])]; + bool var_1629_keep_dims_0 = const()[name = string("op_1629_keep_dims_0"), val = bool(true)]; + tensor var_1629_cast_fp16 = reduce_sum(axes = var_1629_axes_0, keep_dims = var_1629_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1629_cast_fp16")]; + tensor var_1630_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1629_cast_fp16)[name = string("op_1630_cast_fp16")]; + tensor concat_120 = const()[name = string("concat_120"), val = tensor([24, 64, 1024])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1630_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor concat_121 = const()[name = string("concat_121"), val = tensor([24, 1024, 128])]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 24, 64, 128])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor var_1633_perm_0 = const()[name = string("op_1633_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1635 = const()[name = string("op_1635"), val = tensor([1, 64, 3072])]; + tensor var_1633_cast_fp16 = transpose(perm = var_1633_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_16")]; + tensor input_89_cast_fp16 = reshape(shape = var_1635, x = var_1633_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729281088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736359040))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; + bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; + tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1646_axes_0 = const()[name = string("op_1646_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736408256)))]; + tensor var_1646_cast_fp16 = layer_norm(axes = var_1646_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1646_cast_fp16")]; + tensor var_1653 = const()[name = string("op_1653"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1654 = transpose(perm = var_1653, x = var_1646_cast_fp16)[name = string("transpose_15")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1654)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; + tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; + tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; + int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; + tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; + tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; + tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; + string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; + tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; + tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; + tensor var_1676_axes_0 = const()[name = string("op_1676_axes_0"), val = tensor([2])]; + tensor var_1676 = squeeze(axes = var_1676_axes_0, x = hidden_states_55)[name = string("op_1676")]; + tensor var_1677 = const()[name = string("op_1677"), val = tensor([0, 2, 1])]; + tensor var_1678 = transpose(perm = var_1677, x = var_1676)[name = string("transpose_14")]; + tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1678)[name = string("hidden_states_57_cast_fp16")]; + tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; + bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; + tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; + tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor var_1686_axes_0 = const()[name = string("op_1686_axes_0"), val = tensor([-1])]; + tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736414464)))]; + tensor var_1686_cast_fp16 = layer_norm(axes = var_1686_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1686_cast_fp16")]; + tensor var_1690 = const()[name = string("op_1690"), val = tensor([0, 2, 1])]; + tensor var_1692_axes_0 = const()[name = string("op_1692_axes_0"), val = tensor([2])]; + tensor var_1691 = transpose(perm = var_1690, x = var_1686_cast_fp16)[name = string("transpose_13")]; + tensor var_1692 = expand_dims(axes = var_1692_axes_0, x = var_1691)[name = string("op_1692")]; + string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; + tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; + tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; + int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; + tensor query_states_29 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_1692)[name = string("query_states_29")]; + string key_states_43_pad_type_0 = const()[name = string("key_states_43_pad_type_0"), val = string("valid")]; + tensor key_states_43_strides_0 = const()[name = string("key_states_43_strides_0"), val = tensor([1, 1])]; + tensor key_states_43_pad_0 = const()[name = string("key_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_43_dilations_0 = const()[name = string("key_states_43_dilations_0"), val = tensor([1, 1])]; + int32 key_states_43_groups_0 = const()[name = string("key_states_43_groups_0"), val = int32(1)]; + tensor key_states_43 = conv(dilations = key_states_43_dilations_0, groups = key_states_43_groups_0, pad = key_states_43_pad_0, pad_type = key_states_43_pad_type_0, strides = key_states_43_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_1692)[name = string("key_states_43")]; + string value_states_43_pad_type_0 = const()[name = string("value_states_43_pad_type_0"), val = string("valid")]; + tensor value_states_43_strides_0 = const()[name = string("value_states_43_strides_0"), val = tensor([1, 1])]; + tensor value_states_43_pad_0 = const()[name = string("value_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_43_dilations_0 = const()[name = string("value_states_43_dilations_0"), val = tensor([1, 1])]; + int32 value_states_43_groups_0 = const()[name = string("value_states_43_groups_0"), val = int32(1)]; + tensor value_states_43 = conv(dilations = value_states_43_dilations_0, groups = value_states_43_groups_0, pad = value_states_43_pad_0, pad_type = value_states_43_pad_type_0, strides = value_states_43_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_1692)[name = string("value_states_43")]; + tensor var_1712 = const()[name = string("op_1712"), val = tensor([1, 24, 128, 64])]; + tensor var_1713 = reshape(shape = var_1712, x = query_states_29)[name = string("op_1713")]; + tensor var_1714 = const()[name = string("op_1714"), val = tensor([0, 1, 3, 2])]; + tensor var_1716 = const()[name = string("op_1716"), val = tensor([1, 8, 128, 64])]; + tensor var_1717 = reshape(shape = var_1716, x = key_states_43)[name = string("op_1717")]; + tensor var_1718 = const()[name = string("op_1718"), val = tensor([0, 1, 3, 2])]; + tensor var_1720 = const()[name = string("op_1720"), val = tensor([1, 8, 128, 64])]; + tensor var_1721 = reshape(shape = var_1720, x = value_states_43)[name = string("op_1721")]; + tensor var_1722 = const()[name = string("op_1722"), val = tensor([0, 1, 3, 2])]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_197 = transpose(perm = var_1714, x = var_1713)[name = string("transpose_12")]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = x_197)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = x_197)[name = string("x2_29")]; + tensor var_1740 = mul(x = x1_29, y = cos_7)[name = string("op_1740")]; + tensor var_1741 = mul(x = x2_29, y = sin_7)[name = string("op_1741")]; + tensor var_1742 = sub(x = var_1740, y = var_1741)[name = string("op_1742")]; + tensor var_1743 = mul(x = x2_29, y = cos_7)[name = string("op_1743")]; + tensor var_1744 = mul(x = x1_29, y = sin_7)[name = string("op_1744")]; + tensor var_1745 = add(x = var_1743, y = var_1744)[name = string("op_1745")]; + bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; + tensor rotated_29 = concat(axis = var_49, interleave = rotated_29_interleave_0, values = (var_1742, var_1745))[name = string("rotated_29")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_201 = transpose(perm = var_1718, x = var_1717)[name = string("transpose_11")]; + tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = x_201)[name = string("x1_31")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = x_201)[name = string("x2_31")]; + tensor var_1761 = mul(x = x1_31, y = cos_7)[name = string("op_1761")]; + tensor var_1762 = mul(x = x2_31, y = sin_7)[name = string("op_1762")]; + tensor var_1763 = sub(x = var_1761, y = var_1762)[name = string("op_1763")]; + tensor var_1764 = mul(x = x2_31, y = cos_7)[name = string("op_1764")]; + tensor var_1765 = mul(x = x1_31, y = sin_7)[name = string("op_1765")]; + tensor var_1766 = add(x = var_1764, y = var_1765)[name = string("op_1766")]; + bool rotated_31_interleave_0 = const()[name = string("rotated_31_interleave_0"), val = bool(false)]; + tensor rotated_31 = concat(axis = var_49, interleave = rotated_31_interleave_0, values = (var_1763, var_1766))[name = string("rotated_31")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; + int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; + bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; + tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_396, concat_129_values3_0))[name = string("concat_129")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_31, x = coreml_update_state_31)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_32 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_32")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([35])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([36])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; + tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; + tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; + int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; + bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; + tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_396, concat_133_values3_0))[name = string("concat_133")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_45 = transpose(perm = var_1722, x = var_1721)[name = string("transpose_10")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_45, x = coreml_update_state_32)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_33 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_33")]; + tensor var_1789_begin_0 = const()[name = string("op_1789_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_1789_end_0 = const()[name = string("op_1789_end_0"), val = tensor([8, 8, 1024, 128])]; + tensor var_1789_end_mask_0 = const()[name = string("op_1789_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1789_cast_fp16 = slice_by_index(begin = var_1789_begin_0, end = var_1789_end_0, end_mask = var_1789_end_mask_0, x = coreml_update_state_33)[name = string("op_1789_cast_fp16")]; + tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; + tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_1789_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; + tensor var_1791_begin_0 = const()[name = string("op_1791_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_1791_end_0 = const()[name = string("op_1791_end_0"), val = tensor([36, 8, 1024, 128])]; + tensor var_1791_end_mask_0 = const()[name = string("op_1791_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1791_cast_fp16 = slice_by_index(begin = var_1791_begin_0, end = var_1791_end_0, end_mask = var_1791_end_mask_0, x = coreml_update_state_33)[name = string("op_1791_cast_fp16")]; + tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; + tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_1791_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; + tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; + tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_1800 = const()[name = string("op_1800"), val = tensor([1, 3, 1, 1])]; + tensor x_209_cast_fp16 = tile(reps = var_1800, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1804 = const()[name = string("op_1804"), val = tensor([1, -1, 1024, 128])]; + tensor var_1805_cast_fp16 = reshape(shape = var_1804, x = x_209_cast_fp16)[name = string("op_1805_cast_fp16")]; + tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; + tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_1807 = const()[name = string("op_1807"), val = tensor([1, 3, 1, 1])]; + tensor x_215_cast_fp16 = tile(reps = var_1807, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; + bool var_1814_transpose_x_0 = const()[name = string("op_1814_transpose_x_0"), val = bool(false)]; + bool var_1814_transpose_y_0 = const()[name = string("op_1814_transpose_y_0"), val = bool(true)]; + tensor var_1814_cast_fp16 = matmul(transpose_x = var_1814_transpose_x_0, transpose_y = var_1814_transpose_y_0, x = rotated_29, y = var_1805_cast_fp16)[name = string("op_1814_cast_fp16")]; + fp16 var_1815_to_fp16 = const()[name = string("op_1815_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_15_cast_fp16 = mul(x = var_1814_cast_fp16, y = var_1815_to_fp16)[name = string("attn_weights_15_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = attn_weights_15_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor exp_x_15_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_15_cast_fp16")]; + tensor var_1826_axes_0 = const()[name = string("op_1826_axes_0"), val = tensor([-1])]; + bool var_1826_keep_dims_0 = const()[name = string("op_1826_keep_dims_0"), val = bool(true)]; + tensor var_1826_cast_fp16 = reduce_sum(axes = var_1826_axes_0, keep_dims = var_1826_keep_dims_0, x = exp_x_15_cast_fp16)[name = string("op_1826_cast_fp16")]; + tensor var_1827_cast_fp16 = real_div(x = exp_x_15_cast_fp16, y = var_1826_cast_fp16)[name = string("op_1827_cast_fp16")]; + tensor concat_138 = const()[name = string("concat_138"), val = tensor([24, 64, 1024])]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_1827_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor concat_139 = const()[name = string("concat_139"), val = tensor([24, 1024, 128])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_215_cast_fp16)[name = string("reshape_22_cast_fp16")]; + bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; + bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 24, 64, 128])]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor var_1830_perm_0 = const()[name = string("op_1830_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1832 = const()[name = string("op_1832"), val = tensor([1, 64, 3072])]; + tensor var_1830_cast_fp16 = transpose(perm = var_1830_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_9")]; + tensor input_103_cast_fp16 = reshape(shape = var_1832, x = var_1830_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736420672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743498624))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; + bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; + tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; + tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_1843_axes_0 = const()[name = string("op_1843_axes_0"), val = tensor([-1])]; + tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743547840)))]; + tensor var_1843_cast_fp16 = layer_norm(axes = var_1843_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1843_cast_fp16")]; + tensor var_1850 = const()[name = string("op_1850"), val = tensor([0, 2, 1])]; + tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; + tensor var_1851 = transpose(perm = var_1850, x = var_1843_cast_fp16)[name = string("transpose_8")]; + tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1851)[name = string("input_107")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; + string up_states_15_pad_type_0 = const()[name = string("up_states_15_pad_type_0"), val = string("valid")]; + tensor up_states_15_strides_0 = const()[name = string("up_states_15_strides_0"), val = tensor([1, 1])]; + tensor up_states_15_pad_0 = const()[name = string("up_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_15_dilations_0 = const()[name = string("up_states_15_dilations_0"), val = tensor([1, 1])]; + int32 up_states_15_groups_0 = const()[name = string("up_states_15_groups_0"), val = int32(1)]; + tensor up_states_15 = conv(dilations = up_states_15_dilations_0, groups = up_states_15_groups_0, pad = up_states_15_pad_0, pad_type = up_states_15_pad_type_0, strides = up_states_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states_15")]; + tensor gate_states_15 = silu(x = input_109)[name = string("gate_states_15")]; + tensor input_111 = mul(x = gate_states_15, y = up_states_15)[name = string("input_111")]; + string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; + tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; + tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; + tensor var_1873_axes_0 = const()[name = string("op_1873_axes_0"), val = tensor([2])]; + tensor var_1873 = squeeze(axes = var_1873_axes_0, x = hidden_states_63)[name = string("op_1873")]; + tensor var_1874 = const()[name = string("op_1874"), val = tensor([0, 2, 1])]; + tensor var_1875 = transpose(perm = var_1874, x = var_1873)[name = string("transpose_7")]; + tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_1875)[name = string("hidden_states_65_cast_fp16")]; + tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; + bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; + tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_33_cast_fp16")]; + tensor input_113_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_33_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor var_1883_axes_0 = const()[name = string("op_1883_axes_0"), val = tensor([-1])]; + tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743554048)))]; + tensor var_1883_cast_fp16 = layer_norm(axes = var_1883_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_113_cast_fp16)[name = string("op_1883_cast_fp16")]; + tensor var_1887 = const()[name = string("op_1887"), val = tensor([0, 2, 1])]; + tensor var_1889_axes_0 = const()[name = string("op_1889_axes_0"), val = tensor([2])]; + tensor var_1888 = transpose(perm = var_1887, x = var_1883_cast_fp16)[name = string("transpose_6")]; + tensor var_1889 = expand_dims(axes = var_1889_axes_0, x = var_1888)[name = string("op_1889")]; + string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; + tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; + tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; + int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; + tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_1889)[name = string("query_states_33")]; + string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; + tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; + tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; + int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; + tensor key_states_49 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_1889)[name = string("key_states_49")]; + string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; + tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; + tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; + int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; + tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_1889)[name = string("value_states_49")]; + tensor var_1909 = const()[name = string("op_1909"), val = tensor([1, 24, 128, 64])]; + tensor var_1910 = reshape(shape = var_1909, x = query_states_33)[name = string("op_1910")]; + tensor var_1911 = const()[name = string("op_1911"), val = tensor([0, 1, 3, 2])]; + tensor var_1913 = const()[name = string("op_1913"), val = tensor([1, 8, 128, 64])]; + tensor var_1914 = reshape(shape = var_1913, x = key_states_49)[name = string("op_1914")]; + tensor var_1915 = const()[name = string("op_1915"), val = tensor([0, 1, 3, 2])]; + tensor var_1917 = const()[name = string("op_1917"), val = tensor([1, 8, 128, 64])]; + tensor var_1918 = reshape(shape = var_1917, x = value_states_49)[name = string("op_1918")]; + tensor var_1919 = const()[name = string("op_1919"), val = tensor([0, 1, 3, 2])]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_225 = transpose(perm = var_1911, x = var_1910)[name = string("transpose_5")]; + tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = x_225)[name = string("x1_33")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = x_225)[name = string("x2_33")]; + tensor var_1937 = mul(x = x1_33, y = cos_7)[name = string("op_1937")]; + tensor var_1938 = mul(x = x2_33, y = sin_7)[name = string("op_1938")]; + tensor var_1939 = sub(x = var_1937, y = var_1938)[name = string("op_1939")]; + tensor var_1940 = mul(x = x2_33, y = cos_7)[name = string("op_1940")]; + tensor var_1941 = mul(x = x1_33, y = sin_7)[name = string("op_1941")]; + tensor var_1942 = add(x = var_1940, y = var_1941)[name = string("op_1942")]; + bool rotated_33_interleave_0 = const()[name = string("rotated_33_interleave_0"), val = bool(false)]; + tensor rotated_33 = concat(axis = var_49, interleave = rotated_33_interleave_0, values = (var_1939, var_1942))[name = string("rotated_33")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_229 = transpose(perm = var_1915, x = var_1914)[name = string("transpose_4")]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_229)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_229)[name = string("x2")]; + tensor var_1958 = mul(x = x1, y = cos_7)[name = string("op_1958")]; + tensor var_1959 = mul(x = x2, y = sin_7)[name = string("op_1959")]; + tensor var_1960 = sub(x = var_1958, y = var_1959)[name = string("op_1960")]; + tensor var_1961 = mul(x = x2, y = cos_7)[name = string("op_1961")]; + tensor var_1962 = mul(x = x1, y = sin_7)[name = string("op_1962")]; + tensor var_1963 = add(x = var_1961, y = var_1962)[name = string("op_1963")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated = concat(axis = var_49, interleave = rotated_interleave_0, values = (var_1960, var_1963))[name = string("rotated")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, var_396, concat_147_values3_0))[name = string("concat_147")]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = rotated, x = coreml_update_state_33)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_34 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_34")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([36])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([37])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; + tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; + tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, var_396, concat_151_values3_0))[name = string("concat_151")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_51 = transpose(perm = var_1919, x = var_1918)[name = string("transpose_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = value_states_51, x = coreml_update_state_34)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_35 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_35")]; + tensor var_1986_begin_0 = const()[name = string("op_1986_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_1986_end_0 = const()[name = string("op_1986_end_0"), val = tensor([9, 8, 1024, 128])]; + tensor var_1986_end_mask_0 = const()[name = string("op_1986_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1986_cast_fp16 = slice_by_index(begin = var_1986_begin_0, end = var_1986_end_0, end_mask = var_1986_end_mask_0, x = coreml_update_state_35)[name = string("op_1986_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1986_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1988_begin_0 = const()[name = string("op_1988_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor var_1988_end_0 = const()[name = string("op_1988_end_0"), val = tensor([37, 8, 1024, 128])]; + tensor var_1988_end_mask_0 = const()[name = string("op_1988_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1988_cast_fp16 = slice_by_index(begin = var_1988_begin_0, end = var_1988_end_0, end_mask = var_1988_end_mask_0, x = coreml_update_state_35)[name = string("op_1988_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1988_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_235_axes_0 = const()[name = string("x_235_axes_0"), val = tensor([1])]; + tensor x_235_cast_fp16 = expand_dims(axes = x_235_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_1997 = const()[name = string("op_1997"), val = tensor([1, 3, 1, 1])]; + tensor x_237_cast_fp16 = tile(reps = var_1997, x = x_235_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor var_2001 = const()[name = string("op_2001"), val = tensor([1, -1, 1024, 128])]; + tensor var_2002_cast_fp16 = reshape(shape = var_2001, x = x_237_cast_fp16)[name = string("op_2002_cast_fp16")]; + tensor x_241_axes_0 = const()[name = string("x_241_axes_0"), val = tensor([1])]; + tensor x_241_cast_fp16 = expand_dims(axes = x_241_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_2004 = const()[name = string("op_2004"), val = tensor([1, 3, 1, 1])]; + tensor x_243_cast_fp16 = tile(reps = var_2004, x = x_241_cast_fp16)[name = string("x_243_cast_fp16")]; + bool var_2011_transpose_x_0 = const()[name = string("op_2011_transpose_x_0"), val = bool(false)]; + bool var_2011_transpose_y_0 = const()[name = string("op_2011_transpose_y_0"), val = bool(true)]; + tensor var_2011_cast_fp16 = matmul(transpose_x = var_2011_transpose_x_0, transpose_y = var_2011_transpose_y_0, x = rotated_33, y = var_2002_cast_fp16)[name = string("op_2011_cast_fp16")]; + fp16 var_2012_to_fp16 = const()[name = string("op_2012_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_cast_fp16 = mul(x = var_2011_cast_fp16, y = var_2012_to_fp16)[name = string("attn_weights_cast_fp16")]; + tensor x_245_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_245_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_245_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; + tensor x_247_cast_fp16 = sub(x = x_245_cast_fp16, y = reduce_max_8_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_247_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_2023_axes_0 = const()[name = string("op_2023_axes_0"), val = tensor([-1])]; + bool var_2023_keep_dims_0 = const()[name = string("op_2023_keep_dims_0"), val = bool(true)]; + tensor var_2023_cast_fp16 = reduce_sum(axes = var_2023_axes_0, keep_dims = var_2023_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_2023_cast_fp16")]; + tensor var_2024_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_2023_cast_fp16)[name = string("op_2024_cast_fp16")]; + tensor concat_156 = const()[name = string("concat_156"), val = tensor([24, 64, 1024])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_2024_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor concat_157 = const()[name = string("concat_157"), val = tensor([24, 1024, 128])]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_243_cast_fp16)[name = string("reshape_25_cast_fp16")]; + bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; + bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; + tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; + tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 24, 64, 128])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor var_2027_perm_0 = const()[name = string("op_2027_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2029 = const()[name = string("op_2029"), val = tensor([1, 64, 3072])]; + tensor var_2027_cast_fp16 = transpose(perm = var_2027_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_2")]; + tensor input_117_cast_fp16 = reshape(shape = var_2029, x = var_2027_cast_fp16)[name = string("input_117_cast_fp16")]; + tensor model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743560256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750638208))))[name = string("model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = linear_8_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_69_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_119_cast_fp16 = sub(x = hidden_states_69_cast_fp16, y = mean_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor var_2040_axes_0 = const()[name = string("op_2040_axes_0"), val = tensor([-1])]; + tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750687424)))]; + tensor var_2040_cast_fp16 = layer_norm(axes = var_2040_axes_0, epsilon = var_51_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_2040_cast_fp16")]; + tensor var_2047 = const()[name = string("op_2047"), val = tensor([0, 2, 1])]; + tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; + tensor var_2048 = transpose(perm = var_2047, x = var_2040_cast_fp16)[name = string("transpose_1")]; + tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_2048)[name = string("input_121")]; + string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; + tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; + tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; + int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; + tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_states")]; + tensor gate_states = silu(x = input_123)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_2070_axes_0 = const()[name = string("op_2070_axes_0"), val = tensor([2])]; + tensor var_2070 = squeeze(axes = var_2070_axes_0, x = hidden_states_1)[name = string("op_2070")]; + tensor var_2071 = const()[name = string("op_2071"), val = tensor([0, 2, 1])]; + tensor var_2072 = transpose(perm = var_2071, x = var_2070)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_69_cast_fp16, y = var_2072)[name = string("op_2073_cast_fp16")]; + } -> (output_hidden_states); +} \ No newline at end of file