diff --git "a/Hermes_FFN_PF_lut6_chunk_02of03.mlmodelc/model.mil" "b/Hermes_FFN_PF_lut6_chunk_02of03.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/Hermes_FFN_PF_lut6_chunk_02of03.mlmodelc/model.mil" @@ -0,0 +1,3697 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7078016))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7127232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9486592))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9503040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11862400))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11878848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30753280))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30884416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49758848))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49889984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68764416))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75891584))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75940800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78300160))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78316608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80675968))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80692416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99566848))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99697984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118572416))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137577984))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137627200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144705152))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144754368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147113728))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147130176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149489536))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149505984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168380416))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168511552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187385984))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206391552))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206440768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213518720))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213567936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215927296))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215943744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218303104))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218319552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237193984))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237325120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256199552))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256330688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275205120))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275254336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282332288))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282381504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284740864))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284757312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287116672))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287133120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306007552))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306138688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325013120))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325144256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344018688))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344067904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351145856))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353554432))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353570880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355930240))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355946688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374821120))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374952256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393826688))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393957824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412832256))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412881472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419959424))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420008640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422368000))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422384448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424743808))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424760256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443634688))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443765824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462640256))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462771392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481645824))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481695040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488772992))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488822208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491181568))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491198016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493557376))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493573824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512448256))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512579392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531453824))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531584960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550459392))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550508608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557586560))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557635776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559995136))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560011584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562370944))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562387392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581261824))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581392960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600267392))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600398528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619272960))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")]; + int32 var_60 = const()[name = string("op_60"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_294_axis_0 = const()[name = string("op_294_axis_0"), val = int32(1)]; + int32 var_294_batch_dims_0 = const()[name = string("op_294_batch_dims_0"), val = int32(0)]; + bool var_294_validate_indices_0 = const()[name = string("op_294_validate_indices_0"), val = bool(false)]; + tensor var_65_to_fp16 = const()[name = string("op_65_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619322176)))]; + tensor var_294_cast_fp16 = gather(axis = var_294_axis_0, batch_dims = var_294_batch_dims_0, indices = select_0, validate_indices = var_294_validate_indices_0, x = var_65_to_fp16)[name = string("op_294_cast_fp16")]; + tensor var_295 = const()[name = string("op_295"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_295, x = var_294_cast_fp16)[name = string("sin_1_cast_fp16")]; + int32 var_299_axis_0 = const()[name = string("op_299_axis_0"), val = int32(1)]; + int32 var_299_batch_dims_0 = const()[name = string("op_299_batch_dims_0"), val = int32(0)]; + bool var_299_validate_indices_0 = const()[name = string("op_299_validate_indices_0"), val = bool(false)]; + tensor var_59_to_fp16 = const()[name = string("op_59_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652876672)))]; + tensor var_299_cast_fp16 = gather(axis = var_299_axis_0, batch_dims = var_299_batch_dims_0, indices = select_0, validate_indices = var_299_validate_indices_0, x = var_59_to_fp16)[name = string("op_299_cast_fp16")]; + tensor var_300 = const()[name = string("op_300"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_300, x = var_299_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_308_axes_0 = const()[name = string("op_308_axes_0"), val = tensor([-1])]; + tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686431168)))]; + fp16 var_55_to_fp16 = const()[name = string("op_55_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_308_cast_fp16 = layer_norm(axes = var_308_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_308_cast_fp16")]; + tensor var_311 = const()[name = string("op_311"), val = tensor([0, 2, 1])]; + tensor var_313_axes_0 = const()[name = string("op_313_axes_0"), val = tensor([2])]; + tensor var_312 = transpose(perm = var_311, x = var_308_cast_fp16)[name = string("transpose_35")]; + tensor var_313 = expand_dims(axes = var_313_axes_0, x = var_312)[name = string("op_313")]; + string var_320_pad_type_0 = const()[name = string("op_320_pad_type_0"), val = string("valid")]; + tensor var_320_strides_0 = const()[name = string("op_320_strides_0"), val = tensor([1, 1])]; + tensor var_320_pad_0 = const()[name = string("op_320_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_320_dilations_0 = const()[name = string("op_320_dilations_0"), val = tensor([1, 1])]; + int32 var_320_groups_0 = const()[name = string("op_320_groups_0"), val = int32(1)]; + tensor var_320 = conv(dilations = var_320_dilations_0, groups = var_320_groups_0, pad = var_320_pad_0, pad_type = var_320_pad_type_0, strides = var_320_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_313)[name = string("op_320")]; + tensor var_321 = const()[name = string("op_321"), val = tensor([1, 24, 1, 128])]; + tensor var_322 = reshape(shape = var_321, x = var_320)[name = string("op_322")]; + string var_329_pad_type_0 = const()[name = string("op_329_pad_type_0"), val = string("valid")]; + tensor var_329_strides_0 = const()[name = string("op_329_strides_0"), val = tensor([1, 1])]; + tensor var_329_pad_0 = const()[name = string("op_329_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_329_dilations_0 = const()[name = string("op_329_dilations_0"), val = tensor([1, 1])]; + int32 var_329_groups_0 = const()[name = string("op_329_groups_0"), val = int32(1)]; + tensor var_329 = conv(dilations = var_329_dilations_0, groups = var_329_groups_0, pad = var_329_pad_0, pad_type = var_329_pad_type_0, strides = var_329_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_313)[name = string("op_329")]; + tensor var_330 = const()[name = string("op_330"), val = tensor([1, 8, 1, 128])]; + tensor var_331 = reshape(shape = var_330, x = var_329)[name = string("op_331")]; + string var_338_pad_type_0 = const()[name = string("op_338_pad_type_0"), val = string("valid")]; + tensor var_338_strides_0 = const()[name = string("op_338_strides_0"), val = tensor([1, 1])]; + tensor var_338_pad_0 = const()[name = string("op_338_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_338_dilations_0 = const()[name = string("op_338_dilations_0"), val = tensor([1, 1])]; + int32 var_338_groups_0 = const()[name = string("op_338_groups_0"), val = int32(1)]; + tensor var_338 = conv(dilations = var_338_dilations_0, groups = var_338_groups_0, pad = var_338_pad_0, pad_type = var_338_pad_type_0, strides = var_338_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_313)[name = string("op_338")]; + tensor var_339 = const()[name = string("op_339"), val = tensor([1, 8, 1, 128])]; + tensor var_340 = reshape(shape = var_339, x = var_338)[name = string("op_340")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_322)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_322)[name = string("x2_1")]; + tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; + tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; + tensor var_354_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_354_cast_fp16")]; + tensor var_355_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_355_cast_fp16")]; + tensor var_356_cast_fp16 = sub(x = var_354_cast_fp16, y = var_355_cast_fp16)[name = string("op_356_cast_fp16")]; + tensor var_357_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_357_cast_fp16")]; + tensor var_358_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_358_cast_fp16")]; + tensor var_359_cast_fp16 = add(x = var_357_cast_fp16, y = var_358_cast_fp16)[name = string("op_359_cast_fp16")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_60, interleave = rotated_1_interleave_0, values = (var_356_cast_fp16, var_359_cast_fp16))[name = string("rotated_1_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_331)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_331)[name = string("x2_3")]; + tensor var_375_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_375_cast_fp16")]; + tensor var_376_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_376_cast_fp16")]; + tensor var_377_cast_fp16 = sub(x = var_375_cast_fp16, y = var_376_cast_fp16)[name = string("op_377_cast_fp16")]; + tensor var_378_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_378_cast_fp16")]; + tensor var_379_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_379_cast_fp16")]; + tensor var_380_cast_fp16 = add(x = var_378_cast_fp16, y = var_379_cast_fp16)[name = string("op_380_cast_fp16")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_60, interleave = rotated_3_interleave_0, values = (var_377_cast_fp16, var_380_cast_fp16))[name = string("rotated_3_cast_fp16")]; + int32 var_384 = const()[name = string("op_384"), val = int32(1)]; + tensor var_385 = add(x = current_pos, y = var_384)[name = string("op_385")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([9])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([10])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_385, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([37])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([38])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_385, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_340, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; + tensor var_400_begin_0 = const()[name = string("op_400_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_400_end_0 = const()[name = string("op_400_end_0"), val = tensor([10, 8, 1024, 128])]; + tensor var_400_end_mask_0 = const()[name = string("op_400_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = coreml_update_state_19)[name = string("op_400_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_400_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_402_begin_0 = const()[name = string("op_402_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor var_402_end_0 = const()[name = string("op_402_end_0"), val = tensor([38, 8, 1024, 128])]; + tensor var_402_end_mask_0 = const()[name = string("op_402_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_402_cast_fp16 = slice_by_index(begin = var_402_begin_0, end = var_402_end_0, end_mask = var_402_end_mask_0, x = coreml_update_state_19)[name = string("op_402_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_402_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_411 = const()[name = string("op_411"), val = tensor([1, 3, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_411, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_415 = const()[name = string("op_415"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_3_cast_fp16 = reshape(shape = var_415, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_418 = const()[name = string("op_418"), val = tensor([1, 3, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_418, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_422 = const()[name = string("op_422"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_3_cast_fp16 = reshape(shape = var_422, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; + bool var_425_transpose_x_1 = const()[name = string("op_425_transpose_x_1"), val = bool(false)]; + bool var_425_transpose_y_1 = const()[name = string("op_425_transpose_y_1"), val = bool(true)]; + tensor var_425_cast_fp16 = matmul(transpose_x = var_425_transpose_x_1, transpose_y = var_425_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_425_cast_fp16")]; + fp16 var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_425_cast_fp16, y = var_426_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_437_axes_0 = const()[name = string("op_437_axes_0"), val = tensor([-1])]; + bool var_437_keep_dims_0 = const()[name = string("op_437_keep_dims_0"), val = bool(true)]; + tensor var_437_cast_fp16 = reduce_sum(axes = var_437_axes_0, keep_dims = var_437_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_437_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_437_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_440_perm_0 = const()[name = string("op_440_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_442 = const()[name = string("op_442"), val = tensor([1, 1, 3072])]; + tensor var_440_cast_fp16 = transpose(perm = var_440_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_34")]; + tensor input_5_cast_fp16 = reshape(shape = var_442, x = var_440_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686437376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693515328))))[name = string("model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693564544)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_453_axes_0 = const()[name = string("op_453_axes_0"), val = tensor([-1])]; + tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693570752)))]; + tensor var_453_cast_fp16 = layer_norm(axes = var_453_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_453_cast_fp16")]; + tensor var_460 = const()[name = string("op_460"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_461 = transpose(perm = var_460, x = var_453_cast_fp16)[name = string("transpose_33")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_461)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_483_axes_0 = const()[name = string("op_483_axes_0"), val = tensor([2])]; + tensor var_483 = squeeze(axes = var_483_axes_0, x = hidden_states_7)[name = string("op_483")]; + tensor var_484 = const()[name = string("op_484"), val = tensor([0, 2, 1])]; + tensor var_485 = transpose(perm = var_484, x = var_483)[name = string("transpose_32")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_485)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_493_axes_0 = const()[name = string("op_493_axes_0"), val = tensor([-1])]; + tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693576960)))]; + tensor var_493_cast_fp16 = layer_norm(axes = var_493_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_493_cast_fp16")]; + tensor var_496 = const()[name = string("op_496"), val = tensor([0, 2, 1])]; + tensor var_498_axes_0 = const()[name = string("op_498_axes_0"), val = tensor([2])]; + tensor var_497 = transpose(perm = var_496, x = var_493_cast_fp16)[name = string("transpose_31")]; + tensor var_498 = expand_dims(axes = var_498_axes_0, x = var_497)[name = string("op_498")]; + string var_505_pad_type_0 = const()[name = string("op_505_pad_type_0"), val = string("valid")]; + tensor var_505_strides_0 = const()[name = string("op_505_strides_0"), val = tensor([1, 1])]; + tensor var_505_pad_0 = const()[name = string("op_505_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_505_dilations_0 = const()[name = string("op_505_dilations_0"), val = tensor([1, 1])]; + int32 var_505_groups_0 = const()[name = string("op_505_groups_0"), val = int32(1)]; + tensor var_505 = conv(dilations = var_505_dilations_0, groups = var_505_groups_0, pad = var_505_pad_0, pad_type = var_505_pad_type_0, strides = var_505_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_498)[name = string("op_505")]; + tensor var_506 = const()[name = string("op_506"), val = tensor([1, 24, 1, 128])]; + tensor var_507 = reshape(shape = var_506, x = var_505)[name = string("op_507")]; + string var_514_pad_type_0 = const()[name = string("op_514_pad_type_0"), val = string("valid")]; + tensor var_514_strides_0 = const()[name = string("op_514_strides_0"), val = tensor([1, 1])]; + tensor var_514_pad_0 = const()[name = string("op_514_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_514_dilations_0 = const()[name = string("op_514_dilations_0"), val = tensor([1, 1])]; + int32 var_514_groups_0 = const()[name = string("op_514_groups_0"), val = int32(1)]; + tensor var_514 = conv(dilations = var_514_dilations_0, groups = var_514_groups_0, pad = var_514_pad_0, pad_type = var_514_pad_type_0, strides = var_514_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_498)[name = string("op_514")]; + tensor var_515 = const()[name = string("op_515"), val = tensor([1, 8, 1, 128])]; + tensor var_516 = reshape(shape = var_515, x = var_514)[name = string("op_516")]; + string var_523_pad_type_0 = const()[name = string("op_523_pad_type_0"), val = string("valid")]; + tensor var_523_strides_0 = const()[name = string("op_523_strides_0"), val = tensor([1, 1])]; + tensor var_523_pad_0 = const()[name = string("op_523_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_523_dilations_0 = const()[name = string("op_523_dilations_0"), val = tensor([1, 1])]; + int32 var_523_groups_0 = const()[name = string("op_523_groups_0"), val = int32(1)]; + tensor var_523 = conv(dilations = var_523_dilations_0, groups = var_523_groups_0, pad = var_523_pad_0, pad_type = var_523_pad_type_0, strides = var_523_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_498)[name = string("op_523")]; + tensor var_524 = const()[name = string("op_524"), val = tensor([1, 8, 1, 128])]; + tensor var_525 = reshape(shape = var_524, x = var_523)[name = string("op_525")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_507)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_507)[name = string("x2_5")]; + tensor var_539_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_540_cast_fp16")]; + tensor var_541_cast_fp16 = sub(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = string("op_541_cast_fp16")]; + tensor var_542_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_542_cast_fp16")]; + tensor var_543_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_543_cast_fp16")]; + tensor var_544_cast_fp16 = add(x = var_542_cast_fp16, y = var_543_cast_fp16)[name = string("op_544_cast_fp16")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_60, interleave = rotated_5_interleave_0, values = (var_541_cast_fp16, var_544_cast_fp16))[name = string("rotated_5_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_516)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_516)[name = string("x2_7")]; + tensor var_560_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_560_cast_fp16")]; + tensor var_561_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_561_cast_fp16")]; + tensor var_562_cast_fp16 = sub(x = var_560_cast_fp16, y = var_561_cast_fp16)[name = string("op_562_cast_fp16")]; + tensor var_563_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_563_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = add(x = var_563_cast_fp16, y = var_564_cast_fp16)[name = string("op_565_cast_fp16")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_60, interleave = rotated_7_interleave_0, values = (var_562_cast_fp16, var_565_cast_fp16))[name = string("rotated_7_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([10])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([11])]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; + tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_385, concat_11_values3_0))[name = string("concat_11")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([38])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([39])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_385, concat_15_values3_0))[name = string("concat_15")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_525, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; + tensor var_585_begin_0 = const()[name = string("op_585_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor var_585_end_0 = const()[name = string("op_585_end_0"), val = tensor([11, 8, 1024, 128])]; + tensor var_585_end_mask_0 = const()[name = string("op_585_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_585_cast_fp16 = slice_by_index(begin = var_585_begin_0, end = var_585_end_0, end_mask = var_585_end_mask_0, x = coreml_update_state_21)[name = string("op_585_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_585_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_587_begin_0 = const()[name = string("op_587_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor var_587_end_0 = const()[name = string("op_587_end_0"), val = tensor([39, 8, 1024, 128])]; + tensor var_587_end_mask_0 = const()[name = string("op_587_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_587_cast_fp16 = slice_by_index(begin = var_587_begin_0, end = var_587_end_0, end_mask = var_587_end_mask_0, x = coreml_update_state_21)[name = string("op_587_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_587_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_596 = const()[name = string("op_596"), val = tensor([1, 3, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_596, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_600 = const()[name = string("op_600"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_7_cast_fp16 = reshape(shape = var_600, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_603 = const()[name = string("op_603"), val = tensor([1, 3, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_603, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_607 = const()[name = string("op_607"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_7_cast_fp16 = reshape(shape = var_607, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; + bool var_610_transpose_x_1 = const()[name = string("op_610_transpose_x_1"), val = bool(false)]; + bool var_610_transpose_y_1 = const()[name = string("op_610_transpose_y_1"), val = bool(true)]; + tensor var_610_cast_fp16 = matmul(transpose_x = var_610_transpose_x_1, transpose_y = var_610_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_610_cast_fp16")]; + fp16 var_611_to_fp16 = const()[name = string("op_611_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_622_axes_0 = const()[name = string("op_622_axes_0"), val = tensor([-1])]; + bool var_622_keep_dims_0 = const()[name = string("op_622_keep_dims_0"), val = bool(true)]; + tensor var_622_cast_fp16 = reduce_sum(axes = var_622_axes_0, keep_dims = var_622_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_622_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_622_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_625_perm_0 = const()[name = string("op_625_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_627 = const()[name = string("op_627"), val = tensor([1, 1, 3072])]; + tensor var_625_cast_fp16 = transpose(perm = var_625_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_30")]; + tensor input_19_cast_fp16 = reshape(shape = var_627, x = var_625_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693583168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700661120))))[name = string("model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_638_axes_0 = const()[name = string("op_638_axes_0"), val = tensor([-1])]; + tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700710336)))]; + tensor var_638_cast_fp16 = layer_norm(axes = var_638_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_638_cast_fp16")]; + tensor var_645 = const()[name = string("op_645"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_646 = transpose(perm = var_645, x = var_638_cast_fp16)[name = string("transpose_29")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_646)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_668_axes_0 = const()[name = string("op_668_axes_0"), val = tensor([2])]; + tensor var_668 = squeeze(axes = var_668_axes_0, x = hidden_states_15)[name = string("op_668")]; + tensor var_669 = const()[name = string("op_669"), val = tensor([0, 2, 1])]; + tensor var_670 = transpose(perm = var_669, x = var_668)[name = string("transpose_28")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_670)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_678_axes_0 = const()[name = string("op_678_axes_0"), val = tensor([-1])]; + tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700716544)))]; + tensor var_678_cast_fp16 = layer_norm(axes = var_678_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_678_cast_fp16")]; + tensor var_681 = const()[name = string("op_681"), val = tensor([0, 2, 1])]; + tensor var_683_axes_0 = const()[name = string("op_683_axes_0"), val = tensor([2])]; + tensor var_682 = transpose(perm = var_681, x = var_678_cast_fp16)[name = string("transpose_27")]; + tensor var_683 = expand_dims(axes = var_683_axes_0, x = var_682)[name = string("op_683")]; + string var_690_pad_type_0 = const()[name = string("op_690_pad_type_0"), val = string("valid")]; + tensor var_690_strides_0 = const()[name = string("op_690_strides_0"), val = tensor([1, 1])]; + tensor var_690_pad_0 = const()[name = string("op_690_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_690_dilations_0 = const()[name = string("op_690_dilations_0"), val = tensor([1, 1])]; + int32 var_690_groups_0 = const()[name = string("op_690_groups_0"), val = int32(1)]; + tensor var_690 = conv(dilations = var_690_dilations_0, groups = var_690_groups_0, pad = var_690_pad_0, pad_type = var_690_pad_type_0, strides = var_690_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_683)[name = string("op_690")]; + tensor var_691 = const()[name = string("op_691"), val = tensor([1, 24, 1, 128])]; + tensor var_692 = reshape(shape = var_691, x = var_690)[name = string("op_692")]; + string var_699_pad_type_0 = const()[name = string("op_699_pad_type_0"), val = string("valid")]; + tensor var_699_strides_0 = const()[name = string("op_699_strides_0"), val = tensor([1, 1])]; + tensor var_699_pad_0 = const()[name = string("op_699_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_699_dilations_0 = const()[name = string("op_699_dilations_0"), val = tensor([1, 1])]; + int32 var_699_groups_0 = const()[name = string("op_699_groups_0"), val = int32(1)]; + tensor var_699 = conv(dilations = var_699_dilations_0, groups = var_699_groups_0, pad = var_699_pad_0, pad_type = var_699_pad_type_0, strides = var_699_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_683)[name = string("op_699")]; + tensor var_700 = const()[name = string("op_700"), val = tensor([1, 8, 1, 128])]; + tensor var_701 = reshape(shape = var_700, x = var_699)[name = string("op_701")]; + string var_708_pad_type_0 = const()[name = string("op_708_pad_type_0"), val = string("valid")]; + tensor var_708_strides_0 = const()[name = string("op_708_strides_0"), val = tensor([1, 1])]; + tensor var_708_pad_0 = const()[name = string("op_708_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_708_dilations_0 = const()[name = string("op_708_dilations_0"), val = tensor([1, 1])]; + int32 var_708_groups_0 = const()[name = string("op_708_groups_0"), val = int32(1)]; + tensor var_708 = conv(dilations = var_708_dilations_0, groups = var_708_groups_0, pad = var_708_pad_0, pad_type = var_708_pad_type_0, strides = var_708_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_683)[name = string("op_708")]; + tensor var_709 = const()[name = string("op_709"), val = tensor([1, 8, 1, 128])]; + tensor var_710 = reshape(shape = var_709, x = var_708)[name = string("op_710")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_692)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_692)[name = string("x2_9")]; + tensor var_724_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_724_cast_fp16")]; + tensor var_725_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_725_cast_fp16")]; + tensor var_726_cast_fp16 = sub(x = var_724_cast_fp16, y = var_725_cast_fp16)[name = string("op_726_cast_fp16")]; + tensor var_727_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_727_cast_fp16")]; + tensor var_728_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_728_cast_fp16")]; + tensor var_729_cast_fp16 = add(x = var_727_cast_fp16, y = var_728_cast_fp16)[name = string("op_729_cast_fp16")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_60, interleave = rotated_9_interleave_0, values = (var_726_cast_fp16, var_729_cast_fp16))[name = string("rotated_9_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_701)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_701)[name = string("x2_11")]; + tensor var_745_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_745_cast_fp16")]; + tensor var_746_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_746_cast_fp16")]; + tensor var_747_cast_fp16 = sub(x = var_745_cast_fp16, y = var_746_cast_fp16)[name = string("op_747_cast_fp16")]; + tensor var_748_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_748_cast_fp16")]; + tensor var_749_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_749_cast_fp16")]; + tensor var_750_cast_fp16 = add(x = var_748_cast_fp16, y = var_749_cast_fp16)[name = string("op_750_cast_fp16")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11_cast_fp16 = concat(axis = var_60, interleave = rotated_11_interleave_0, values = (var_747_cast_fp16, var_750_cast_fp16))[name = string("rotated_11_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([11])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([12])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; + tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_385, concat_19_values3_0))[name = string("concat_19")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([39])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([40])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_385, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_710, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; + tensor var_770_begin_0 = const()[name = string("op_770_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor var_770_end_0 = const()[name = string("op_770_end_0"), val = tensor([12, 8, 1024, 128])]; + tensor var_770_end_mask_0 = const()[name = string("op_770_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_770_cast_fp16 = slice_by_index(begin = var_770_begin_0, end = var_770_end_0, end_mask = var_770_end_mask_0, x = coreml_update_state_23)[name = string("op_770_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_770_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_772_begin_0 = const()[name = string("op_772_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor var_772_end_0 = const()[name = string("op_772_end_0"), val = tensor([40, 8, 1024, 128])]; + tensor var_772_end_mask_0 = const()[name = string("op_772_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_772_cast_fp16 = slice_by_index(begin = var_772_begin_0, end = var_772_end_0, end_mask = var_772_end_mask_0, x = coreml_update_state_23)[name = string("op_772_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_772_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_781 = const()[name = string("op_781"), val = tensor([1, 3, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_781, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_785 = const()[name = string("op_785"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_11_cast_fp16 = reshape(shape = var_785, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_788 = const()[name = string("op_788"), val = tensor([1, 3, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_788, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor var_792 = const()[name = string("op_792"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_11_cast_fp16 = reshape(shape = var_792, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + bool var_795_transpose_x_1 = const()[name = string("op_795_transpose_x_1"), val = bool(false)]; + bool var_795_transpose_y_1 = const()[name = string("op_795_transpose_y_1"), val = bool(true)]; + tensor var_795_cast_fp16 = matmul(transpose_x = var_795_transpose_x_1, transpose_y = var_795_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_795_cast_fp16")]; + fp16 var_796_to_fp16 = const()[name = string("op_796_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_795_cast_fp16, y = var_796_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_807_axes_0 = const()[name = string("op_807_axes_0"), val = tensor([-1])]; + bool var_807_keep_dims_0 = const()[name = string("op_807_keep_dims_0"), val = bool(true)]; + tensor var_807_cast_fp16 = reduce_sum(axes = var_807_axes_0, keep_dims = var_807_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_807_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_807_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_810_perm_0 = const()[name = string("op_810_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_812 = const()[name = string("op_812"), val = tensor([1, 1, 3072])]; + tensor var_810_cast_fp16 = transpose(perm = var_810_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_26")]; + tensor input_33_cast_fp16 = reshape(shape = var_812, x = var_810_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700722752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707800704))))[name = string("model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_823_axes_0 = const()[name = string("op_823_axes_0"), val = tensor([-1])]; + tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707849920)))]; + tensor var_823_cast_fp16 = layer_norm(axes = var_823_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_823_cast_fp16")]; + tensor var_830 = const()[name = string("op_830"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_831 = transpose(perm = var_830, x = var_823_cast_fp16)[name = string("transpose_25")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_831)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_853_axes_0 = const()[name = string("op_853_axes_0"), val = tensor([2])]; + tensor var_853 = squeeze(axes = var_853_axes_0, x = hidden_states_23)[name = string("op_853")]; + tensor var_854 = const()[name = string("op_854"), val = tensor([0, 2, 1])]; + tensor var_855 = transpose(perm = var_854, x = var_853)[name = string("transpose_24")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_855)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_863_axes_0 = const()[name = string("op_863_axes_0"), val = tensor([-1])]; + tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707856128)))]; + tensor var_863_cast_fp16 = layer_norm(axes = var_863_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_863_cast_fp16")]; + tensor var_866 = const()[name = string("op_866"), val = tensor([0, 2, 1])]; + tensor var_868_axes_0 = const()[name = string("op_868_axes_0"), val = tensor([2])]; + tensor var_867 = transpose(perm = var_866, x = var_863_cast_fp16)[name = string("transpose_23")]; + tensor var_868 = expand_dims(axes = var_868_axes_0, x = var_867)[name = string("op_868")]; + string var_875_pad_type_0 = const()[name = string("op_875_pad_type_0"), val = string("valid")]; + tensor var_875_strides_0 = const()[name = string("op_875_strides_0"), val = tensor([1, 1])]; + tensor var_875_pad_0 = const()[name = string("op_875_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_875_dilations_0 = const()[name = string("op_875_dilations_0"), val = tensor([1, 1])]; + int32 var_875_groups_0 = const()[name = string("op_875_groups_0"), val = int32(1)]; + tensor var_875 = conv(dilations = var_875_dilations_0, groups = var_875_groups_0, pad = var_875_pad_0, pad_type = var_875_pad_type_0, strides = var_875_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_868)[name = string("op_875")]; + tensor var_876 = const()[name = string("op_876"), val = tensor([1, 24, 1, 128])]; + tensor var_877 = reshape(shape = var_876, x = var_875)[name = string("op_877")]; + string var_884_pad_type_0 = const()[name = string("op_884_pad_type_0"), val = string("valid")]; + tensor var_884_strides_0 = const()[name = string("op_884_strides_0"), val = tensor([1, 1])]; + tensor var_884_pad_0 = const()[name = string("op_884_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_884_dilations_0 = const()[name = string("op_884_dilations_0"), val = tensor([1, 1])]; + int32 var_884_groups_0 = const()[name = string("op_884_groups_0"), val = int32(1)]; + tensor var_884 = conv(dilations = var_884_dilations_0, groups = var_884_groups_0, pad = var_884_pad_0, pad_type = var_884_pad_type_0, strides = var_884_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_868)[name = string("op_884")]; + tensor var_885 = const()[name = string("op_885"), val = tensor([1, 8, 1, 128])]; + tensor var_886 = reshape(shape = var_885, x = var_884)[name = string("op_886")]; + string var_893_pad_type_0 = const()[name = string("op_893_pad_type_0"), val = string("valid")]; + tensor var_893_strides_0 = const()[name = string("op_893_strides_0"), val = tensor([1, 1])]; + tensor var_893_pad_0 = const()[name = string("op_893_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_893_dilations_0 = const()[name = string("op_893_dilations_0"), val = tensor([1, 1])]; + int32 var_893_groups_0 = const()[name = string("op_893_groups_0"), val = int32(1)]; + tensor var_893 = conv(dilations = var_893_dilations_0, groups = var_893_groups_0, pad = var_893_pad_0, pad_type = var_893_pad_type_0, strides = var_893_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_868)[name = string("op_893")]; + tensor var_894 = const()[name = string("op_894"), val = tensor([1, 8, 1, 128])]; + tensor var_895 = reshape(shape = var_894, x = var_893)[name = string("op_895")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_877)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_877)[name = string("x2_13")]; + tensor var_909_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_909_cast_fp16")]; + tensor var_910_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_910_cast_fp16")]; + tensor var_911_cast_fp16 = sub(x = var_909_cast_fp16, y = var_910_cast_fp16)[name = string("op_911_cast_fp16")]; + tensor var_912_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_912_cast_fp16")]; + tensor var_913_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_913_cast_fp16")]; + tensor var_914_cast_fp16 = add(x = var_912_cast_fp16, y = var_913_cast_fp16)[name = string("op_914_cast_fp16")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13_cast_fp16 = concat(axis = var_60, interleave = rotated_13_interleave_0, values = (var_911_cast_fp16, var_914_cast_fp16))[name = string("rotated_13_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_886)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_886)[name = string("x2_15")]; + tensor var_930_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_930_cast_fp16")]; + tensor var_931_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_931_cast_fp16")]; + tensor var_932_cast_fp16 = sub(x = var_930_cast_fp16, y = var_931_cast_fp16)[name = string("op_932_cast_fp16")]; + tensor var_933_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_933_cast_fp16")]; + tensor var_934_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_934_cast_fp16")]; + tensor var_935_cast_fp16 = add(x = var_933_cast_fp16, y = var_934_cast_fp16)[name = string("op_935_cast_fp16")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15_cast_fp16 = concat(axis = var_60, interleave = rotated_15_interleave_0, values = (var_932_cast_fp16, var_935_cast_fp16))[name = string("rotated_15_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([12])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([13])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_385, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([40])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([41])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; + tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; + tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; + int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; + bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; + tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_385, concat_31_values3_0))[name = string("concat_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_895, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; + tensor var_955_begin_0 = const()[name = string("op_955_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor var_955_end_0 = const()[name = string("op_955_end_0"), val = tensor([13, 8, 1024, 128])]; + tensor var_955_end_mask_0 = const()[name = string("op_955_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_955_cast_fp16 = slice_by_index(begin = var_955_begin_0, end = var_955_end_0, end_mask = var_955_end_mask_0, x = coreml_update_state_25)[name = string("op_955_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_955_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_957_begin_0 = const()[name = string("op_957_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor var_957_end_0 = const()[name = string("op_957_end_0"), val = tensor([41, 8, 1024, 128])]; + tensor var_957_end_mask_0 = const()[name = string("op_957_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_957_cast_fp16 = slice_by_index(begin = var_957_begin_0, end = var_957_end_0, end_mask = var_957_end_mask_0, x = coreml_update_state_25)[name = string("op_957_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_957_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_966 = const()[name = string("op_966"), val = tensor([1, 3, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_966, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_970 = const()[name = string("op_970"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_15_cast_fp16 = reshape(shape = var_970, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_973 = const()[name = string("op_973"), val = tensor([1, 3, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_973, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_977 = const()[name = string("op_977"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_15_cast_fp16 = reshape(shape = var_977, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")]; + bool var_980_transpose_x_1 = const()[name = string("op_980_transpose_x_1"), val = bool(false)]; + bool var_980_transpose_y_1 = const()[name = string("op_980_transpose_y_1"), val = bool(true)]; + tensor var_980_cast_fp16 = matmul(transpose_x = var_980_transpose_x_1, transpose_y = var_980_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_980_cast_fp16")]; + fp16 var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_980_cast_fp16, y = var_981_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_992_axes_0 = const()[name = string("op_992_axes_0"), val = tensor([-1])]; + bool var_992_keep_dims_0 = const()[name = string("op_992_keep_dims_0"), val = bool(true)]; + tensor var_992_cast_fp16 = reduce_sum(axes = var_992_axes_0, keep_dims = var_992_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_992_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_992_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_995_perm_0 = const()[name = string("op_995_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_997 = const()[name = string("op_997"), val = tensor([1, 1, 3072])]; + tensor var_995_cast_fp16 = transpose(perm = var_995_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_22")]; + tensor input_47_cast_fp16 = reshape(shape = var_997, x = var_995_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707862336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714940288))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_1008_axes_0 = const()[name = string("op_1008_axes_0"), val = tensor([-1])]; + tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714989504)))]; + tensor var_1008_cast_fp16 = layer_norm(axes = var_1008_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1008_cast_fp16")]; + tensor var_1015 = const()[name = string("op_1015"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_1016 = transpose(perm = var_1015, x = var_1008_cast_fp16)[name = string("transpose_21")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1016)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1038_axes_0 = const()[name = string("op_1038_axes_0"), val = tensor([2])]; + tensor var_1038 = squeeze(axes = var_1038_axes_0, x = hidden_states_31)[name = string("op_1038")]; + tensor var_1039 = const()[name = string("op_1039"), val = tensor([0, 2, 1])]; + tensor var_1040 = transpose(perm = var_1039, x = var_1038)[name = string("transpose_20")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1040)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1048_axes_0 = const()[name = string("op_1048_axes_0"), val = tensor([-1])]; + tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714995712)))]; + tensor var_1048_cast_fp16 = layer_norm(axes = var_1048_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor var_1051 = const()[name = string("op_1051"), val = tensor([0, 2, 1])]; + tensor var_1053_axes_0 = const()[name = string("op_1053_axes_0"), val = tensor([2])]; + tensor var_1052 = transpose(perm = var_1051, x = var_1048_cast_fp16)[name = string("transpose_19")]; + tensor var_1053 = expand_dims(axes = var_1053_axes_0, x = var_1052)[name = string("op_1053")]; + string var_1060_pad_type_0 = const()[name = string("op_1060_pad_type_0"), val = string("valid")]; + tensor var_1060_strides_0 = const()[name = string("op_1060_strides_0"), val = tensor([1, 1])]; + tensor var_1060_pad_0 = const()[name = string("op_1060_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1060_dilations_0 = const()[name = string("op_1060_dilations_0"), val = tensor([1, 1])]; + int32 var_1060_groups_0 = const()[name = string("op_1060_groups_0"), val = int32(1)]; + tensor var_1060 = conv(dilations = var_1060_dilations_0, groups = var_1060_groups_0, pad = var_1060_pad_0, pad_type = var_1060_pad_type_0, strides = var_1060_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_1053)[name = string("op_1060")]; + tensor var_1061 = const()[name = string("op_1061"), val = tensor([1, 24, 1, 128])]; + tensor var_1062 = reshape(shape = var_1061, x = var_1060)[name = string("op_1062")]; + string var_1069_pad_type_0 = const()[name = string("op_1069_pad_type_0"), val = string("valid")]; + tensor var_1069_strides_0 = const()[name = string("op_1069_strides_0"), val = tensor([1, 1])]; + tensor var_1069_pad_0 = const()[name = string("op_1069_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1069_dilations_0 = const()[name = string("op_1069_dilations_0"), val = tensor([1, 1])]; + int32 var_1069_groups_0 = const()[name = string("op_1069_groups_0"), val = int32(1)]; + tensor var_1069 = conv(dilations = var_1069_dilations_0, groups = var_1069_groups_0, pad = var_1069_pad_0, pad_type = var_1069_pad_type_0, strides = var_1069_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_1053)[name = string("op_1069")]; + tensor var_1070 = const()[name = string("op_1070"), val = tensor([1, 8, 1, 128])]; + tensor var_1071 = reshape(shape = var_1070, x = var_1069)[name = string("op_1071")]; + string var_1078_pad_type_0 = const()[name = string("op_1078_pad_type_0"), val = string("valid")]; + tensor var_1078_strides_0 = const()[name = string("op_1078_strides_0"), val = tensor([1, 1])]; + tensor var_1078_pad_0 = const()[name = string("op_1078_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1078_dilations_0 = const()[name = string("op_1078_dilations_0"), val = tensor([1, 1])]; + int32 var_1078_groups_0 = const()[name = string("op_1078_groups_0"), val = int32(1)]; + tensor var_1078 = conv(dilations = var_1078_dilations_0, groups = var_1078_groups_0, pad = var_1078_pad_0, pad_type = var_1078_pad_type_0, strides = var_1078_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_1053)[name = string("op_1078")]; + tensor var_1079 = const()[name = string("op_1079"), val = tensor([1, 8, 1, 128])]; + tensor var_1080 = reshape(shape = var_1079, x = var_1078)[name = string("op_1080")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1062)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1062)[name = string("x2_17")]; + tensor var_1094_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1094_cast_fp16")]; + tensor var_1095_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1095_cast_fp16")]; + tensor var_1096_cast_fp16 = sub(x = var_1094_cast_fp16, y = var_1095_cast_fp16)[name = string("op_1096_cast_fp16")]; + tensor var_1097_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1097_cast_fp16")]; + tensor var_1098_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1098_cast_fp16")]; + tensor var_1099_cast_fp16 = add(x = var_1097_cast_fp16, y = var_1098_cast_fp16)[name = string("op_1099_cast_fp16")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17_cast_fp16 = concat(axis = var_60, interleave = rotated_17_interleave_0, values = (var_1096_cast_fp16, var_1099_cast_fp16))[name = string("rotated_17_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1071)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1071)[name = string("x2_19")]; + tensor var_1115_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1115_cast_fp16")]; + tensor var_1116_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1116_cast_fp16")]; + tensor var_1117_cast_fp16 = sub(x = var_1115_cast_fp16, y = var_1116_cast_fp16)[name = string("op_1117_cast_fp16")]; + tensor var_1118_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1118_cast_fp16")]; + tensor var_1119_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1119_cast_fp16")]; + tensor var_1120_cast_fp16 = add(x = var_1118_cast_fp16, y = var_1119_cast_fp16)[name = string("op_1120_cast_fp16")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19_cast_fp16 = concat(axis = var_60, interleave = rotated_19_interleave_0, values = (var_1117_cast_fp16, var_1120_cast_fp16))[name = string("rotated_19_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([13])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([14])]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; + tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_385, concat_35_values3_0))[name = string("concat_35")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([41])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([42])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_385, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1080, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; + tensor var_1140_begin_0 = const()[name = string("op_1140_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor var_1140_end_0 = const()[name = string("op_1140_end_0"), val = tensor([14, 8, 1024, 128])]; + tensor var_1140_end_mask_0 = const()[name = string("op_1140_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1140_cast_fp16 = slice_by_index(begin = var_1140_begin_0, end = var_1140_end_0, end_mask = var_1140_end_mask_0, x = coreml_update_state_27)[name = string("op_1140_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1140_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = string("op_1142_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor var_1142_end_0 = const()[name = string("op_1142_end_0"), val = tensor([42, 8, 1024, 128])]; + tensor var_1142_end_mask_0 = const()[name = string("op_1142_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = coreml_update_state_27)[name = string("op_1142_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1142_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1151 = const()[name = string("op_1151"), val = tensor([1, 3, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1151, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1155 = const()[name = string("op_1155"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_19_cast_fp16 = reshape(shape = var_1155, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1158 = const()[name = string("op_1158"), val = tensor([1, 3, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1158, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1162 = const()[name = string("op_1162"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_19_cast_fp16 = reshape(shape = var_1162, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")]; + bool var_1165_transpose_x_1 = const()[name = string("op_1165_transpose_x_1"), val = bool(false)]; + bool var_1165_transpose_y_1 = const()[name = string("op_1165_transpose_y_1"), val = bool(true)]; + tensor var_1165_cast_fp16 = matmul(transpose_x = var_1165_transpose_x_1, transpose_y = var_1165_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1165_cast_fp16")]; + fp16 var_1166_to_fp16 = const()[name = string("op_1166_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_17_cast_fp16 = mul(x = var_1165_cast_fp16, y = var_1166_to_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1177_axes_0 = const()[name = string("op_1177_axes_0"), val = tensor([-1])]; + bool var_1177_keep_dims_0 = const()[name = string("op_1177_keep_dims_0"), val = bool(true)]; + tensor var_1177_cast_fp16 = reduce_sum(axes = var_1177_axes_0, keep_dims = var_1177_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1177_cast_fp16")]; + tensor attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1177_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1180_perm_0 = const()[name = string("op_1180_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1182 = const()[name = string("op_1182"), val = tensor([1, 1, 3072])]; + tensor var_1180_cast_fp16 = transpose(perm = var_1180_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_18")]; + tensor input_61_cast_fp16 = reshape(shape = var_1182, x = var_1180_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(715001920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722079872))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1193_axes_0 = const()[name = string("op_1193_axes_0"), val = tensor([-1])]; + tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722129088)))]; + tensor var_1193_cast_fp16 = layer_norm(axes = var_1193_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1193_cast_fp16")]; + tensor var_1200 = const()[name = string("op_1200"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1201 = transpose(perm = var_1200, x = var_1193_cast_fp16)[name = string("transpose_17")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1201)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1223_axes_0 = const()[name = string("op_1223_axes_0"), val = tensor([2])]; + tensor var_1223 = squeeze(axes = var_1223_axes_0, x = hidden_states_39)[name = string("op_1223")]; + tensor var_1224 = const()[name = string("op_1224"), val = tensor([0, 2, 1])]; + tensor var_1225 = transpose(perm = var_1224, x = var_1223)[name = string("transpose_16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1225)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1233_axes_0 = const()[name = string("op_1233_axes_0"), val = tensor([-1])]; + tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722135296)))]; + tensor var_1233_cast_fp16 = layer_norm(axes = var_1233_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1233_cast_fp16")]; + tensor var_1236 = const()[name = string("op_1236"), val = tensor([0, 2, 1])]; + tensor var_1238_axes_0 = const()[name = string("op_1238_axes_0"), val = tensor([2])]; + tensor var_1237 = transpose(perm = var_1236, x = var_1233_cast_fp16)[name = string("transpose_15")]; + tensor var_1238 = expand_dims(axes = var_1238_axes_0, x = var_1237)[name = string("op_1238")]; + string var_1245_pad_type_0 = const()[name = string("op_1245_pad_type_0"), val = string("valid")]; + tensor var_1245_strides_0 = const()[name = string("op_1245_strides_0"), val = tensor([1, 1])]; + tensor var_1245_pad_0 = const()[name = string("op_1245_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1245_dilations_0 = const()[name = string("op_1245_dilations_0"), val = tensor([1, 1])]; + int32 var_1245_groups_0 = const()[name = string("op_1245_groups_0"), val = int32(1)]; + tensor var_1245 = conv(dilations = var_1245_dilations_0, groups = var_1245_groups_0, pad = var_1245_pad_0, pad_type = var_1245_pad_type_0, strides = var_1245_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_1238)[name = string("op_1245")]; + tensor var_1246 = const()[name = string("op_1246"), val = tensor([1, 24, 1, 128])]; + tensor var_1247 = reshape(shape = var_1246, x = var_1245)[name = string("op_1247")]; + string var_1254_pad_type_0 = const()[name = string("op_1254_pad_type_0"), val = string("valid")]; + tensor var_1254_strides_0 = const()[name = string("op_1254_strides_0"), val = tensor([1, 1])]; + tensor var_1254_pad_0 = const()[name = string("op_1254_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1254_dilations_0 = const()[name = string("op_1254_dilations_0"), val = tensor([1, 1])]; + int32 var_1254_groups_0 = const()[name = string("op_1254_groups_0"), val = int32(1)]; + tensor var_1254 = conv(dilations = var_1254_dilations_0, groups = var_1254_groups_0, pad = var_1254_pad_0, pad_type = var_1254_pad_type_0, strides = var_1254_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_1238)[name = string("op_1254")]; + tensor var_1255 = const()[name = string("op_1255"), val = tensor([1, 8, 1, 128])]; + tensor var_1256 = reshape(shape = var_1255, x = var_1254)[name = string("op_1256")]; + string var_1263_pad_type_0 = const()[name = string("op_1263_pad_type_0"), val = string("valid")]; + tensor var_1263_strides_0 = const()[name = string("op_1263_strides_0"), val = tensor([1, 1])]; + tensor var_1263_pad_0 = const()[name = string("op_1263_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1263_dilations_0 = const()[name = string("op_1263_dilations_0"), val = tensor([1, 1])]; + int32 var_1263_groups_0 = const()[name = string("op_1263_groups_0"), val = int32(1)]; + tensor var_1263 = conv(dilations = var_1263_dilations_0, groups = var_1263_groups_0, pad = var_1263_pad_0, pad_type = var_1263_pad_type_0, strides = var_1263_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_1238)[name = string("op_1263")]; + tensor var_1264 = const()[name = string("op_1264"), val = tensor([1, 8, 1, 128])]; + tensor var_1265 = reshape(shape = var_1264, x = var_1263)[name = string("op_1265")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1247)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1247)[name = string("x2_21")]; + tensor var_1279_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1279_cast_fp16")]; + tensor var_1280_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1280_cast_fp16")]; + tensor var_1281_cast_fp16 = sub(x = var_1279_cast_fp16, y = var_1280_cast_fp16)[name = string("op_1281_cast_fp16")]; + tensor var_1282_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1282_cast_fp16")]; + tensor var_1283_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1283_cast_fp16")]; + tensor var_1284_cast_fp16 = add(x = var_1282_cast_fp16, y = var_1283_cast_fp16)[name = string("op_1284_cast_fp16")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21_cast_fp16 = concat(axis = var_60, interleave = rotated_21_interleave_0, values = (var_1281_cast_fp16, var_1284_cast_fp16))[name = string("rotated_21_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1256)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1256)[name = string("x2_23")]; + tensor var_1300_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1300_cast_fp16")]; + tensor var_1301_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1301_cast_fp16")]; + tensor var_1302_cast_fp16 = sub(x = var_1300_cast_fp16, y = var_1301_cast_fp16)[name = string("op_1302_cast_fp16")]; + tensor var_1303_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1303_cast_fp16")]; + tensor var_1304_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1304_cast_fp16")]; + tensor var_1305_cast_fp16 = add(x = var_1303_cast_fp16, y = var_1304_cast_fp16)[name = string("op_1305_cast_fp16")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23_cast_fp16 = concat(axis = var_60, interleave = rotated_23_interleave_0, values = (var_1302_cast_fp16, var_1305_cast_fp16))[name = string("rotated_23_cast_fp16")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([14])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([15])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_385, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([42])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([43])]; + int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; + bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; + tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; + tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; + tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; + int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; + bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; + tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_385, concat_47_values3_0))[name = string("concat_47")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1265, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; + tensor var_1325_begin_0 = const()[name = string("op_1325_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor var_1325_end_0 = const()[name = string("op_1325_end_0"), val = tensor([15, 8, 1024, 128])]; + tensor var_1325_end_mask_0 = const()[name = string("op_1325_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1325_cast_fp16 = slice_by_index(begin = var_1325_begin_0, end = var_1325_end_0, end_mask = var_1325_end_mask_0, x = coreml_update_state_29)[name = string("op_1325_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1325_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1327_begin_0 = const()[name = string("op_1327_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor var_1327_end_0 = const()[name = string("op_1327_end_0"), val = tensor([43, 8, 1024, 128])]; + tensor var_1327_end_mask_0 = const()[name = string("op_1327_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1327_cast_fp16 = slice_by_index(begin = var_1327_begin_0, end = var_1327_end_0, end_mask = var_1327_end_mask_0, x = coreml_update_state_29)[name = string("op_1327_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1327_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1336 = const()[name = string("op_1336"), val = tensor([1, 3, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1336, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1340 = const()[name = string("op_1340"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_23_cast_fp16 = reshape(shape = var_1340, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1343 = const()[name = string("op_1343"), val = tensor([1, 3, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1343, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_1347 = const()[name = string("op_1347"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_23_cast_fp16 = reshape(shape = var_1347, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")]; + bool var_1350_transpose_x_1 = const()[name = string("op_1350_transpose_x_1"), val = bool(false)]; + bool var_1350_transpose_y_1 = const()[name = string("op_1350_transpose_y_1"), val = bool(true)]; + tensor var_1350_cast_fp16 = matmul(transpose_x = var_1350_transpose_x_1, transpose_y = var_1350_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1350_cast_fp16")]; + fp16 var_1351_to_fp16 = const()[name = string("op_1351_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_21_cast_fp16 = mul(x = var_1350_cast_fp16, y = var_1351_to_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1362_axes_0 = const()[name = string("op_1362_axes_0"), val = tensor([-1])]; + bool var_1362_keep_dims_0 = const()[name = string("op_1362_keep_dims_0"), val = bool(true)]; + tensor var_1362_cast_fp16 = reduce_sum(axes = var_1362_axes_0, keep_dims = var_1362_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1362_cast_fp16")]; + tensor attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1362_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_1365_perm_0 = const()[name = string("op_1365_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1367 = const()[name = string("op_1367"), val = tensor([1, 1, 3072])]; + tensor var_1365_cast_fp16 = transpose(perm = var_1365_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_14")]; + tensor input_75_cast_fp16 = reshape(shape = var_1367, x = var_1365_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722141504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729219456))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1378_axes_0 = const()[name = string("op_1378_axes_0"), val = tensor([-1])]; + tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729268672)))]; + tensor var_1378_cast_fp16 = layer_norm(axes = var_1378_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1378_cast_fp16")]; + tensor var_1385 = const()[name = string("op_1385"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1386 = transpose(perm = var_1385, x = var_1378_cast_fp16)[name = string("transpose_13")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1386)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1408_axes_0 = const()[name = string("op_1408_axes_0"), val = tensor([2])]; + tensor var_1408 = squeeze(axes = var_1408_axes_0, x = hidden_states_47)[name = string("op_1408")]; + tensor var_1409 = const()[name = string("op_1409"), val = tensor([0, 2, 1])]; + tensor var_1410 = transpose(perm = var_1409, x = var_1408)[name = string("transpose_12")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1410)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1418_axes_0 = const()[name = string("op_1418_axes_0"), val = tensor([-1])]; + tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729274880)))]; + tensor var_1418_cast_fp16 = layer_norm(axes = var_1418_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1418_cast_fp16")]; + tensor var_1421 = const()[name = string("op_1421"), val = tensor([0, 2, 1])]; + tensor var_1423_axes_0 = const()[name = string("op_1423_axes_0"), val = tensor([2])]; + tensor var_1422 = transpose(perm = var_1421, x = var_1418_cast_fp16)[name = string("transpose_11")]; + tensor var_1423 = expand_dims(axes = var_1423_axes_0, x = var_1422)[name = string("op_1423")]; + string var_1430_pad_type_0 = const()[name = string("op_1430_pad_type_0"), val = string("valid")]; + tensor var_1430_strides_0 = const()[name = string("op_1430_strides_0"), val = tensor([1, 1])]; + tensor var_1430_pad_0 = const()[name = string("op_1430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1430_dilations_0 = const()[name = string("op_1430_dilations_0"), val = tensor([1, 1])]; + int32 var_1430_groups_0 = const()[name = string("op_1430_groups_0"), val = int32(1)]; + tensor var_1430 = conv(dilations = var_1430_dilations_0, groups = var_1430_groups_0, pad = var_1430_pad_0, pad_type = var_1430_pad_type_0, strides = var_1430_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_1423)[name = string("op_1430")]; + tensor var_1431 = const()[name = string("op_1431"), val = tensor([1, 24, 1, 128])]; + tensor var_1432 = reshape(shape = var_1431, x = var_1430)[name = string("op_1432")]; + string var_1439_pad_type_0 = const()[name = string("op_1439_pad_type_0"), val = string("valid")]; + tensor var_1439_strides_0 = const()[name = string("op_1439_strides_0"), val = tensor([1, 1])]; + tensor var_1439_pad_0 = const()[name = string("op_1439_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1439_dilations_0 = const()[name = string("op_1439_dilations_0"), val = tensor([1, 1])]; + int32 var_1439_groups_0 = const()[name = string("op_1439_groups_0"), val = int32(1)]; + tensor var_1439 = conv(dilations = var_1439_dilations_0, groups = var_1439_groups_0, pad = var_1439_pad_0, pad_type = var_1439_pad_type_0, strides = var_1439_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_1423)[name = string("op_1439")]; + tensor var_1440 = const()[name = string("op_1440"), val = tensor([1, 8, 1, 128])]; + tensor var_1441 = reshape(shape = var_1440, x = var_1439)[name = string("op_1441")]; + string var_1448_pad_type_0 = const()[name = string("op_1448_pad_type_0"), val = string("valid")]; + tensor var_1448_strides_0 = const()[name = string("op_1448_strides_0"), val = tensor([1, 1])]; + tensor var_1448_pad_0 = const()[name = string("op_1448_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1448_dilations_0 = const()[name = string("op_1448_dilations_0"), val = tensor([1, 1])]; + int32 var_1448_groups_0 = const()[name = string("op_1448_groups_0"), val = int32(1)]; + tensor var_1448 = conv(dilations = var_1448_dilations_0, groups = var_1448_groups_0, pad = var_1448_pad_0, pad_type = var_1448_pad_type_0, strides = var_1448_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_1423)[name = string("op_1448")]; + tensor var_1449 = const()[name = string("op_1449"), val = tensor([1, 8, 1, 128])]; + tensor var_1450 = reshape(shape = var_1449, x = var_1448)[name = string("op_1450")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1432)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1432)[name = string("x2_25")]; + tensor var_1464_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1464_cast_fp16")]; + tensor var_1465_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1465_cast_fp16")]; + tensor var_1466_cast_fp16 = sub(x = var_1464_cast_fp16, y = var_1465_cast_fp16)[name = string("op_1466_cast_fp16")]; + tensor var_1467_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1467_cast_fp16")]; + tensor var_1468_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1468_cast_fp16")]; + tensor var_1469_cast_fp16 = add(x = var_1467_cast_fp16, y = var_1468_cast_fp16)[name = string("op_1469_cast_fp16")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25_cast_fp16 = concat(axis = var_60, interleave = rotated_25_interleave_0, values = (var_1466_cast_fp16, var_1469_cast_fp16))[name = string("rotated_25_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1441)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1441)[name = string("x2_27")]; + tensor var_1485_cast_fp16 = mul(x = x1_27, y = cos_3_cast_fp16)[name = string("op_1485_cast_fp16")]; + tensor var_1486_cast_fp16 = mul(x = x2_27, y = sin_3_cast_fp16)[name = string("op_1486_cast_fp16")]; + tensor var_1487_cast_fp16 = sub(x = var_1485_cast_fp16, y = var_1486_cast_fp16)[name = string("op_1487_cast_fp16")]; + tensor var_1488_cast_fp16 = mul(x = x2_27, y = cos_3_cast_fp16)[name = string("op_1488_cast_fp16")]; + tensor var_1489_cast_fp16 = mul(x = x1_27, y = sin_3_cast_fp16)[name = string("op_1489_cast_fp16")]; + tensor var_1490_cast_fp16 = add(x = var_1488_cast_fp16, y = var_1489_cast_fp16)[name = string("op_1490_cast_fp16")]; + bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; + tensor rotated_27_cast_fp16 = concat(axis = var_60, interleave = rotated_27_interleave_0, values = (var_1487_cast_fp16, var_1490_cast_fp16))[name = string("rotated_27_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([15])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([16])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_385, concat_51_values3_0))[name = string("concat_51")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27_cast_fp16, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([43])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([44])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; + tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; + tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_385, concat_55_values3_0))[name = string("concat_55")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1450, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; + tensor var_1510_begin_0 = const()[name = string("op_1510_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor var_1510_end_0 = const()[name = string("op_1510_end_0"), val = tensor([16, 8, 1024, 128])]; + tensor var_1510_end_mask_0 = const()[name = string("op_1510_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1510_cast_fp16 = slice_by_index(begin = var_1510_begin_0, end = var_1510_end_0, end_mask = var_1510_end_mask_0, x = coreml_update_state_31)[name = string("op_1510_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1510_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_1512_begin_0 = const()[name = string("op_1512_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor var_1512_end_0 = const()[name = string("op_1512_end_0"), val = tensor([44, 8, 1024, 128])]; + tensor var_1512_end_mask_0 = const()[name = string("op_1512_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1512_cast_fp16 = slice_by_index(begin = var_1512_begin_0, end = var_1512_end_0, end_mask = var_1512_end_mask_0, x = coreml_update_state_31)[name = string("op_1512_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1512_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1521 = const()[name = string("op_1521"), val = tensor([1, 3, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1521, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1525 = const()[name = string("op_1525"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_27_cast_fp16 = reshape(shape = var_1525, x = x_181_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1528 = const()[name = string("op_1528"), val = tensor([1, 3, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1528, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_1532 = const()[name = string("op_1532"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_27_cast_fp16 = reshape(shape = var_1532, x = x_187_cast_fp16)[name = string("value_states_27_cast_fp16")]; + bool var_1535_transpose_x_1 = const()[name = string("op_1535_transpose_x_1"), val = bool(false)]; + bool var_1535_transpose_y_1 = const()[name = string("op_1535_transpose_y_1"), val = bool(true)]; + tensor var_1535_cast_fp16 = matmul(transpose_x = var_1535_transpose_x_1, transpose_y = var_1535_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_27_cast_fp16)[name = string("op_1535_cast_fp16")]; + fp16 var_1536_to_fp16 = const()[name = string("op_1536_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_1535_cast_fp16, y = var_1536_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; + tensor var_1547_axes_0 = const()[name = string("op_1547_axes_0"), val = tensor([-1])]; + bool var_1547_keep_dims_0 = const()[name = string("op_1547_keep_dims_0"), val = bool(true)]; + tensor var_1547_cast_fp16 = reduce_sum(axes = var_1547_axes_0, keep_dims = var_1547_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1547_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1547_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_27_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1550_perm_0 = const()[name = string("op_1550_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1552 = const()[name = string("op_1552"), val = tensor([1, 1, 3072])]; + tensor var_1550_cast_fp16 = transpose(perm = var_1550_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_10")]; + tensor input_89_cast_fp16 = reshape(shape = var_1552, x = var_1550_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729281088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736359040))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; + bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; + tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1563_axes_0 = const()[name = string("op_1563_axes_0"), val = tensor([-1])]; + tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736408256)))]; + tensor var_1563_cast_fp16 = layer_norm(axes = var_1563_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1563_cast_fp16")]; + tensor var_1570 = const()[name = string("op_1570"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1571 = transpose(perm = var_1570, x = var_1563_cast_fp16)[name = string("transpose_9")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1571)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; + tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; + tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; + int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; + tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; + tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; + tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; + string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; + tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; + tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; + tensor var_1593_axes_0 = const()[name = string("op_1593_axes_0"), val = tensor([2])]; + tensor var_1593 = squeeze(axes = var_1593_axes_0, x = hidden_states_55)[name = string("op_1593")]; + tensor var_1594 = const()[name = string("op_1594"), val = tensor([0, 2, 1])]; + tensor var_1595 = transpose(perm = var_1594, x = var_1593)[name = string("transpose_8")]; + tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1595)[name = string("hidden_states_57_cast_fp16")]; + tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; + bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; + tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; + tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor var_1603_axes_0 = const()[name = string("op_1603_axes_0"), val = tensor([-1])]; + tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736414464)))]; + tensor var_1603_cast_fp16 = layer_norm(axes = var_1603_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_16_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1603_cast_fp16")]; + tensor var_1606 = const()[name = string("op_1606"), val = tensor([0, 2, 1])]; + tensor var_1608_axes_0 = const()[name = string("op_1608_axes_0"), val = tensor([2])]; + tensor var_1607 = transpose(perm = var_1606, x = var_1603_cast_fp16)[name = string("transpose_7")]; + tensor var_1608 = expand_dims(axes = var_1608_axes_0, x = var_1607)[name = string("op_1608")]; + string var_1615_pad_type_0 = const()[name = string("op_1615_pad_type_0"), val = string("valid")]; + tensor var_1615_strides_0 = const()[name = string("op_1615_strides_0"), val = tensor([1, 1])]; + tensor var_1615_pad_0 = const()[name = string("op_1615_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1615_dilations_0 = const()[name = string("op_1615_dilations_0"), val = tensor([1, 1])]; + int32 var_1615_groups_0 = const()[name = string("op_1615_groups_0"), val = int32(1)]; + tensor var_1615 = conv(dilations = var_1615_dilations_0, groups = var_1615_groups_0, pad = var_1615_pad_0, pad_type = var_1615_pad_type_0, strides = var_1615_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_1608)[name = string("op_1615")]; + tensor var_1616 = const()[name = string("op_1616"), val = tensor([1, 24, 1, 128])]; + tensor var_1617 = reshape(shape = var_1616, x = var_1615)[name = string("op_1617")]; + string var_1624_pad_type_0 = const()[name = string("op_1624_pad_type_0"), val = string("valid")]; + tensor var_1624_strides_0 = const()[name = string("op_1624_strides_0"), val = tensor([1, 1])]; + tensor var_1624_pad_0 = const()[name = string("op_1624_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1624_dilations_0 = const()[name = string("op_1624_dilations_0"), val = tensor([1, 1])]; + int32 var_1624_groups_0 = const()[name = string("op_1624_groups_0"), val = int32(1)]; + tensor var_1624 = conv(dilations = var_1624_dilations_0, groups = var_1624_groups_0, pad = var_1624_pad_0, pad_type = var_1624_pad_type_0, strides = var_1624_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_1608)[name = string("op_1624")]; + tensor var_1625 = const()[name = string("op_1625"), val = tensor([1, 8, 1, 128])]; + tensor var_1626 = reshape(shape = var_1625, x = var_1624)[name = string("op_1626")]; + string var_1633_pad_type_0 = const()[name = string("op_1633_pad_type_0"), val = string("valid")]; + tensor var_1633_strides_0 = const()[name = string("op_1633_strides_0"), val = tensor([1, 1])]; + tensor var_1633_pad_0 = const()[name = string("op_1633_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1633_dilations_0 = const()[name = string("op_1633_dilations_0"), val = tensor([1, 1])]; + int32 var_1633_groups_0 = const()[name = string("op_1633_groups_0"), val = int32(1)]; + tensor var_1633 = conv(dilations = var_1633_dilations_0, groups = var_1633_groups_0, pad = var_1633_pad_0, pad_type = var_1633_pad_type_0, strides = var_1633_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_1608)[name = string("op_1633")]; + tensor var_1634 = const()[name = string("op_1634"), val = tensor([1, 8, 1, 128])]; + tensor var_1635 = reshape(shape = var_1634, x = var_1633)[name = string("op_1635")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1617)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1617)[name = string("x2_29")]; + tensor var_1649_cast_fp16 = mul(x = x1_29, y = cos_3_cast_fp16)[name = string("op_1649_cast_fp16")]; + tensor var_1650_cast_fp16 = mul(x = x2_29, y = sin_3_cast_fp16)[name = string("op_1650_cast_fp16")]; + tensor var_1651_cast_fp16 = sub(x = var_1649_cast_fp16, y = var_1650_cast_fp16)[name = string("op_1651_cast_fp16")]; + tensor var_1652_cast_fp16 = mul(x = x2_29, y = cos_3_cast_fp16)[name = string("op_1652_cast_fp16")]; + tensor var_1653_cast_fp16 = mul(x = x1_29, y = sin_3_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor var_1654_cast_fp16 = add(x = var_1652_cast_fp16, y = var_1653_cast_fp16)[name = string("op_1654_cast_fp16")]; + bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; + tensor rotated_29_cast_fp16 = concat(axis = var_60, interleave = rotated_29_interleave_0, values = (var_1651_cast_fp16, var_1654_cast_fp16))[name = string("rotated_29_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_1626)[name = string("x1_31")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_1626)[name = string("x2_31")]; + tensor var_1670_cast_fp16 = mul(x = x1_31, y = cos_3_cast_fp16)[name = string("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = mul(x = x2_31, y = sin_3_cast_fp16)[name = string("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = sub(x = var_1670_cast_fp16, y = var_1671_cast_fp16)[name = string("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = mul(x = x2_31, y = cos_3_cast_fp16)[name = string("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = mul(x = x1_31, y = sin_3_cast_fp16)[name = string("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = add(x = var_1673_cast_fp16, y = var_1674_cast_fp16)[name = string("op_1675_cast_fp16")]; + bool rotated_31_interleave_0 = const()[name = string("rotated_31_interleave_0"), val = bool(false)]; + tensor rotated_31_cast_fp16 = concat(axis = var_60, interleave = rotated_31_interleave_0, values = (var_1672_cast_fp16, var_1675_cast_fp16))[name = string("rotated_31_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([16])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([17])]; + int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; + bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; + tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; + tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; + tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_385, concat_59_values3_0))[name = string("concat_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_31_cast_fp16, x = coreml_update_state_31)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_32 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([44])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([45])]; + int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; + bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; + tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_385, concat_63_values3_0))[name = string("concat_63")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_1635, x = coreml_update_state_32)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_33 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; + tensor var_1695_begin_0 = const()[name = string("op_1695_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_1695_end_0 = const()[name = string("op_1695_end_0"), val = tensor([17, 8, 1024, 128])]; + tensor var_1695_end_mask_0 = const()[name = string("op_1695_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1695_cast_fp16 = slice_by_index(begin = var_1695_begin_0, end = var_1695_end_0, end_mask = var_1695_end_mask_0, x = coreml_update_state_33)[name = string("op_1695_cast_fp16")]; + tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; + tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_1695_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; + tensor var_1697_begin_0 = const()[name = string("op_1697_begin_0"), val = tensor([44, 0, 0, 0])]; + tensor var_1697_end_0 = const()[name = string("op_1697_end_0"), val = tensor([45, 8, 1024, 128])]; + tensor var_1697_end_mask_0 = const()[name = string("op_1697_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1697_cast_fp16 = slice_by_index(begin = var_1697_begin_0, end = var_1697_end_0, end_mask = var_1697_end_mask_0, x = coreml_update_state_33)[name = string("op_1697_cast_fp16")]; + tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; + tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_1697_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; + tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; + tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_1706 = const()[name = string("op_1706"), val = tensor([1, 3, 1, 1])]; + tensor x_209_cast_fp16 = tile(reps = var_1706, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1710 = const()[name = string("op_1710"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_31_cast_fp16 = reshape(shape = var_1710, x = x_209_cast_fp16)[name = string("key_states_31_cast_fp16")]; + tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; + tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_1713 = const()[name = string("op_1713"), val = tensor([1, 3, 1, 1])]; + tensor x_215_cast_fp16 = tile(reps = var_1713, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; + tensor var_1717 = const()[name = string("op_1717"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_31_cast_fp16 = reshape(shape = var_1717, x = x_215_cast_fp16)[name = string("value_states_31_cast_fp16")]; + bool var_1720_transpose_x_1 = const()[name = string("op_1720_transpose_x_1"), val = bool(false)]; + bool var_1720_transpose_y_1 = const()[name = string("op_1720_transpose_y_1"), val = bool(true)]; + tensor var_1720_cast_fp16 = matmul(transpose_x = var_1720_transpose_x_1, transpose_y = var_1720_transpose_y_1, x = rotated_29_cast_fp16, y = key_states_31_cast_fp16)[name = string("op_1720_cast_fp16")]; + fp16 var_1721_to_fp16 = const()[name = string("op_1721_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_29_cast_fp16 = mul(x = var_1720_cast_fp16, y = var_1721_to_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor exp_x_15_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_15_cast_fp16")]; + tensor var_1732_axes_0 = const()[name = string("op_1732_axes_0"), val = tensor([-1])]; + bool var_1732_keep_dims_0 = const()[name = string("op_1732_keep_dims_0"), val = bool(true)]; + tensor var_1732_cast_fp16 = reduce_sum(axes = var_1732_axes_0, keep_dims = var_1732_keep_dims_0, x = exp_x_15_cast_fp16)[name = string("op_1732_cast_fp16")]; + tensor attn_weights_31_cast_fp16 = real_div(x = exp_x_15_cast_fp16, y = var_1732_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = attn_weights_31_cast_fp16, y = value_states_31_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_1735_perm_0 = const()[name = string("op_1735_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1737 = const()[name = string("op_1737"), val = tensor([1, 1, 3072])]; + tensor var_1735_cast_fp16 = transpose(perm = var_1735_perm_0, x = attn_output_43_cast_fp16)[name = string("transpose_6")]; + tensor input_103_cast_fp16 = reshape(shape = var_1737, x = var_1735_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736420672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743498624))))[name = string("model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; + bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; + tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; + tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_1748_axes_0 = const()[name = string("op_1748_axes_0"), val = tensor([-1])]; + tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743547840)))]; + tensor var_1748_cast_fp16 = layer_norm(axes = var_1748_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_16_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1748_cast_fp16")]; + tensor var_1755 = const()[name = string("op_1755"), val = tensor([0, 2, 1])]; + tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; + tensor var_1756 = transpose(perm = var_1755, x = var_1748_cast_fp16)[name = string("transpose_5")]; + tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1756)[name = string("input_107")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; + string up_states_15_pad_type_0 = const()[name = string("up_states_15_pad_type_0"), val = string("valid")]; + tensor up_states_15_strides_0 = const()[name = string("up_states_15_strides_0"), val = tensor([1, 1])]; + tensor up_states_15_pad_0 = const()[name = string("up_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_15_dilations_0 = const()[name = string("up_states_15_dilations_0"), val = tensor([1, 1])]; + int32 up_states_15_groups_0 = const()[name = string("up_states_15_groups_0"), val = int32(1)]; + tensor up_states_15 = conv(dilations = up_states_15_dilations_0, groups = up_states_15_groups_0, pad = up_states_15_pad_0, pad_type = up_states_15_pad_type_0, strides = up_states_15_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states_15")]; + tensor gate_states_15 = silu(x = input_109)[name = string("gate_states_15")]; + tensor input_111 = mul(x = gate_states_15, y = up_states_15)[name = string("input_111")]; + string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; + tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; + tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; + tensor var_1778_axes_0 = const()[name = string("op_1778_axes_0"), val = tensor([2])]; + tensor var_1778 = squeeze(axes = var_1778_axes_0, x = hidden_states_63)[name = string("op_1778")]; + tensor var_1779 = const()[name = string("op_1779"), val = tensor([0, 2, 1])]; + tensor var_1780 = transpose(perm = var_1779, x = var_1778)[name = string("transpose_4")]; + tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_1780)[name = string("hidden_states_65_cast_fp16")]; + tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; + bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; + tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_33_cast_fp16")]; + tensor input_113_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_33_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor var_1788_axes_0 = const()[name = string("op_1788_axes_0"), val = tensor([-1])]; + tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743554048)))]; + tensor var_1788_cast_fp16 = layer_norm(axes = var_1788_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_17_input_layernorm_weight_to_fp16, x = input_113_cast_fp16)[name = string("op_1788_cast_fp16")]; + tensor var_1791 = const()[name = string("op_1791"), val = tensor([0, 2, 1])]; + tensor var_1793_axes_0 = const()[name = string("op_1793_axes_0"), val = tensor([2])]; + tensor var_1792 = transpose(perm = var_1791, x = var_1788_cast_fp16)[name = string("transpose_3")]; + tensor var_1793 = expand_dims(axes = var_1793_axes_0, x = var_1792)[name = string("op_1793")]; + string var_1800_pad_type_0 = const()[name = string("op_1800_pad_type_0"), val = string("valid")]; + tensor var_1800_strides_0 = const()[name = string("op_1800_strides_0"), val = tensor([1, 1])]; + tensor var_1800_pad_0 = const()[name = string("op_1800_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1800_dilations_0 = const()[name = string("op_1800_dilations_0"), val = tensor([1, 1])]; + int32 var_1800_groups_0 = const()[name = string("op_1800_groups_0"), val = int32(1)]; + tensor var_1800 = conv(dilations = var_1800_dilations_0, groups = var_1800_groups_0, pad = var_1800_pad_0, pad_type = var_1800_pad_type_0, strides = var_1800_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_1793)[name = string("op_1800")]; + tensor var_1801 = const()[name = string("op_1801"), val = tensor([1, 24, 1, 128])]; + tensor var_1802 = reshape(shape = var_1801, x = var_1800)[name = string("op_1802")]; + string var_1809_pad_type_0 = const()[name = string("op_1809_pad_type_0"), val = string("valid")]; + tensor var_1809_strides_0 = const()[name = string("op_1809_strides_0"), val = tensor([1, 1])]; + tensor var_1809_pad_0 = const()[name = string("op_1809_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1809_dilations_0 = const()[name = string("op_1809_dilations_0"), val = tensor([1, 1])]; + int32 var_1809_groups_0 = const()[name = string("op_1809_groups_0"), val = int32(1)]; + tensor var_1809 = conv(dilations = var_1809_dilations_0, groups = var_1809_groups_0, pad = var_1809_pad_0, pad_type = var_1809_pad_type_0, strides = var_1809_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_1793)[name = string("op_1809")]; + tensor var_1810 = const()[name = string("op_1810"), val = tensor([1, 8, 1, 128])]; + tensor var_1811 = reshape(shape = var_1810, x = var_1809)[name = string("op_1811")]; + string var_1818_pad_type_0 = const()[name = string("op_1818_pad_type_0"), val = string("valid")]; + tensor var_1818_strides_0 = const()[name = string("op_1818_strides_0"), val = tensor([1, 1])]; + tensor var_1818_pad_0 = const()[name = string("op_1818_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1818_dilations_0 = const()[name = string("op_1818_dilations_0"), val = tensor([1, 1])]; + int32 var_1818_groups_0 = const()[name = string("op_1818_groups_0"), val = int32(1)]; + tensor var_1818 = conv(dilations = var_1818_dilations_0, groups = var_1818_groups_0, pad = var_1818_pad_0, pad_type = var_1818_pad_type_0, strides = var_1818_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_1793)[name = string("op_1818")]; + tensor var_1819 = const()[name = string("op_1819"), val = tensor([1, 8, 1, 128])]; + tensor var_1820 = reshape(shape = var_1819, x = var_1818)[name = string("op_1820")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_1802)[name = string("x1_33")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_1802)[name = string("x2_33")]; + tensor var_1834_cast_fp16 = mul(x = x1_33, y = cos_3_cast_fp16)[name = string("op_1834_cast_fp16")]; + tensor var_1835_cast_fp16 = mul(x = x2_33, y = sin_3_cast_fp16)[name = string("op_1835_cast_fp16")]; + tensor var_1836_cast_fp16 = sub(x = var_1834_cast_fp16, y = var_1835_cast_fp16)[name = string("op_1836_cast_fp16")]; + tensor var_1837_cast_fp16 = mul(x = x2_33, y = cos_3_cast_fp16)[name = string("op_1837_cast_fp16")]; + tensor var_1838_cast_fp16 = mul(x = x1_33, y = sin_3_cast_fp16)[name = string("op_1838_cast_fp16")]; + tensor var_1839_cast_fp16 = add(x = var_1837_cast_fp16, y = var_1838_cast_fp16)[name = string("op_1839_cast_fp16")]; + bool rotated_33_interleave_0 = const()[name = string("rotated_33_interleave_0"), val = bool(false)]; + tensor rotated_33_cast_fp16 = concat(axis = var_60, interleave = rotated_33_interleave_0, values = (var_1836_cast_fp16, var_1839_cast_fp16))[name = string("rotated_33_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1811)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1811)[name = string("x2")]; + tensor var_1855_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1855_cast_fp16")]; + tensor var_1856_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1856_cast_fp16")]; + tensor var_1857_cast_fp16 = sub(x = var_1855_cast_fp16, y = var_1856_cast_fp16)[name = string("op_1857_cast_fp16")]; + tensor var_1858_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1858_cast_fp16")]; + tensor var_1859_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1859_cast_fp16")]; + tensor var_1860_cast_fp16 = add(x = var_1858_cast_fp16, y = var_1859_cast_fp16)[name = string("op_1860_cast_fp16")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated_cast_fp16 = concat(axis = var_60, interleave = rotated_interleave_0, values = (var_1857_cast_fp16, var_1860_cast_fp16))[name = string("rotated_cast_fp16")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([17])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([18])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; + tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; + tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; + int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; + bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; + tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_385, concat_67_values3_0))[name = string("concat_67")]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = rotated_cast_fp16, x = coreml_update_state_33)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_34 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([45])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([46])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_385, concat_71_values3_0))[name = string("concat_71")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = var_1820, x = coreml_update_state_34)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_35 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; + tensor var_1880_begin_0 = const()[name = string("op_1880_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_1880_end_0 = const()[name = string("op_1880_end_0"), val = tensor([18, 8, 1024, 128])]; + tensor var_1880_end_mask_0 = const()[name = string("op_1880_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1880_cast_fp16 = slice_by_index(begin = var_1880_begin_0, end = var_1880_end_0, end_mask = var_1880_end_mask_0, x = coreml_update_state_35)[name = string("op_1880_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1880_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1882_begin_0 = const()[name = string("op_1882_begin_0"), val = tensor([45, 0, 0, 0])]; + tensor var_1882_end_0 = const()[name = string("op_1882_end_0"), val = tensor([46, 8, 1024, 128])]; + tensor var_1882_end_mask_0 = const()[name = string("op_1882_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1882_cast_fp16 = slice_by_index(begin = var_1882_begin_0, end = var_1882_end_0, end_mask = var_1882_end_mask_0, x = coreml_update_state_35)[name = string("op_1882_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1882_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_235_axes_0 = const()[name = string("x_235_axes_0"), val = tensor([1])]; + tensor x_235_cast_fp16 = expand_dims(axes = x_235_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_1891 = const()[name = string("op_1891"), val = tensor([1, 3, 1, 1])]; + tensor x_237_cast_fp16 = tile(reps = var_1891, x = x_235_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor var_1895 = const()[name = string("op_1895"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_cast_fp16 = reshape(shape = var_1895, x = x_237_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor x_241_axes_0 = const()[name = string("x_241_axes_0"), val = tensor([1])]; + tensor x_241_cast_fp16 = expand_dims(axes = x_241_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_1898 = const()[name = string("op_1898"), val = tensor([1, 3, 1, 1])]; + tensor x_243_cast_fp16 = tile(reps = var_1898, x = x_241_cast_fp16)[name = string("x_243_cast_fp16")]; + tensor var_1902 = const()[name = string("op_1902"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_cast_fp16 = reshape(shape = var_1902, x = x_243_cast_fp16)[name = string("value_states_cast_fp16")]; + bool var_1905_transpose_x_1 = const()[name = string("op_1905_transpose_x_1"), val = bool(false)]; + bool var_1905_transpose_y_1 = const()[name = string("op_1905_transpose_y_1"), val = bool(true)]; + tensor var_1905_cast_fp16 = matmul(transpose_x = var_1905_transpose_x_1, transpose_y = var_1905_transpose_y_1, x = rotated_33_cast_fp16, y = key_states_cast_fp16)[name = string("op_1905_cast_fp16")]; + fp16 var_1906_to_fp16 = const()[name = string("op_1906_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_33_cast_fp16 = mul(x = var_1905_cast_fp16, y = var_1906_to_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_245_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("x_245_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_245_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; + tensor x_247_cast_fp16 = sub(x = x_245_cast_fp16, y = reduce_max_8_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_247_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_1917_axes_0 = const()[name = string("op_1917_axes_0"), val = tensor([-1])]; + bool var_1917_keep_dims_0 = const()[name = string("op_1917_keep_dims_0"), val = bool(true)]; + tensor var_1917_cast_fp16 = reduce_sum(axes = var_1917_axes_0, keep_dims = var_1917_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1917_cast_fp16")]; + tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1917_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_1920_perm_0 = const()[name = string("op_1920_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1922 = const()[name = string("op_1922"), val = tensor([1, 1, 3072])]; + tensor var_1920_cast_fp16 = transpose(perm = var_1920_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_2")]; + tensor input_117_cast_fp16 = reshape(shape = var_1922, x = var_1920_cast_fp16)[name = string("input_117_cast_fp16")]; + tensor model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743560256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750638208))))[name = string("model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = linear_8_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_69_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_119_cast_fp16 = sub(x = hidden_states_69_cast_fp16, y = mean_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor var_1933_axes_0 = const()[name = string("op_1933_axes_0"), val = tensor([-1])]; + tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750687424)))]; + tensor var_1933_cast_fp16 = layer_norm(axes = var_1933_axes_0, epsilon = var_55_to_fp16, gamma = model_model_layers_17_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_1933_cast_fp16")]; + tensor var_1940 = const()[name = string("op_1940"), val = tensor([0, 2, 1])]; + tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; + tensor var_1941 = transpose(perm = var_1940, x = var_1933_cast_fp16)[name = string("transpose_1")]; + tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_1941)[name = string("input_121")]; + string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; + tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; + tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; + int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; + tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_states")]; + tensor gate_states = silu(x = input_123)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_1963_axes_0 = const()[name = string("op_1963_axes_0"), val = tensor([2])]; + tensor var_1963 = squeeze(axes = var_1963_axes_0, x = hidden_states_1)[name = string("op_1963")]; + tensor var_1964 = const()[name = string("op_1964"), val = tensor([0, 2, 1])]; + tensor var_1965 = transpose(perm = var_1964, x = var_1963)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_69_cast_fp16, y = var_1965)[name = string("op_1966_cast_fp16")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (output_hidden_states); + func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7078016))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7127232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9486592))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9503040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11862400))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11878848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30753280))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30884416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49758848))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49889984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68764416))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75891584))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75940800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78300160))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78316608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80675968))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80692416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99566848))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99697984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118572416))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137577984))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137627200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144705152))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144754368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147113728))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147130176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149489536))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149505984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168380416))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168511552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187385984))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206391552))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206440768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213518720))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213567936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215927296))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215943744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218303104))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218319552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237193984))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237325120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256199552))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256330688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275205120))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275254336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282332288))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282381504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284740864))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284757312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287116672))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287133120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306007552))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306138688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325013120))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325144256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344018688))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344067904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351145856))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353554432))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353570880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355930240))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355946688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374821120))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374952256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393826688))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393957824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412832256))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412881472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419959424))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420008640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422368000))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422384448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424743808))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424760256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443634688))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443765824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462640256))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462771392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481645824))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481695040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488772992))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488822208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491181568))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491198016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493557376))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493573824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512448256))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512579392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531453824))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531584960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550459392))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550508608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557586560))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557635776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559995136))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560011584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562370944))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562387392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581261824))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581392960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600267392))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600398528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619272960))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")]; + int32 var_55 = const()[name = string("op_55"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_297_axis_0 = const()[name = string("op_297_axis_0"), val = int32(1)]; + int32 var_297_batch_dims_0 = const()[name = string("op_297_batch_dims_0"), val = int32(0)]; + bool var_297_validate_indices_0 = const()[name = string("op_297_validate_indices_0"), val = bool(false)]; + tensor var_66_to_fp16 = const()[name = string("op_66_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652876672)))]; + tensor var_297_cast_fp16 = gather(axis = var_297_axis_0, batch_dims = var_297_batch_dims_0, indices = select_0, validate_indices = var_297_validate_indices_0, x = var_66_to_fp16)[name = string("op_297_cast_fp16")]; + tensor var_298 = const()[name = string("op_298"), val = tensor([1, 64, 1, 128])]; + tensor cos_1_cast_fp16 = reshape(shape = var_298, x = var_297_cast_fp16)[name = string("cos_1_cast_fp16")]; + int32 var_302_axis_0 = const()[name = string("op_302_axis_0"), val = int32(1)]; + int32 var_302_batch_dims_0 = const()[name = string("op_302_batch_dims_0"), val = int32(0)]; + bool var_302_validate_indices_0 = const()[name = string("op_302_validate_indices_0"), val = bool(false)]; + tensor var_61_to_fp16 = const()[name = string("op_61_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619322176)))]; + tensor var_302_cast_fp16 = gather(axis = var_302_axis_0, batch_dims = var_302_batch_dims_0, indices = select_0, validate_indices = var_302_validate_indices_0, x = var_61_to_fp16)[name = string("op_302_cast_fp16")]; + tensor var_303 = const()[name = string("op_303"), val = tensor([1, 64, 1, 128])]; + tensor sin_1_cast_fp16 = reshape(shape = var_303, x = var_302_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_313_axes_0 = const()[name = string("op_313_axes_0"), val = tensor([-1])]; + tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686431168)))]; + fp16 var_57_to_fp16 = const()[name = string("op_57_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_313_cast_fp16 = layer_norm(axes = var_313_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_313_cast_fp16")]; + tensor var_317 = const()[name = string("op_317"), val = tensor([0, 2, 1])]; + tensor var_319_axes_0 = const()[name = string("op_319_axes_0"), val = tensor([2])]; + tensor var_318 = transpose(perm = var_317, x = var_313_cast_fp16)[name = string("transpose_64")]; + tensor var_319 = expand_dims(axes = var_319_axes_0, x = var_318)[name = string("op_319")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_319)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_319)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_319)[name = string("value_states_1")]; + tensor var_339 = const()[name = string("op_339"), val = tensor([1, 24, 128, 64])]; + tensor var_340 = reshape(shape = var_339, x = query_states_1)[name = string("op_340")]; + tensor var_341 = const()[name = string("op_341"), val = tensor([0, 1, 3, 2])]; + tensor var_343 = const()[name = string("op_343"), val = tensor([1, 8, 128, 64])]; + tensor var_344 = reshape(shape = var_343, x = key_states_1)[name = string("op_344")]; + tensor var_345 = const()[name = string("op_345"), val = tensor([0, 1, 3, 2])]; + tensor var_347 = const()[name = string("op_347"), val = tensor([1, 8, 128, 64])]; + tensor var_348 = reshape(shape = var_347, x = value_states_1)[name = string("op_348")]; + tensor var_349 = const()[name = string("op_349"), val = tensor([0, 1, 3, 2])]; + tensor var_351 = const()[name = string("op_351"), val = tensor([0, 2, 1, 3])]; + tensor var_353 = const()[name = string("op_353"), val = tensor([0, 2, 1, 3])]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_1 = transpose(perm = var_341, x = var_340)[name = string("transpose_63")]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; + tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 64, 64])]; + tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_5 = transpose(perm = var_351, x = cos_1_cast_fp16)[name = string("transpose_62")]; + tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; + tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 64, 64])]; + tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_5 = transpose(perm = var_353, x = sin_1_cast_fp16)[name = string("transpose_61")]; + tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; + tensor var_367 = mul(x = x1_1, y = cos_7)[name = string("op_367")]; + tensor var_368 = mul(x = x2_1, y = sin_7)[name = string("op_368")]; + tensor var_369 = sub(x = var_367, y = var_368)[name = string("op_369")]; + tensor var_370 = mul(x = x2_1, y = cos_7)[name = string("op_370")]; + tensor var_371 = mul(x = x1_1, y = sin_7)[name = string("op_371")]; + tensor var_372 = add(x = var_370, y = var_371)[name = string("op_372")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1 = concat(axis = var_55, interleave = rotated_1_interleave_0, values = (var_369, var_372))[name = string("rotated_1")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_5 = transpose(perm = var_345, x = var_344)[name = string("transpose_60")]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; + tensor var_388 = mul(x = x1_3, y = cos_7)[name = string("op_388")]; + tensor var_389 = mul(x = x2_3, y = sin_7)[name = string("op_389")]; + tensor var_390 = sub(x = var_388, y = var_389)[name = string("op_390")]; + tensor var_391 = mul(x = x2_3, y = cos_7)[name = string("op_391")]; + tensor var_392 = mul(x = x1_3, y = sin_7)[name = string("op_392")]; + tensor var_393 = add(x = var_391, y = var_392)[name = string("op_393")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3 = concat(axis = var_55, interleave = rotated_3_interleave_0, values = (var_390, var_393))[name = string("rotated_3")]; + tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; + tensor var_402 = add(x = current_pos, y = seq_length_1)[name = string("op_402")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([9])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([10])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_402, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([37])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([38])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_402, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3 = transpose(perm = var_349, x = var_348)[name = string("transpose_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; + tensor var_416_begin_0 = const()[name = string("op_416_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_416_end_0 = const()[name = string("op_416_end_0"), val = tensor([10, 8, 1024, 128])]; + tensor var_416_end_mask_0 = const()[name = string("op_416_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = coreml_update_state_19)[name = string("op_416_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_416_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_418_begin_0 = const()[name = string("op_418_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor var_418_end_0 = const()[name = string("op_418_end_0"), val = tensor([38, 8, 1024, 128])]; + tensor var_418_end_mask_0 = const()[name = string("op_418_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_418_cast_fp16 = slice_by_index(begin = var_418_begin_0, end = var_418_end_0, end_mask = var_418_end_mask_0, x = coreml_update_state_19)[name = string("op_418_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_418_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_427 = const()[name = string("op_427"), val = tensor([1, 3, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_427, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_431 = const()[name = string("op_431"), val = tensor([1, -1, 1024, 128])]; + tensor var_432_cast_fp16 = reshape(shape = var_431, x = x_13_cast_fp16)[name = string("op_432_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_434 = const()[name = string("op_434"), val = tensor([1, 3, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_434, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + bool var_441_transpose_x_0 = const()[name = string("op_441_transpose_x_0"), val = bool(false)]; + bool var_441_transpose_y_0 = const()[name = string("op_441_transpose_y_0"), val = bool(true)]; + tensor var_441_cast_fp16 = matmul(transpose_x = var_441_transpose_x_0, transpose_y = var_441_transpose_y_0, x = rotated_1, y = var_432_cast_fp16)[name = string("op_441_cast_fp16")]; + fp16 var_442_to_fp16 = const()[name = string("op_442_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_441_cast_fp16, y = var_442_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_453_axes_0 = const()[name = string("op_453_axes_0"), val = tensor([-1])]; + bool var_453_keep_dims_0 = const()[name = string("op_453_keep_dims_0"), val = bool(true)]; + tensor var_453_cast_fp16 = reduce_sum(axes = var_453_axes_0, keep_dims = var_453_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_453_cast_fp16")]; + tensor var_454_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_453_cast_fp16)[name = string("op_454_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([24, 64, 1024])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_454_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([24, 1024, 128])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 24, 64, 128])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_457_perm_0 = const()[name = string("op_457_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_459 = const()[name = string("op_459"), val = tensor([1, 64, 3072])]; + tensor var_457_cast_fp16 = transpose(perm = var_457_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_58")]; + tensor input_5_cast_fp16 = reshape(shape = var_459, x = var_457_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686437376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693515328))))[name = string("model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693564544)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_470_axes_0 = const()[name = string("op_470_axes_0"), val = tensor([-1])]; + tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693570752)))]; + tensor var_470_cast_fp16 = layer_norm(axes = var_470_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_470_cast_fp16")]; + tensor var_477 = const()[name = string("op_477"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_478 = transpose(perm = var_477, x = var_470_cast_fp16)[name = string("transpose_57")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_478)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_500_axes_0 = const()[name = string("op_500_axes_0"), val = tensor([2])]; + tensor var_500 = squeeze(axes = var_500_axes_0, x = hidden_states_7)[name = string("op_500")]; + tensor var_501 = const()[name = string("op_501"), val = tensor([0, 2, 1])]; + tensor var_502 = transpose(perm = var_501, x = var_500)[name = string("transpose_56")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_502)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_510_axes_0 = const()[name = string("op_510_axes_0"), val = tensor([-1])]; + tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693576960)))]; + tensor var_510_cast_fp16 = layer_norm(axes = var_510_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_510_cast_fp16")]; + tensor var_514 = const()[name = string("op_514"), val = tensor([0, 2, 1])]; + tensor var_516_axes_0 = const()[name = string("op_516_axes_0"), val = tensor([2])]; + tensor var_515 = transpose(perm = var_514, x = var_510_cast_fp16)[name = string("transpose_55")]; + tensor var_516 = expand_dims(axes = var_516_axes_0, x = var_515)[name = string("op_516")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_516)[name = string("query_states_5")]; + string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; + tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; + tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; + int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; + tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_516)[name = string("key_states_7")]; + string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; + tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; + tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; + int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; + tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_516)[name = string("value_states_7")]; + tensor var_536 = const()[name = string("op_536"), val = tensor([1, 24, 128, 64])]; + tensor var_537 = reshape(shape = var_536, x = query_states_5)[name = string("op_537")]; + tensor var_538 = const()[name = string("op_538"), val = tensor([0, 1, 3, 2])]; + tensor var_540 = const()[name = string("op_540"), val = tensor([1, 8, 128, 64])]; + tensor var_541 = reshape(shape = var_540, x = key_states_7)[name = string("op_541")]; + tensor var_542 = const()[name = string("op_542"), val = tensor([0, 1, 3, 2])]; + tensor var_544 = const()[name = string("op_544"), val = tensor([1, 8, 128, 64])]; + tensor var_545 = reshape(shape = var_544, x = value_states_7)[name = string("op_545")]; + tensor var_546 = const()[name = string("op_546"), val = tensor([0, 1, 3, 2])]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_29 = transpose(perm = var_538, x = var_537)[name = string("transpose_54")]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; + tensor var_564 = mul(x = x1_5, y = cos_7)[name = string("op_564")]; + tensor var_565 = mul(x = x2_5, y = sin_7)[name = string("op_565")]; + tensor var_566 = sub(x = var_564, y = var_565)[name = string("op_566")]; + tensor var_567 = mul(x = x2_5, y = cos_7)[name = string("op_567")]; + tensor var_568 = mul(x = x1_5, y = sin_7)[name = string("op_568")]; + tensor var_569 = add(x = var_567, y = var_568)[name = string("op_569")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5 = concat(axis = var_55, interleave = rotated_5_interleave_0, values = (var_566, var_569))[name = string("rotated_5")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_33 = transpose(perm = var_542, x = var_541)[name = string("transpose_53")]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; + tensor var_585 = mul(x = x1_7, y = cos_7)[name = string("op_585")]; + tensor var_586 = mul(x = x2_7, y = sin_7)[name = string("op_586")]; + tensor var_587 = sub(x = var_585, y = var_586)[name = string("op_587")]; + tensor var_588 = mul(x = x2_7, y = cos_7)[name = string("op_588")]; + tensor var_589 = mul(x = x1_7, y = sin_7)[name = string("op_589")]; + tensor var_590 = add(x = var_588, y = var_589)[name = string("op_590")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7 = concat(axis = var_55, interleave = rotated_7_interleave_0, values = (var_587, var_590))[name = string("rotated_7")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([10])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([11])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_402, concat_21_values3_0))[name = string("concat_21")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([38])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([39])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; + tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; + tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; + int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; + bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; + tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_402, concat_25_values3_0))[name = string("concat_25")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_9 = transpose(perm = var_546, x = var_545)[name = string("transpose_52")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; + tensor var_613_begin_0 = const()[name = string("op_613_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor var_613_end_0 = const()[name = string("op_613_end_0"), val = tensor([11, 8, 1024, 128])]; + tensor var_613_end_mask_0 = const()[name = string("op_613_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_613_cast_fp16 = slice_by_index(begin = var_613_begin_0, end = var_613_end_0, end_mask = var_613_end_mask_0, x = coreml_update_state_21)[name = string("op_613_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_613_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_615_begin_0 = const()[name = string("op_615_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor var_615_end_0 = const()[name = string("op_615_end_0"), val = tensor([39, 8, 1024, 128])]; + tensor var_615_end_mask_0 = const()[name = string("op_615_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_615_cast_fp16 = slice_by_index(begin = var_615_begin_0, end = var_615_end_0, end_mask = var_615_end_mask_0, x = coreml_update_state_21)[name = string("op_615_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_615_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_624 = const()[name = string("op_624"), val = tensor([1, 3, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_624, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_628 = const()[name = string("op_628"), val = tensor([1, -1, 1024, 128])]; + tensor var_629_cast_fp16 = reshape(shape = var_628, x = x_41_cast_fp16)[name = string("op_629_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_631 = const()[name = string("op_631"), val = tensor([1, 3, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_631, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + bool var_638_transpose_x_0 = const()[name = string("op_638_transpose_x_0"), val = bool(false)]; + bool var_638_transpose_y_0 = const()[name = string("op_638_transpose_y_0"), val = bool(true)]; + tensor var_638_cast_fp16 = matmul(transpose_x = var_638_transpose_x_0, transpose_y = var_638_transpose_y_0, x = rotated_5, y = var_629_cast_fp16)[name = string("op_638_cast_fp16")]; + fp16 var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_3_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = string("attn_weights_3_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_650_axes_0 = const()[name = string("op_650_axes_0"), val = tensor([-1])]; + bool var_650_keep_dims_0 = const()[name = string("op_650_keep_dims_0"), val = bool(true)]; + tensor var_650_cast_fp16 = reduce_sum(axes = var_650_axes_0, keep_dims = var_650_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_650_cast_fp16")]; + tensor var_651_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_650_cast_fp16)[name = string("op_651_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([24, 64, 1024])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_651_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([24, 1024, 128])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 24, 64, 128])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_654_perm_0 = const()[name = string("op_654_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_656 = const()[name = string("op_656"), val = tensor([1, 64, 3072])]; + tensor var_654_cast_fp16 = transpose(perm = var_654_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_51")]; + tensor input_19_cast_fp16 = reshape(shape = var_656, x = var_654_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693583168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700661120))))[name = string("model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_667_axes_0 = const()[name = string("op_667_axes_0"), val = tensor([-1])]; + tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700710336)))]; + tensor var_667_cast_fp16 = layer_norm(axes = var_667_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_667_cast_fp16")]; + tensor var_674 = const()[name = string("op_674"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_675 = transpose(perm = var_674, x = var_667_cast_fp16)[name = string("transpose_50")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_675)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_697_axes_0 = const()[name = string("op_697_axes_0"), val = tensor([2])]; + tensor var_697 = squeeze(axes = var_697_axes_0, x = hidden_states_15)[name = string("op_697")]; + tensor var_698 = const()[name = string("op_698"), val = tensor([0, 2, 1])]; + tensor var_699 = transpose(perm = var_698, x = var_697)[name = string("transpose_49")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_699)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_707_axes_0 = const()[name = string("op_707_axes_0"), val = tensor([-1])]; + tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700716544)))]; + tensor var_707_cast_fp16 = layer_norm(axes = var_707_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_707_cast_fp16")]; + tensor var_711 = const()[name = string("op_711"), val = tensor([0, 2, 1])]; + tensor var_713_axes_0 = const()[name = string("op_713_axes_0"), val = tensor([2])]; + tensor var_712 = transpose(perm = var_711, x = var_707_cast_fp16)[name = string("transpose_48")]; + tensor var_713 = expand_dims(axes = var_713_axes_0, x = var_712)[name = string("op_713")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_713)[name = string("query_states_9")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_713)[name = string("key_states_13")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_713)[name = string("value_states_13")]; + tensor var_733 = const()[name = string("op_733"), val = tensor([1, 24, 128, 64])]; + tensor var_734 = reshape(shape = var_733, x = query_states_9)[name = string("op_734")]; + tensor var_735 = const()[name = string("op_735"), val = tensor([0, 1, 3, 2])]; + tensor var_737 = const()[name = string("op_737"), val = tensor([1, 8, 128, 64])]; + tensor var_738 = reshape(shape = var_737, x = key_states_13)[name = string("op_738")]; + tensor var_739 = const()[name = string("op_739"), val = tensor([0, 1, 3, 2])]; + tensor var_741 = const()[name = string("op_741"), val = tensor([1, 8, 128, 64])]; + tensor var_742 = reshape(shape = var_741, x = value_states_13)[name = string("op_742")]; + tensor var_743 = const()[name = string("op_743"), val = tensor([0, 1, 3, 2])]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_57 = transpose(perm = var_735, x = var_734)[name = string("transpose_47")]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; + tensor var_761 = mul(x = x1_9, y = cos_7)[name = string("op_761")]; + tensor var_762 = mul(x = x2_9, y = sin_7)[name = string("op_762")]; + tensor var_763 = sub(x = var_761, y = var_762)[name = string("op_763")]; + tensor var_764 = mul(x = x2_9, y = cos_7)[name = string("op_764")]; + tensor var_765 = mul(x = x1_9, y = sin_7)[name = string("op_765")]; + tensor var_766 = add(x = var_764, y = var_765)[name = string("op_766")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9 = concat(axis = var_55, interleave = rotated_9_interleave_0, values = (var_763, var_766))[name = string("rotated_9")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_61 = transpose(perm = var_739, x = var_738)[name = string("transpose_46")]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; + tensor var_782 = mul(x = x1_11, y = cos_7)[name = string("op_782")]; + tensor var_783 = mul(x = x2_11, y = sin_7)[name = string("op_783")]; + tensor var_784 = sub(x = var_782, y = var_783)[name = string("op_784")]; + tensor var_785 = mul(x = x2_11, y = cos_7)[name = string("op_785")]; + tensor var_786 = mul(x = x1_11, y = sin_7)[name = string("op_786")]; + tensor var_787 = add(x = var_785, y = var_786)[name = string("op_787")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11 = concat(axis = var_55, interleave = rotated_11_interleave_0, values = (var_784, var_787))[name = string("rotated_11")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([11])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([12])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_402, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([39])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([40])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_402, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15 = transpose(perm = var_743, x = var_742)[name = string("transpose_45")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; + tensor var_810_begin_0 = const()[name = string("op_810_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor var_810_end_0 = const()[name = string("op_810_end_0"), val = tensor([12, 8, 1024, 128])]; + tensor var_810_end_mask_0 = const()[name = string("op_810_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_810_cast_fp16 = slice_by_index(begin = var_810_begin_0, end = var_810_end_0, end_mask = var_810_end_mask_0, x = coreml_update_state_23)[name = string("op_810_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_810_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_812_begin_0 = const()[name = string("op_812_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor var_812_end_0 = const()[name = string("op_812_end_0"), val = tensor([40, 8, 1024, 128])]; + tensor var_812_end_mask_0 = const()[name = string("op_812_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_812_cast_fp16 = slice_by_index(begin = var_812_begin_0, end = var_812_end_0, end_mask = var_812_end_mask_0, x = coreml_update_state_23)[name = string("op_812_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_812_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_821 = const()[name = string("op_821"), val = tensor([1, 3, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_821, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_825 = const()[name = string("op_825"), val = tensor([1, -1, 1024, 128])]; + tensor var_826_cast_fp16 = reshape(shape = var_825, x = x_69_cast_fp16)[name = string("op_826_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_828 = const()[name = string("op_828"), val = tensor([1, 3, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_828, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_835_transpose_x_0 = const()[name = string("op_835_transpose_x_0"), val = bool(false)]; + bool var_835_transpose_y_0 = const()[name = string("op_835_transpose_y_0"), val = bool(true)]; + tensor var_835_cast_fp16 = matmul(transpose_x = var_835_transpose_x_0, transpose_y = var_835_transpose_y_0, x = rotated_9, y = var_826_cast_fp16)[name = string("op_835_cast_fp16")]; + fp16 var_836_to_fp16 = const()[name = string("op_836_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_835_cast_fp16, y = var_836_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_847_axes_0 = const()[name = string("op_847_axes_0"), val = tensor([-1])]; + bool var_847_keep_dims_0 = const()[name = string("op_847_keep_dims_0"), val = bool(true)]; + tensor var_847_cast_fp16 = reduce_sum(axes = var_847_axes_0, keep_dims = var_847_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_847_cast_fp16")]; + tensor var_848_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_847_cast_fp16)[name = string("op_848_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([24, 64, 1024])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_848_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([24, 1024, 128])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 24, 64, 128])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_851_perm_0 = const()[name = string("op_851_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_853 = const()[name = string("op_853"), val = tensor([1, 64, 3072])]; + tensor var_851_cast_fp16 = transpose(perm = var_851_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_44")]; + tensor input_33_cast_fp16 = reshape(shape = var_853, x = var_851_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700722752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707800704))))[name = string("model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_864_axes_0 = const()[name = string("op_864_axes_0"), val = tensor([-1])]; + tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707849920)))]; + tensor var_864_cast_fp16 = layer_norm(axes = var_864_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_864_cast_fp16")]; + tensor var_871 = const()[name = string("op_871"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_872 = transpose(perm = var_871, x = var_864_cast_fp16)[name = string("transpose_43")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_872)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_894_axes_0 = const()[name = string("op_894_axes_0"), val = tensor([2])]; + tensor var_894 = squeeze(axes = var_894_axes_0, x = hidden_states_23)[name = string("op_894")]; + tensor var_895 = const()[name = string("op_895"), val = tensor([0, 2, 1])]; + tensor var_896 = transpose(perm = var_895, x = var_894)[name = string("transpose_42")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_896)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_904_axes_0 = const()[name = string("op_904_axes_0"), val = tensor([-1])]; + tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707856128)))]; + tensor var_904_cast_fp16 = layer_norm(axes = var_904_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_904_cast_fp16")]; + tensor var_908 = const()[name = string("op_908"), val = tensor([0, 2, 1])]; + tensor var_910_axes_0 = const()[name = string("op_910_axes_0"), val = tensor([2])]; + tensor var_909 = transpose(perm = var_908, x = var_904_cast_fp16)[name = string("transpose_41")]; + tensor var_910 = expand_dims(axes = var_910_axes_0, x = var_909)[name = string("op_910")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_910)[name = string("query_states_13")]; + string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; + tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; + tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; + int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; + tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_910)[name = string("key_states_19")]; + string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; + tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; + tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; + int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; + tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_910)[name = string("value_states_19")]; + tensor var_930 = const()[name = string("op_930"), val = tensor([1, 24, 128, 64])]; + tensor var_931 = reshape(shape = var_930, x = query_states_13)[name = string("op_931")]; + tensor var_932 = const()[name = string("op_932"), val = tensor([0, 1, 3, 2])]; + tensor var_934 = const()[name = string("op_934"), val = tensor([1, 8, 128, 64])]; + tensor var_935 = reshape(shape = var_934, x = key_states_19)[name = string("op_935")]; + tensor var_936 = const()[name = string("op_936"), val = tensor([0, 1, 3, 2])]; + tensor var_938 = const()[name = string("op_938"), val = tensor([1, 8, 128, 64])]; + tensor var_939 = reshape(shape = var_938, x = value_states_19)[name = string("op_939")]; + tensor var_940 = const()[name = string("op_940"), val = tensor([0, 1, 3, 2])]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_85 = transpose(perm = var_932, x = var_931)[name = string("transpose_40")]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; + tensor var_958 = mul(x = x1_13, y = cos_7)[name = string("op_958")]; + tensor var_959 = mul(x = x2_13, y = sin_7)[name = string("op_959")]; + tensor var_960 = sub(x = var_958, y = var_959)[name = string("op_960")]; + tensor var_961 = mul(x = x2_13, y = cos_7)[name = string("op_961")]; + tensor var_962 = mul(x = x1_13, y = sin_7)[name = string("op_962")]; + tensor var_963 = add(x = var_961, y = var_962)[name = string("op_963")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13 = concat(axis = var_55, interleave = rotated_13_interleave_0, values = (var_960, var_963))[name = string("rotated_13")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_89 = transpose(perm = var_936, x = var_935)[name = string("transpose_39")]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")]; + tensor var_979 = mul(x = x1_15, y = cos_7)[name = string("op_979")]; + tensor var_980 = mul(x = x2_15, y = sin_7)[name = string("op_980")]; + tensor var_981 = sub(x = var_979, y = var_980)[name = string("op_981")]; + tensor var_982 = mul(x = x2_15, y = cos_7)[name = string("op_982")]; + tensor var_983 = mul(x = x1_15, y = sin_7)[name = string("op_983")]; + tensor var_984 = add(x = var_982, y = var_983)[name = string("op_984")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15 = concat(axis = var_55, interleave = rotated_15_interleave_0, values = (var_981, var_984))[name = string("rotated_15")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([12])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([13])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_402, concat_57_values3_0))[name = string("concat_57")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([40])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([41])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; + tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; + tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; + int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; + bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; + tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_402, concat_61_values3_0))[name = string("concat_61")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_21 = transpose(perm = var_940, x = var_939)[name = string("transpose_38")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; + tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor var_1007_end_0 = const()[name = string("op_1007_end_0"), val = tensor([13, 8, 1024, 128])]; + tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = coreml_update_state_25)[name = string("op_1007_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_1007_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_1009_begin_0 = const()[name = string("op_1009_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor var_1009_end_0 = const()[name = string("op_1009_end_0"), val = tensor([41, 8, 1024, 128])]; + tensor var_1009_end_mask_0 = const()[name = string("op_1009_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1009_cast_fp16 = slice_by_index(begin = var_1009_begin_0, end = var_1009_end_0, end_mask = var_1009_end_mask_0, x = coreml_update_state_25)[name = string("op_1009_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_1009_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_1018 = const()[name = string("op_1018"), val = tensor([1, 3, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_1018, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_1022 = const()[name = string("op_1022"), val = tensor([1, -1, 1024, 128])]; + tensor var_1023_cast_fp16 = reshape(shape = var_1022, x = x_97_cast_fp16)[name = string("op_1023_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_1025 = const()[name = string("op_1025"), val = tensor([1, 3, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_1025, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + bool var_1032_transpose_x_0 = const()[name = string("op_1032_transpose_x_0"), val = bool(false)]; + bool var_1032_transpose_y_0 = const()[name = string("op_1032_transpose_y_0"), val = bool(true)]; + tensor var_1032_cast_fp16 = matmul(transpose_x = var_1032_transpose_x_0, transpose_y = var_1032_transpose_y_0, x = rotated_13, y = var_1023_cast_fp16)[name = string("op_1032_cast_fp16")]; + fp16 var_1033_to_fp16 = const()[name = string("op_1033_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_7_cast_fp16 = mul(x = var_1032_cast_fp16, y = var_1033_to_fp16)[name = string("attn_weights_7_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_1044_axes_0 = const()[name = string("op_1044_axes_0"), val = tensor([-1])]; + bool var_1044_keep_dims_0 = const()[name = string("op_1044_keep_dims_0"), val = bool(true)]; + tensor var_1044_cast_fp16 = reduce_sum(axes = var_1044_axes_0, keep_dims = var_1044_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_1044_cast_fp16")]; + tensor var_1045_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_1044_cast_fp16)[name = string("op_1045_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([24, 64, 1024])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_1045_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([24, 1024, 128])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 24, 64, 128])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_1048_perm_0 = const()[name = string("op_1048_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1050 = const()[name = string("op_1050"), val = tensor([1, 64, 3072])]; + tensor var_1048_cast_fp16 = transpose(perm = var_1048_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_37")]; + tensor input_47_cast_fp16 = reshape(shape = var_1050, x = var_1048_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707862336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714940288))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_1061_axes_0 = const()[name = string("op_1061_axes_0"), val = tensor([-1])]; + tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714989504)))]; + tensor var_1061_cast_fp16 = layer_norm(axes = var_1061_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1061_cast_fp16")]; + tensor var_1068 = const()[name = string("op_1068"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_1069 = transpose(perm = var_1068, x = var_1061_cast_fp16)[name = string("transpose_36")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1069)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1091_axes_0 = const()[name = string("op_1091_axes_0"), val = tensor([2])]; + tensor var_1091 = squeeze(axes = var_1091_axes_0, x = hidden_states_31)[name = string("op_1091")]; + tensor var_1092 = const()[name = string("op_1092"), val = tensor([0, 2, 1])]; + tensor var_1093 = transpose(perm = var_1092, x = var_1091)[name = string("transpose_35")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1093)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1101_axes_0 = const()[name = string("op_1101_axes_0"), val = tensor([-1])]; + tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714995712)))]; + tensor var_1101_cast_fp16 = layer_norm(axes = var_1101_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor var_1105 = const()[name = string("op_1105"), val = tensor([0, 2, 1])]; + tensor var_1107_axes_0 = const()[name = string("op_1107_axes_0"), val = tensor([2])]; + tensor var_1106 = transpose(perm = var_1105, x = var_1101_cast_fp16)[name = string("transpose_34")]; + tensor var_1107 = expand_dims(axes = var_1107_axes_0, x = var_1106)[name = string("op_1107")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_1107)[name = string("query_states_17")]; + string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; + tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; + tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; + int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; + tensor key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_1107)[name = string("key_states_25")]; + string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; + tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; + tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; + int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; + tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_1107)[name = string("value_states_25")]; + tensor var_1127 = const()[name = string("op_1127"), val = tensor([1, 24, 128, 64])]; + tensor var_1128 = reshape(shape = var_1127, x = query_states_17)[name = string("op_1128")]; + tensor var_1129 = const()[name = string("op_1129"), val = tensor([0, 1, 3, 2])]; + tensor var_1131 = const()[name = string("op_1131"), val = tensor([1, 8, 128, 64])]; + tensor var_1132 = reshape(shape = var_1131, x = key_states_25)[name = string("op_1132")]; + tensor var_1133 = const()[name = string("op_1133"), val = tensor([0, 1, 3, 2])]; + tensor var_1135 = const()[name = string("op_1135"), val = tensor([1, 8, 128, 64])]; + tensor var_1136 = reshape(shape = var_1135, x = value_states_25)[name = string("op_1136")]; + tensor var_1137 = const()[name = string("op_1137"), val = tensor([0, 1, 3, 2])]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_113 = transpose(perm = var_1129, x = var_1128)[name = string("transpose_33")]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")]; + tensor var_1155 = mul(x = x1_17, y = cos_7)[name = string("op_1155")]; + tensor var_1156 = mul(x = x2_17, y = sin_7)[name = string("op_1156")]; + tensor var_1157 = sub(x = var_1155, y = var_1156)[name = string("op_1157")]; + tensor var_1158 = mul(x = x2_17, y = cos_7)[name = string("op_1158")]; + tensor var_1159 = mul(x = x1_17, y = sin_7)[name = string("op_1159")]; + tensor var_1160 = add(x = var_1158, y = var_1159)[name = string("op_1160")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17 = concat(axis = var_55, interleave = rotated_17_interleave_0, values = (var_1157, var_1160))[name = string("rotated_17")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_117 = transpose(perm = var_1133, x = var_1132)[name = string("transpose_32")]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")]; + tensor var_1176 = mul(x = x1_19, y = cos_7)[name = string("op_1176")]; + tensor var_1177 = mul(x = x2_19, y = sin_7)[name = string("op_1177")]; + tensor var_1178 = sub(x = var_1176, y = var_1177)[name = string("op_1178")]; + tensor var_1179 = mul(x = x2_19, y = cos_7)[name = string("op_1179")]; + tensor var_1180 = mul(x = x1_19, y = sin_7)[name = string("op_1180")]; + tensor var_1181 = add(x = var_1179, y = var_1180)[name = string("op_1181")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19 = concat(axis = var_55, interleave = rotated_19_interleave_0, values = (var_1178, var_1181))[name = string("rotated_19")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([13])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([14])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_402, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([41])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([42])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_402, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_27 = transpose(perm = var_1137, x = var_1136)[name = string("transpose_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; + tensor var_1204_begin_0 = const()[name = string("op_1204_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor var_1204_end_0 = const()[name = string("op_1204_end_0"), val = tensor([14, 8, 1024, 128])]; + tensor var_1204_end_mask_0 = const()[name = string("op_1204_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1204_cast_fp16 = slice_by_index(begin = var_1204_begin_0, end = var_1204_end_0, end_mask = var_1204_end_mask_0, x = coreml_update_state_27)[name = string("op_1204_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1204_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = string("op_1206_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor var_1206_end_0 = const()[name = string("op_1206_end_0"), val = tensor([42, 8, 1024, 128])]; + tensor var_1206_end_mask_0 = const()[name = string("op_1206_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = coreml_update_state_27)[name = string("op_1206_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1206_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1215 = const()[name = string("op_1215"), val = tensor([1, 3, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1215, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1219 = const()[name = string("op_1219"), val = tensor([1, -1, 1024, 128])]; + tensor var_1220_cast_fp16 = reshape(shape = var_1219, x = x_125_cast_fp16)[name = string("op_1220_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1222 = const()[name = string("op_1222"), val = tensor([1, 3, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1222, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + bool var_1229_transpose_x_0 = const()[name = string("op_1229_transpose_x_0"), val = bool(false)]; + bool var_1229_transpose_y_0 = const()[name = string("op_1229_transpose_y_0"), val = bool(true)]; + tensor var_1229_cast_fp16 = matmul(transpose_x = var_1229_transpose_x_0, transpose_y = var_1229_transpose_y_0, x = rotated_17, y = var_1220_cast_fp16)[name = string("op_1229_cast_fp16")]; + fp16 var_1230_to_fp16 = const()[name = string("op_1230_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_1229_cast_fp16, y = var_1230_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1241_axes_0 = const()[name = string("op_1241_axes_0"), val = tensor([-1])]; + bool var_1241_keep_dims_0 = const()[name = string("op_1241_keep_dims_0"), val = bool(true)]; + tensor var_1241_cast_fp16 = reduce_sum(axes = var_1241_axes_0, keep_dims = var_1241_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1241_cast_fp16")]; + tensor var_1242_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1241_cast_fp16)[name = string("op_1242_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([24, 64, 1024])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1242_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([24, 1024, 128])]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 24, 64, 128])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor var_1245_perm_0 = const()[name = string("op_1245_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1247 = const()[name = string("op_1247"), val = tensor([1, 64, 3072])]; + tensor var_1245_cast_fp16 = transpose(perm = var_1245_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_30")]; + tensor input_61_cast_fp16 = reshape(shape = var_1247, x = var_1245_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(715001920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722079872))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1258_axes_0 = const()[name = string("op_1258_axes_0"), val = tensor([-1])]; + tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722129088)))]; + tensor var_1258_cast_fp16 = layer_norm(axes = var_1258_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1258_cast_fp16")]; + tensor var_1265 = const()[name = string("op_1265"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1266 = transpose(perm = var_1265, x = var_1258_cast_fp16)[name = string("transpose_29")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1266)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1288_axes_0 = const()[name = string("op_1288_axes_0"), val = tensor([2])]; + tensor var_1288 = squeeze(axes = var_1288_axes_0, x = hidden_states_39)[name = string("op_1288")]; + tensor var_1289 = const()[name = string("op_1289"), val = tensor([0, 2, 1])]; + tensor var_1290 = transpose(perm = var_1289, x = var_1288)[name = string("transpose_28")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1290)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1298_axes_0 = const()[name = string("op_1298_axes_0"), val = tensor([-1])]; + tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722135296)))]; + tensor var_1298_cast_fp16 = layer_norm(axes = var_1298_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor var_1302 = const()[name = string("op_1302"), val = tensor([0, 2, 1])]; + tensor var_1304_axes_0 = const()[name = string("op_1304_axes_0"), val = tensor([2])]; + tensor var_1303 = transpose(perm = var_1302, x = var_1298_cast_fp16)[name = string("transpose_27")]; + tensor var_1304 = expand_dims(axes = var_1304_axes_0, x = var_1303)[name = string("op_1304")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_1304)[name = string("query_states_21")]; + string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; + tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; + tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; + int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; + tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_1304)[name = string("key_states_31")]; + string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")]; + tensor value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor([1, 1])]; + tensor value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor([1, 1])]; + int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)]; + tensor value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_1304)[name = string("value_states_31")]; + tensor var_1324 = const()[name = string("op_1324"), val = tensor([1, 24, 128, 64])]; + tensor var_1325 = reshape(shape = var_1324, x = query_states_21)[name = string("op_1325")]; + tensor var_1326 = const()[name = string("op_1326"), val = tensor([0, 1, 3, 2])]; + tensor var_1328 = const()[name = string("op_1328"), val = tensor([1, 8, 128, 64])]; + tensor var_1329 = reshape(shape = var_1328, x = key_states_31)[name = string("op_1329")]; + tensor var_1330 = const()[name = string("op_1330"), val = tensor([0, 1, 3, 2])]; + tensor var_1332 = const()[name = string("op_1332"), val = tensor([1, 8, 128, 64])]; + tensor var_1333 = reshape(shape = var_1332, x = value_states_31)[name = string("op_1333")]; + tensor var_1334 = const()[name = string("op_1334"), val = tensor([0, 1, 3, 2])]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_141 = transpose(perm = var_1326, x = var_1325)[name = string("transpose_26")]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")]; + tensor var_1352 = mul(x = x1_21, y = cos_7)[name = string("op_1352")]; + tensor var_1353 = mul(x = x2_21, y = sin_7)[name = string("op_1353")]; + tensor var_1354 = sub(x = var_1352, y = var_1353)[name = string("op_1354")]; + tensor var_1355 = mul(x = x2_21, y = cos_7)[name = string("op_1355")]; + tensor var_1356 = mul(x = x1_21, y = sin_7)[name = string("op_1356")]; + tensor var_1357 = add(x = var_1355, y = var_1356)[name = string("op_1357")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21 = concat(axis = var_55, interleave = rotated_21_interleave_0, values = (var_1354, var_1357))[name = string("rotated_21")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_145 = transpose(perm = var_1330, x = var_1329)[name = string("transpose_25")]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")]; + tensor var_1373 = mul(x = x1_23, y = cos_7)[name = string("op_1373")]; + tensor var_1374 = mul(x = x2_23, y = sin_7)[name = string("op_1374")]; + tensor var_1375 = sub(x = var_1373, y = var_1374)[name = string("op_1375")]; + tensor var_1376 = mul(x = x2_23, y = cos_7)[name = string("op_1376")]; + tensor var_1377 = mul(x = x1_23, y = sin_7)[name = string("op_1377")]; + tensor var_1378 = add(x = var_1376, y = var_1377)[name = string("op_1378")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23 = concat(axis = var_55, interleave = rotated_23_interleave_0, values = (var_1375, var_1378))[name = string("rotated_23")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([14])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([15])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_402, concat_93_values3_0))[name = string("concat_93")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([42])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([43])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; + tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; + tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; + int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; + bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; + tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_402, concat_97_values3_0))[name = string("concat_97")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_33 = transpose(perm = var_1334, x = var_1333)[name = string("transpose_24")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; + tensor var_1401_begin_0 = const()[name = string("op_1401_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor var_1401_end_0 = const()[name = string("op_1401_end_0"), val = tensor([15, 8, 1024, 128])]; + tensor var_1401_end_mask_0 = const()[name = string("op_1401_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1401_cast_fp16 = slice_by_index(begin = var_1401_begin_0, end = var_1401_end_0, end_mask = var_1401_end_mask_0, x = coreml_update_state_29)[name = string("op_1401_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1401_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1403_begin_0 = const()[name = string("op_1403_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor var_1403_end_0 = const()[name = string("op_1403_end_0"), val = tensor([43, 8, 1024, 128])]; + tensor var_1403_end_mask_0 = const()[name = string("op_1403_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1403_cast_fp16 = slice_by_index(begin = var_1403_begin_0, end = var_1403_end_0, end_mask = var_1403_end_mask_0, x = coreml_update_state_29)[name = string("op_1403_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1403_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1412 = const()[name = string("op_1412"), val = tensor([1, 3, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1412, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1416 = const()[name = string("op_1416"), val = tensor([1, -1, 1024, 128])]; + tensor var_1417_cast_fp16 = reshape(shape = var_1416, x = x_153_cast_fp16)[name = string("op_1417_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1419 = const()[name = string("op_1419"), val = tensor([1, 3, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1419, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + bool var_1426_transpose_x_0 = const()[name = string("op_1426_transpose_x_0"), val = bool(false)]; + bool var_1426_transpose_y_0 = const()[name = string("op_1426_transpose_y_0"), val = bool(true)]; + tensor var_1426_cast_fp16 = matmul(transpose_x = var_1426_transpose_x_0, transpose_y = var_1426_transpose_y_0, x = rotated_21, y = var_1417_cast_fp16)[name = string("op_1426_cast_fp16")]; + fp16 var_1427_to_fp16 = const()[name = string("op_1427_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_11_cast_fp16 = mul(x = var_1426_cast_fp16, y = var_1427_to_fp16)[name = string("attn_weights_11_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1438_axes_0 = const()[name = string("op_1438_axes_0"), val = tensor([-1])]; + bool var_1438_keep_dims_0 = const()[name = string("op_1438_keep_dims_0"), val = bool(true)]; + tensor var_1438_cast_fp16 = reduce_sum(axes = var_1438_axes_0, keep_dims = var_1438_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1438_cast_fp16")]; + tensor var_1439_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1438_cast_fp16)[name = string("op_1439_cast_fp16")]; + tensor concat_102 = const()[name = string("concat_102"), val = tensor([24, 64, 1024])]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1439_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor concat_103 = const()[name = string("concat_103"), val = tensor([24, 1024, 128])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 24, 64, 128])]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor var_1442_perm_0 = const()[name = string("op_1442_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1444 = const()[name = string("op_1444"), val = tensor([1, 64, 3072])]; + tensor var_1442_cast_fp16 = transpose(perm = var_1442_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_23")]; + tensor input_75_cast_fp16 = reshape(shape = var_1444, x = var_1442_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(722141504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729219456))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1455_axes_0 = const()[name = string("op_1455_axes_0"), val = tensor([-1])]; + tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729268672)))]; + tensor var_1455_cast_fp16 = layer_norm(axes = var_1455_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1455_cast_fp16")]; + tensor var_1462 = const()[name = string("op_1462"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1463 = transpose(perm = var_1462, x = var_1455_cast_fp16)[name = string("transpose_22")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1463)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1485_axes_0 = const()[name = string("op_1485_axes_0"), val = tensor([2])]; + tensor var_1485 = squeeze(axes = var_1485_axes_0, x = hidden_states_47)[name = string("op_1485")]; + tensor var_1486 = const()[name = string("op_1486"), val = tensor([0, 2, 1])]; + tensor var_1487 = transpose(perm = var_1486, x = var_1485)[name = string("transpose_21")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1487)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1495_axes_0 = const()[name = string("op_1495_axes_0"), val = tensor([-1])]; + tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729274880)))]; + tensor var_1495_cast_fp16 = layer_norm(axes = var_1495_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1495_cast_fp16")]; + tensor var_1499 = const()[name = string("op_1499"), val = tensor([0, 2, 1])]; + tensor var_1501_axes_0 = const()[name = string("op_1501_axes_0"), val = tensor([2])]; + tensor var_1500 = transpose(perm = var_1499, x = var_1495_cast_fp16)[name = string("transpose_20")]; + tensor var_1501 = expand_dims(axes = var_1501_axes_0, x = var_1500)[name = string("op_1501")]; + string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; + tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; + tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; + int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; + tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_1501)[name = string("query_states_25")]; + string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; + tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; + tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; + int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; + tensor key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_1501)[name = string("key_states_37")]; + string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; + tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; + tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; + int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; + tensor value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_1501)[name = string("value_states_37")]; + tensor var_1521 = const()[name = string("op_1521"), val = tensor([1, 24, 128, 64])]; + tensor var_1522 = reshape(shape = var_1521, x = query_states_25)[name = string("op_1522")]; + tensor var_1523 = const()[name = string("op_1523"), val = tensor([0, 1, 3, 2])]; + tensor var_1525 = const()[name = string("op_1525"), val = tensor([1, 8, 128, 64])]; + tensor var_1526 = reshape(shape = var_1525, x = key_states_37)[name = string("op_1526")]; + tensor var_1527 = const()[name = string("op_1527"), val = tensor([0, 1, 3, 2])]; + tensor var_1529 = const()[name = string("op_1529"), val = tensor([1, 8, 128, 64])]; + tensor var_1530 = reshape(shape = var_1529, x = value_states_37)[name = string("op_1530")]; + tensor var_1531 = const()[name = string("op_1531"), val = tensor([0, 1, 3, 2])]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_169 = transpose(perm = var_1523, x = var_1522)[name = string("transpose_19")]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")]; + tensor var_1549 = mul(x = x1_25, y = cos_7)[name = string("op_1549")]; + tensor var_1550 = mul(x = x2_25, y = sin_7)[name = string("op_1550")]; + tensor var_1551 = sub(x = var_1549, y = var_1550)[name = string("op_1551")]; + tensor var_1552 = mul(x = x2_25, y = cos_7)[name = string("op_1552")]; + tensor var_1553 = mul(x = x1_25, y = sin_7)[name = string("op_1553")]; + tensor var_1554 = add(x = var_1552, y = var_1553)[name = string("op_1554")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25 = concat(axis = var_55, interleave = rotated_25_interleave_0, values = (var_1551, var_1554))[name = string("rotated_25")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_173 = transpose(perm = var_1527, x = var_1526)[name = string("transpose_18")]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = x_173)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = x_173)[name = string("x2_27")]; + tensor var_1570 = mul(x = x1_27, y = cos_7)[name = string("op_1570")]; + tensor var_1571 = mul(x = x2_27, y = sin_7)[name = string("op_1571")]; + tensor var_1572 = sub(x = var_1570, y = var_1571)[name = string("op_1572")]; + tensor var_1573 = mul(x = x2_27, y = cos_7)[name = string("op_1573")]; + tensor var_1574 = mul(x = x1_27, y = sin_7)[name = string("op_1574")]; + tensor var_1575 = add(x = var_1573, y = var_1574)[name = string("op_1575")]; + bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; + tensor rotated_27 = concat(axis = var_55, interleave = rotated_27_interleave_0, values = (var_1572, var_1575))[name = string("rotated_27")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([15])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([16])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_402, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([43])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([44])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_402, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_39 = transpose(perm = var_1531, x = var_1530)[name = string("transpose_17")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; + tensor var_1598_begin_0 = const()[name = string("op_1598_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor var_1598_end_0 = const()[name = string("op_1598_end_0"), val = tensor([16, 8, 1024, 128])]; + tensor var_1598_end_mask_0 = const()[name = string("op_1598_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1598_cast_fp16 = slice_by_index(begin = var_1598_begin_0, end = var_1598_end_0, end_mask = var_1598_end_mask_0, x = coreml_update_state_31)[name = string("op_1598_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1598_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_1600_begin_0 = const()[name = string("op_1600_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor var_1600_end_0 = const()[name = string("op_1600_end_0"), val = tensor([44, 8, 1024, 128])]; + tensor var_1600_end_mask_0 = const()[name = string("op_1600_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1600_cast_fp16 = slice_by_index(begin = var_1600_begin_0, end = var_1600_end_0, end_mask = var_1600_end_mask_0, x = coreml_update_state_31)[name = string("op_1600_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1600_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1609 = const()[name = string("op_1609"), val = tensor([1, 3, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1609, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1613 = const()[name = string("op_1613"), val = tensor([1, -1, 1024, 128])]; + tensor var_1614_cast_fp16 = reshape(shape = var_1613, x = x_181_cast_fp16)[name = string("op_1614_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1616 = const()[name = string("op_1616"), val = tensor([1, 3, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1616, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_1623_transpose_x_0 = const()[name = string("op_1623_transpose_x_0"), val = bool(false)]; + bool var_1623_transpose_y_0 = const()[name = string("op_1623_transpose_y_0"), val = bool(true)]; + tensor var_1623_cast_fp16 = matmul(transpose_x = var_1623_transpose_x_0, transpose_y = var_1623_transpose_y_0, x = rotated_25, y = var_1614_cast_fp16)[name = string("op_1623_cast_fp16")]; + fp16 var_1624_to_fp16 = const()[name = string("op_1624_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_1623_cast_fp16, y = var_1624_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; + tensor var_1635_axes_0 = const()[name = string("op_1635_axes_0"), val = tensor([-1])]; + bool var_1635_keep_dims_0 = const()[name = string("op_1635_keep_dims_0"), val = bool(true)]; + tensor var_1635_cast_fp16 = reduce_sum(axes = var_1635_axes_0, keep_dims = var_1635_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1635_cast_fp16")]; + tensor var_1636_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1635_cast_fp16)[name = string("op_1636_cast_fp16")]; + tensor concat_120 = const()[name = string("concat_120"), val = tensor([24, 64, 1024])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1636_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor concat_121 = const()[name = string("concat_121"), val = tensor([24, 1024, 128])]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 24, 64, 128])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor var_1639_perm_0 = const()[name = string("op_1639_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1641 = const()[name = string("op_1641"), val = tensor([1, 64, 3072])]; + tensor var_1639_cast_fp16 = transpose(perm = var_1639_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_16")]; + tensor input_89_cast_fp16 = reshape(shape = var_1641, x = var_1639_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729281088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736359040))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; + bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; + tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1652_axes_0 = const()[name = string("op_1652_axes_0"), val = tensor([-1])]; + tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736408256)))]; + tensor var_1652_cast_fp16 = layer_norm(axes = var_1652_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1652_cast_fp16")]; + tensor var_1659 = const()[name = string("op_1659"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1660 = transpose(perm = var_1659, x = var_1652_cast_fp16)[name = string("transpose_15")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1660)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; + tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; + tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; + int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; + tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; + tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; + tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; + string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; + tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; + tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; + tensor var_1682_axes_0 = const()[name = string("op_1682_axes_0"), val = tensor([2])]; + tensor var_1682 = squeeze(axes = var_1682_axes_0, x = hidden_states_55)[name = string("op_1682")]; + tensor var_1683 = const()[name = string("op_1683"), val = tensor([0, 2, 1])]; + tensor var_1684 = transpose(perm = var_1683, x = var_1682)[name = string("transpose_14")]; + tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1684)[name = string("hidden_states_57_cast_fp16")]; + tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; + bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; + tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; + tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor var_1692_axes_0 = const()[name = string("op_1692_axes_0"), val = tensor([-1])]; + tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736414464)))]; + tensor var_1692_cast_fp16 = layer_norm(axes = var_1692_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_16_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1692_cast_fp16")]; + tensor var_1696 = const()[name = string("op_1696"), val = tensor([0, 2, 1])]; + tensor var_1698_axes_0 = const()[name = string("op_1698_axes_0"), val = tensor([2])]; + tensor var_1697 = transpose(perm = var_1696, x = var_1692_cast_fp16)[name = string("transpose_13")]; + tensor var_1698 = expand_dims(axes = var_1698_axes_0, x = var_1697)[name = string("op_1698")]; + string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; + tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; + tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; + int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; + tensor query_states_29 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_1698)[name = string("query_states_29")]; + string key_states_43_pad_type_0 = const()[name = string("key_states_43_pad_type_0"), val = string("valid")]; + tensor key_states_43_strides_0 = const()[name = string("key_states_43_strides_0"), val = tensor([1, 1])]; + tensor key_states_43_pad_0 = const()[name = string("key_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_43_dilations_0 = const()[name = string("key_states_43_dilations_0"), val = tensor([1, 1])]; + int32 key_states_43_groups_0 = const()[name = string("key_states_43_groups_0"), val = int32(1)]; + tensor key_states_43 = conv(dilations = key_states_43_dilations_0, groups = key_states_43_groups_0, pad = key_states_43_pad_0, pad_type = key_states_43_pad_type_0, strides = key_states_43_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_1698)[name = string("key_states_43")]; + string value_states_43_pad_type_0 = const()[name = string("value_states_43_pad_type_0"), val = string("valid")]; + tensor value_states_43_strides_0 = const()[name = string("value_states_43_strides_0"), val = tensor([1, 1])]; + tensor value_states_43_pad_0 = const()[name = string("value_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_43_dilations_0 = const()[name = string("value_states_43_dilations_0"), val = tensor([1, 1])]; + int32 value_states_43_groups_0 = const()[name = string("value_states_43_groups_0"), val = int32(1)]; + tensor value_states_43 = conv(dilations = value_states_43_dilations_0, groups = value_states_43_groups_0, pad = value_states_43_pad_0, pad_type = value_states_43_pad_type_0, strides = value_states_43_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_1698)[name = string("value_states_43")]; + tensor var_1718 = const()[name = string("op_1718"), val = tensor([1, 24, 128, 64])]; + tensor var_1719 = reshape(shape = var_1718, x = query_states_29)[name = string("op_1719")]; + tensor var_1720 = const()[name = string("op_1720"), val = tensor([0, 1, 3, 2])]; + tensor var_1722 = const()[name = string("op_1722"), val = tensor([1, 8, 128, 64])]; + tensor var_1723 = reshape(shape = var_1722, x = key_states_43)[name = string("op_1723")]; + tensor var_1724 = const()[name = string("op_1724"), val = tensor([0, 1, 3, 2])]; + tensor var_1726 = const()[name = string("op_1726"), val = tensor([1, 8, 128, 64])]; + tensor var_1727 = reshape(shape = var_1726, x = value_states_43)[name = string("op_1727")]; + tensor var_1728 = const()[name = string("op_1728"), val = tensor([0, 1, 3, 2])]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_197 = transpose(perm = var_1720, x = var_1719)[name = string("transpose_12")]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = x_197)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = x_197)[name = string("x2_29")]; + tensor var_1746 = mul(x = x1_29, y = cos_7)[name = string("op_1746")]; + tensor var_1747 = mul(x = x2_29, y = sin_7)[name = string("op_1747")]; + tensor var_1748 = sub(x = var_1746, y = var_1747)[name = string("op_1748")]; + tensor var_1749 = mul(x = x2_29, y = cos_7)[name = string("op_1749")]; + tensor var_1750 = mul(x = x1_29, y = sin_7)[name = string("op_1750")]; + tensor var_1751 = add(x = var_1749, y = var_1750)[name = string("op_1751")]; + bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; + tensor rotated_29 = concat(axis = var_55, interleave = rotated_29_interleave_0, values = (var_1748, var_1751))[name = string("rotated_29")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_201 = transpose(perm = var_1724, x = var_1723)[name = string("transpose_11")]; + tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = x_201)[name = string("x1_31")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = x_201)[name = string("x2_31")]; + tensor var_1767 = mul(x = x1_31, y = cos_7)[name = string("op_1767")]; + tensor var_1768 = mul(x = x2_31, y = sin_7)[name = string("op_1768")]; + tensor var_1769 = sub(x = var_1767, y = var_1768)[name = string("op_1769")]; + tensor var_1770 = mul(x = x2_31, y = cos_7)[name = string("op_1770")]; + tensor var_1771 = mul(x = x1_31, y = sin_7)[name = string("op_1771")]; + tensor var_1772 = add(x = var_1770, y = var_1771)[name = string("op_1772")]; + bool rotated_31_interleave_0 = const()[name = string("rotated_31_interleave_0"), val = bool(false)]; + tensor rotated_31 = concat(axis = var_55, interleave = rotated_31_interleave_0, values = (var_1769, var_1772))[name = string("rotated_31")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([16])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([17])]; + int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; + bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; + tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_402, concat_129_values3_0))[name = string("concat_129")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_31, x = coreml_update_state_31)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_32 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_32")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([44])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([45])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; + tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; + tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; + int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; + bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; + tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_402, concat_133_values3_0))[name = string("concat_133")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_45 = transpose(perm = var_1728, x = var_1727)[name = string("transpose_10")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_45, x = coreml_update_state_32)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_33 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_33")]; + tensor var_1795_begin_0 = const()[name = string("op_1795_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_1795_end_0 = const()[name = string("op_1795_end_0"), val = tensor([17, 8, 1024, 128])]; + tensor var_1795_end_mask_0 = const()[name = string("op_1795_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1795_cast_fp16 = slice_by_index(begin = var_1795_begin_0, end = var_1795_end_0, end_mask = var_1795_end_mask_0, x = coreml_update_state_33)[name = string("op_1795_cast_fp16")]; + tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; + tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_1795_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; + tensor var_1797_begin_0 = const()[name = string("op_1797_begin_0"), val = tensor([44, 0, 0, 0])]; + tensor var_1797_end_0 = const()[name = string("op_1797_end_0"), val = tensor([45, 8, 1024, 128])]; + tensor var_1797_end_mask_0 = const()[name = string("op_1797_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1797_cast_fp16 = slice_by_index(begin = var_1797_begin_0, end = var_1797_end_0, end_mask = var_1797_end_mask_0, x = coreml_update_state_33)[name = string("op_1797_cast_fp16")]; + tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; + tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_1797_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; + tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; + tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_1806 = const()[name = string("op_1806"), val = tensor([1, 3, 1, 1])]; + tensor x_209_cast_fp16 = tile(reps = var_1806, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1810 = const()[name = string("op_1810"), val = tensor([1, -1, 1024, 128])]; + tensor var_1811_cast_fp16 = reshape(shape = var_1810, x = x_209_cast_fp16)[name = string("op_1811_cast_fp16")]; + tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; + tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_1813 = const()[name = string("op_1813"), val = tensor([1, 3, 1, 1])]; + tensor x_215_cast_fp16 = tile(reps = var_1813, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; + bool var_1820_transpose_x_0 = const()[name = string("op_1820_transpose_x_0"), val = bool(false)]; + bool var_1820_transpose_y_0 = const()[name = string("op_1820_transpose_y_0"), val = bool(true)]; + tensor var_1820_cast_fp16 = matmul(transpose_x = var_1820_transpose_x_0, transpose_y = var_1820_transpose_y_0, x = rotated_29, y = var_1811_cast_fp16)[name = string("op_1820_cast_fp16")]; + fp16 var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_15_cast_fp16 = mul(x = var_1820_cast_fp16, y = var_1821_to_fp16)[name = string("attn_weights_15_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = attn_weights_15_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor exp_x_15_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_15_cast_fp16")]; + tensor var_1832_axes_0 = const()[name = string("op_1832_axes_0"), val = tensor([-1])]; + bool var_1832_keep_dims_0 = const()[name = string("op_1832_keep_dims_0"), val = bool(true)]; + tensor var_1832_cast_fp16 = reduce_sum(axes = var_1832_axes_0, keep_dims = var_1832_keep_dims_0, x = exp_x_15_cast_fp16)[name = string("op_1832_cast_fp16")]; + tensor var_1833_cast_fp16 = real_div(x = exp_x_15_cast_fp16, y = var_1832_cast_fp16)[name = string("op_1833_cast_fp16")]; + tensor concat_138 = const()[name = string("concat_138"), val = tensor([24, 64, 1024])]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_1833_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor concat_139 = const()[name = string("concat_139"), val = tensor([24, 1024, 128])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_215_cast_fp16)[name = string("reshape_22_cast_fp16")]; + bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; + bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 24, 64, 128])]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor var_1836_perm_0 = const()[name = string("op_1836_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1838 = const()[name = string("op_1838"), val = tensor([1, 64, 3072])]; + tensor var_1836_cast_fp16 = transpose(perm = var_1836_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_9")]; + tensor input_103_cast_fp16 = reshape(shape = var_1838, x = var_1836_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736420672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743498624))))[name = string("model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; + bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; + tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; + tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_1849_axes_0 = const()[name = string("op_1849_axes_0"), val = tensor([-1])]; + tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743547840)))]; + tensor var_1849_cast_fp16 = layer_norm(axes = var_1849_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_16_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1849_cast_fp16")]; + tensor var_1856 = const()[name = string("op_1856"), val = tensor([0, 2, 1])]; + tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; + tensor var_1857 = transpose(perm = var_1856, x = var_1849_cast_fp16)[name = string("transpose_8")]; + tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1857)[name = string("input_107")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; + string up_states_15_pad_type_0 = const()[name = string("up_states_15_pad_type_0"), val = string("valid")]; + tensor up_states_15_strides_0 = const()[name = string("up_states_15_strides_0"), val = tensor([1, 1])]; + tensor up_states_15_pad_0 = const()[name = string("up_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_15_dilations_0 = const()[name = string("up_states_15_dilations_0"), val = tensor([1, 1])]; + int32 up_states_15_groups_0 = const()[name = string("up_states_15_groups_0"), val = int32(1)]; + tensor up_states_15 = conv(dilations = up_states_15_dilations_0, groups = up_states_15_groups_0, pad = up_states_15_pad_0, pad_type = up_states_15_pad_type_0, strides = up_states_15_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states_15")]; + tensor gate_states_15 = silu(x = input_109)[name = string("gate_states_15")]; + tensor input_111 = mul(x = gate_states_15, y = up_states_15)[name = string("input_111")]; + string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; + tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; + tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; + tensor var_1879_axes_0 = const()[name = string("op_1879_axes_0"), val = tensor([2])]; + tensor var_1879 = squeeze(axes = var_1879_axes_0, x = hidden_states_63)[name = string("op_1879")]; + tensor var_1880 = const()[name = string("op_1880"), val = tensor([0, 2, 1])]; + tensor var_1881 = transpose(perm = var_1880, x = var_1879)[name = string("transpose_7")]; + tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_1881)[name = string("hidden_states_65_cast_fp16")]; + tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; + bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; + tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_33_cast_fp16")]; + tensor input_113_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_33_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor var_1889_axes_0 = const()[name = string("op_1889_axes_0"), val = tensor([-1])]; + tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743554048)))]; + tensor var_1889_cast_fp16 = layer_norm(axes = var_1889_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_17_input_layernorm_weight_to_fp16, x = input_113_cast_fp16)[name = string("op_1889_cast_fp16")]; + tensor var_1893 = const()[name = string("op_1893"), val = tensor([0, 2, 1])]; + tensor var_1895_axes_0 = const()[name = string("op_1895_axes_0"), val = tensor([2])]; + tensor var_1894 = transpose(perm = var_1893, x = var_1889_cast_fp16)[name = string("transpose_6")]; + tensor var_1895 = expand_dims(axes = var_1895_axes_0, x = var_1894)[name = string("op_1895")]; + string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; + tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; + tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; + int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; + tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_1895)[name = string("query_states_33")]; + string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; + tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; + tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; + int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; + tensor key_states_49 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_1895)[name = string("key_states_49")]; + string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; + tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; + tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; + int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; + tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_1895)[name = string("value_states_49")]; + tensor var_1915 = const()[name = string("op_1915"), val = tensor([1, 24, 128, 64])]; + tensor var_1916 = reshape(shape = var_1915, x = query_states_33)[name = string("op_1916")]; + tensor var_1917 = const()[name = string("op_1917"), val = tensor([0, 1, 3, 2])]; + tensor var_1919 = const()[name = string("op_1919"), val = tensor([1, 8, 128, 64])]; + tensor var_1920 = reshape(shape = var_1919, x = key_states_49)[name = string("op_1920")]; + tensor var_1921 = const()[name = string("op_1921"), val = tensor([0, 1, 3, 2])]; + tensor var_1923 = const()[name = string("op_1923"), val = tensor([1, 8, 128, 64])]; + tensor var_1924 = reshape(shape = var_1923, x = value_states_49)[name = string("op_1924")]; + tensor var_1925 = const()[name = string("op_1925"), val = tensor([0, 1, 3, 2])]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_225 = transpose(perm = var_1917, x = var_1916)[name = string("transpose_5")]; + tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = x_225)[name = string("x1_33")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = x_225)[name = string("x2_33")]; + tensor var_1943 = mul(x = x1_33, y = cos_7)[name = string("op_1943")]; + tensor var_1944 = mul(x = x2_33, y = sin_7)[name = string("op_1944")]; + tensor var_1945 = sub(x = var_1943, y = var_1944)[name = string("op_1945")]; + tensor var_1946 = mul(x = x2_33, y = cos_7)[name = string("op_1946")]; + tensor var_1947 = mul(x = x1_33, y = sin_7)[name = string("op_1947")]; + tensor var_1948 = add(x = var_1946, y = var_1947)[name = string("op_1948")]; + bool rotated_33_interleave_0 = const()[name = string("rotated_33_interleave_0"), val = bool(false)]; + tensor rotated_33 = concat(axis = var_55, interleave = rotated_33_interleave_0, values = (var_1945, var_1948))[name = string("rotated_33")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_229 = transpose(perm = var_1921, x = var_1920)[name = string("transpose_4")]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_229)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_229)[name = string("x2")]; + tensor var_1964 = mul(x = x1, y = cos_7)[name = string("op_1964")]; + tensor var_1965 = mul(x = x2, y = sin_7)[name = string("op_1965")]; + tensor var_1966 = sub(x = var_1964, y = var_1965)[name = string("op_1966")]; + tensor var_1967 = mul(x = x2, y = cos_7)[name = string("op_1967")]; + tensor var_1968 = mul(x = x1, y = sin_7)[name = string("op_1968")]; + tensor var_1969 = add(x = var_1967, y = var_1968)[name = string("op_1969")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated = concat(axis = var_55, interleave = rotated_interleave_0, values = (var_1966, var_1969))[name = string("rotated")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([17])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([18])]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, var_402, concat_147_values3_0))[name = string("concat_147")]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = rotated, x = coreml_update_state_33)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_34 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_34")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([45])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([46])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; + tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; + tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, var_402, concat_151_values3_0))[name = string("concat_151")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_51 = transpose(perm = var_1925, x = var_1924)[name = string("transpose_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = value_states_51, x = coreml_update_state_34)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_35 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_35")]; + tensor var_1992_begin_0 = const()[name = string("op_1992_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_1992_end_0 = const()[name = string("op_1992_end_0"), val = tensor([18, 8, 1024, 128])]; + tensor var_1992_end_mask_0 = const()[name = string("op_1992_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1992_cast_fp16 = slice_by_index(begin = var_1992_begin_0, end = var_1992_end_0, end_mask = var_1992_end_mask_0, x = coreml_update_state_35)[name = string("op_1992_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1992_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1994_begin_0 = const()[name = string("op_1994_begin_0"), val = tensor([45, 0, 0, 0])]; + tensor var_1994_end_0 = const()[name = string("op_1994_end_0"), val = tensor([46, 8, 1024, 128])]; + tensor var_1994_end_mask_0 = const()[name = string("op_1994_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1994_cast_fp16 = slice_by_index(begin = var_1994_begin_0, end = var_1994_end_0, end_mask = var_1994_end_mask_0, x = coreml_update_state_35)[name = string("op_1994_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1994_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_235_axes_0 = const()[name = string("x_235_axes_0"), val = tensor([1])]; + tensor x_235_cast_fp16 = expand_dims(axes = x_235_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_2003 = const()[name = string("op_2003"), val = tensor([1, 3, 1, 1])]; + tensor x_237_cast_fp16 = tile(reps = var_2003, x = x_235_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor var_2007 = const()[name = string("op_2007"), val = tensor([1, -1, 1024, 128])]; + tensor var_2008_cast_fp16 = reshape(shape = var_2007, x = x_237_cast_fp16)[name = string("op_2008_cast_fp16")]; + tensor x_241_axes_0 = const()[name = string("x_241_axes_0"), val = tensor([1])]; + tensor x_241_cast_fp16 = expand_dims(axes = x_241_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_2010 = const()[name = string("op_2010"), val = tensor([1, 3, 1, 1])]; + tensor x_243_cast_fp16 = tile(reps = var_2010, x = x_241_cast_fp16)[name = string("x_243_cast_fp16")]; + bool var_2017_transpose_x_0 = const()[name = string("op_2017_transpose_x_0"), val = bool(false)]; + bool var_2017_transpose_y_0 = const()[name = string("op_2017_transpose_y_0"), val = bool(true)]; + tensor var_2017_cast_fp16 = matmul(transpose_x = var_2017_transpose_x_0, transpose_y = var_2017_transpose_y_0, x = rotated_33, y = var_2008_cast_fp16)[name = string("op_2017_cast_fp16")]; + fp16 var_2018_to_fp16 = const()[name = string("op_2018_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_cast_fp16 = mul(x = var_2017_cast_fp16, y = var_2018_to_fp16)[name = string("attn_weights_cast_fp16")]; + tensor x_245_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_245_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_245_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; + tensor x_247_cast_fp16 = sub(x = x_245_cast_fp16, y = reduce_max_8_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_247_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_2029_axes_0 = const()[name = string("op_2029_axes_0"), val = tensor([-1])]; + bool var_2029_keep_dims_0 = const()[name = string("op_2029_keep_dims_0"), val = bool(true)]; + tensor var_2029_cast_fp16 = reduce_sum(axes = var_2029_axes_0, keep_dims = var_2029_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_2029_cast_fp16")]; + tensor var_2030_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_2029_cast_fp16)[name = string("op_2030_cast_fp16")]; + tensor concat_156 = const()[name = string("concat_156"), val = tensor([24, 64, 1024])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_2030_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor concat_157 = const()[name = string("concat_157"), val = tensor([24, 1024, 128])]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_243_cast_fp16)[name = string("reshape_25_cast_fp16")]; + bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; + bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; + tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; + tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 24, 64, 128])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor var_2033_perm_0 = const()[name = string("op_2033_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2035 = const()[name = string("op_2035"), val = tensor([1, 64, 3072])]; + tensor var_2033_cast_fp16 = transpose(perm = var_2033_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_2")]; + tensor input_117_cast_fp16 = reshape(shape = var_2035, x = var_2033_cast_fp16)[name = string("input_117_cast_fp16")]; + tensor model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743560256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750638208))))[name = string("model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = linear_8_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_69_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_119_cast_fp16 = sub(x = hidden_states_69_cast_fp16, y = mean_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor var_2046_axes_0 = const()[name = string("op_2046_axes_0"), val = tensor([-1])]; + tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(750687424)))]; + tensor var_2046_cast_fp16 = layer_norm(axes = var_2046_axes_0, epsilon = var_57_to_fp16, gamma = model_model_layers_17_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_2046_cast_fp16")]; + tensor var_2053 = const()[name = string("op_2053"), val = tensor([0, 2, 1])]; + tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; + tensor var_2054 = transpose(perm = var_2053, x = var_2046_cast_fp16)[name = string("transpose_1")]; + tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_2054)[name = string("input_121")]; + string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; + tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; + tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; + int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; + tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_states")]; + tensor gate_states = silu(x = input_123)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_2076_axes_0 = const()[name = string("op_2076_axes_0"), val = tensor([2])]; + tensor var_2076 = squeeze(axes = var_2076_axes_0, x = hidden_states_1)[name = string("op_2076")]; + tensor var_2077 = const()[name = string("op_2077"), val = tensor([0, 2, 1])]; + tensor var_2078 = transpose(perm = var_2077, x = var_2076)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_69_cast_fp16, y = var_2078)[name = string("op_2079_cast_fp16")]; + } -> (output_hidden_states); +} \ No newline at end of file