diff --git "a/llama_FFN_PF_lut8_chunk_01of04.mlmodelc/model.mil" "b/llama_FFN_PF_lut8_chunk_01of04.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/llama_FFN_PF_lut8_chunk_01of04.mlmodelc/model.mil" @@ -0,0 +1,2893 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9437312))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9633984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12779776))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12845376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15991168))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16056768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41222656))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41747008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66912896))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67437248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92603136))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92799808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102237056))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102433728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105579520))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105645120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108790912))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108856512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134022400))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134546752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159712640))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160236992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185402880))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185599552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195036800))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195233472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198379264))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198444864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201590656))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201656256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226822144))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227346496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252512384))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253036736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278202624))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278399296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287836544))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288033216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291179008))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291244608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294390400))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294456000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319621888))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320146240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345312128))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345836480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371002368))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371199040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380636288))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380832960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383978752))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384044352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387190144))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387255744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412421632))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412945984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438111872))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438636224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463802112))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473436032))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473632704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476778496))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476844096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479989888))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480055488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505221376))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505745728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530911616))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531435968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556601856))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556798528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566235776))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566432448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569578240))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569643840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572789632))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572855232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598021120))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598545472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(623711360))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(624235712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649401600))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; + int32 var_47 = const()[name = string("op_47"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_231_axis_0 = const()[name = string("op_231_axis_0"), val = int32(1)]; + int32 var_231_batch_dims_0 = const()[name = string("op_231_batch_dims_0"), val = int32(0)]; + bool var_231_validate_indices_0 = const()[name = string("op_231_validate_indices_0"), val = bool(false)]; + tensor var_52_to_fp16 = const()[name = string("op_52_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649598272)))]; + tensor var_231_cast_fp16 = gather(axis = var_231_axis_0, batch_dims = var_231_batch_dims_0, indices = select_0, validate_indices = var_231_validate_indices_0, x = var_52_to_fp16)[name = string("op_231_cast_fp16")]; + tensor var_232 = const()[name = string("op_232"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_232, x = var_231_cast_fp16)[name = string("sin_1_cast_fp16")]; + int32 var_236_axis_0 = const()[name = string("op_236_axis_0"), val = int32(1)]; + int32 var_236_batch_dims_0 = const()[name = string("op_236_batch_dims_0"), val = int32(0)]; + bool var_236_validate_indices_0 = const()[name = string("op_236_validate_indices_0"), val = bool(false)]; + tensor var_46_to_fp16 = const()[name = string("op_46_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(683152768)))]; + tensor var_236_cast_fp16 = gather(axis = var_236_axis_0, batch_dims = var_236_batch_dims_0, indices = select_0, validate_indices = var_236_validate_indices_0, x = var_46_to_fp16)[name = string("op_236_cast_fp16")]; + tensor var_237 = const()[name = string("op_237"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_237, x = var_236_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_245_axes_0 = const()[name = string("op_245_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716707264)))]; + fp16 var_42_to_fp16 = const()[name = string("op_42_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_245_cast_fp16 = layer_norm(axes = var_245_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_245_cast_fp16")]; + tensor var_248 = const()[name = string("op_248"), val = tensor([0, 2, 1])]; + tensor var_250_axes_0 = const()[name = string("op_250_axes_0"), val = tensor([2])]; + tensor var_249 = transpose(perm = var_248, x = var_245_cast_fp16)[name = string("transpose_27")]; + tensor var_250 = expand_dims(axes = var_250_axes_0, x = var_249)[name = string("op_250")]; + string var_257_pad_type_0 = const()[name = string("op_257_pad_type_0"), val = string("valid")]; + tensor var_257_strides_0 = const()[name = string("op_257_strides_0"), val = tensor([1, 1])]; + tensor var_257_pad_0 = const()[name = string("op_257_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_257_dilations_0 = const()[name = string("op_257_dilations_0"), val = tensor([1, 1])]; + int32 var_257_groups_0 = const()[name = string("op_257_groups_0"), val = int32(1)]; + tensor var_257 = conv(dilations = var_257_dilations_0, groups = var_257_groups_0, pad = var_257_pad_0, pad_type = var_257_pad_type_0, strides = var_257_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_250)[name = string("op_257")]; + tensor var_258 = const()[name = string("op_258"), val = tensor([1, 24, 1, 128])]; + tensor var_259 = reshape(shape = var_258, x = var_257)[name = string("op_259")]; + string var_266_pad_type_0 = const()[name = string("op_266_pad_type_0"), val = string("valid")]; + tensor var_266_strides_0 = const()[name = string("op_266_strides_0"), val = tensor([1, 1])]; + tensor var_266_pad_0 = const()[name = string("op_266_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_266_dilations_0 = const()[name = string("op_266_dilations_0"), val = tensor([1, 1])]; + int32 var_266_groups_0 = const()[name = string("op_266_groups_0"), val = int32(1)]; + tensor var_266 = conv(dilations = var_266_dilations_0, groups = var_266_groups_0, pad = var_266_pad_0, pad_type = var_266_pad_type_0, strides = var_266_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_250)[name = string("op_266")]; + tensor var_267 = const()[name = string("op_267"), val = tensor([1, 8, 1, 128])]; + tensor var_268 = reshape(shape = var_267, x = var_266)[name = string("op_268")]; + string var_275_pad_type_0 = const()[name = string("op_275_pad_type_0"), val = string("valid")]; + tensor var_275_strides_0 = const()[name = string("op_275_strides_0"), val = tensor([1, 1])]; + tensor var_275_pad_0 = const()[name = string("op_275_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_275_dilations_0 = const()[name = string("op_275_dilations_0"), val = tensor([1, 1])]; + int32 var_275_groups_0 = const()[name = string("op_275_groups_0"), val = int32(1)]; + tensor var_275 = conv(dilations = var_275_dilations_0, groups = var_275_groups_0, pad = var_275_pad_0, pad_type = var_275_pad_type_0, strides = var_275_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_250)[name = string("op_275")]; + tensor var_276 = const()[name = string("op_276"), val = tensor([1, 8, 1, 128])]; + tensor var_277 = reshape(shape = var_276, x = var_275)[name = string("op_277")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_259)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_259)[name = string("x2_1")]; + tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; + tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; + tensor var_291_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_291_cast_fp16")]; + tensor var_292_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_292_cast_fp16")]; + tensor var_293_cast_fp16 = sub(x = var_291_cast_fp16, y = var_292_cast_fp16)[name = string("op_293_cast_fp16")]; + tensor var_294_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_294_cast_fp16")]; + tensor var_295_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_295_cast_fp16")]; + tensor var_296_cast_fp16 = add(x = var_294_cast_fp16, y = var_295_cast_fp16)[name = string("op_296_cast_fp16")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_47, interleave = rotated_1_interleave_0, values = (var_293_cast_fp16, var_296_cast_fp16))[name = string("rotated_1_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_268)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_268)[name = string("x2_3")]; + tensor var_312_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_312_cast_fp16")]; + tensor var_313_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_313_cast_fp16")]; + tensor var_314_cast_fp16 = sub(x = var_312_cast_fp16, y = var_313_cast_fp16)[name = string("op_314_cast_fp16")]; + tensor var_315_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_315_cast_fp16")]; + tensor var_316_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_316_cast_fp16")]; + tensor var_317_cast_fp16 = add(x = var_315_cast_fp16, y = var_316_cast_fp16)[name = string("op_317_cast_fp16")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_47, interleave = rotated_3_interleave_0, values = (var_314_cast_fp16, var_317_cast_fp16))[name = string("rotated_3_cast_fp16")]; + int32 var_321 = const()[name = string("op_321"), val = int32(1)]; + tensor var_322 = add(x = current_pos, y = var_321)[name = string("op_322")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_322, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([28])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([29])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_322, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_277, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; + tensor var_337_begin_0 = const()[name = string("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = string("op_337_end_0"), val = tensor([1, 8, 1024, 128])]; + tensor var_337_end_mask_0 = const()[name = string("op_337_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = coreml_update_state_15)[name = string("op_337_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_337_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_339_begin_0 = const()[name = string("op_339_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_339_end_0 = const()[name = string("op_339_end_0"), val = tensor([29, 8, 1024, 128])]; + tensor var_339_end_mask_0 = const()[name = string("op_339_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = coreml_update_state_15)[name = string("op_339_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_339_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_348 = const()[name = string("op_348"), val = tensor([1, 3, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_348, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_352 = const()[name = string("op_352"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_3_cast_fp16 = reshape(shape = var_352, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_355 = const()[name = string("op_355"), val = tensor([1, 3, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_355, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_359 = const()[name = string("op_359"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_3_cast_fp16 = reshape(shape = var_359, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; + bool var_362_transpose_x_1 = const()[name = string("op_362_transpose_x_1"), val = bool(false)]; + bool var_362_transpose_y_1 = const()[name = string("op_362_transpose_y_1"), val = bool(true)]; + tensor var_362_cast_fp16 = matmul(transpose_x = var_362_transpose_x_1, transpose_y = var_362_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_362_cast_fp16")]; + fp16 var_363_to_fp16 = const()[name = string("op_363_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_362_cast_fp16, y = var_363_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_374_axes_0 = const()[name = string("op_374_axes_0"), val = tensor([-1])]; + bool var_374_keep_dims_0 = const()[name = string("op_374_keep_dims_0"), val = bool(true)]; + tensor var_374_cast_fp16 = reduce_sum(axes = var_374_axes_0, keep_dims = var_374_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_374_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_374_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_377_perm_0 = const()[name = string("op_377_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_379 = const()[name = string("op_379"), val = tensor([1, 1, 3072])]; + tensor var_377_cast_fp16 = transpose(perm = var_377_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_26")]; + tensor input_5_cast_fp16 = reshape(shape = var_379, x = var_377_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716713472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726150720))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726347392)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_390_axes_0 = const()[name = string("op_390_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726353600)))]; + tensor var_390_cast_fp16 = layer_norm(axes = var_390_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_390_cast_fp16")]; + tensor var_397 = const()[name = string("op_397"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_398 = transpose(perm = var_397, x = var_390_cast_fp16)[name = string("transpose_25")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_398)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_420_axes_0 = const()[name = string("op_420_axes_0"), val = tensor([2])]; + tensor var_420 = squeeze(axes = var_420_axes_0, x = hidden_states_7)[name = string("op_420")]; + tensor var_421 = const()[name = string("op_421"), val = tensor([0, 2, 1])]; + tensor var_422 = transpose(perm = var_421, x = var_420)[name = string("transpose_24")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_422)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_430_axes_0 = const()[name = string("op_430_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726359808)))]; + tensor var_430_cast_fp16 = layer_norm(axes = var_430_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_430_cast_fp16")]; + tensor var_433 = const()[name = string("op_433"), val = tensor([0, 2, 1])]; + tensor var_435_axes_0 = const()[name = string("op_435_axes_0"), val = tensor([2])]; + tensor var_434 = transpose(perm = var_433, x = var_430_cast_fp16)[name = string("transpose_23")]; + tensor var_435 = expand_dims(axes = var_435_axes_0, x = var_434)[name = string("op_435")]; + string var_442_pad_type_0 = const()[name = string("op_442_pad_type_0"), val = string("valid")]; + tensor var_442_strides_0 = const()[name = string("op_442_strides_0"), val = tensor([1, 1])]; + tensor var_442_pad_0 = const()[name = string("op_442_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_442_dilations_0 = const()[name = string("op_442_dilations_0"), val = tensor([1, 1])]; + int32 var_442_groups_0 = const()[name = string("op_442_groups_0"), val = int32(1)]; + tensor var_442 = conv(dilations = var_442_dilations_0, groups = var_442_groups_0, pad = var_442_pad_0, pad_type = var_442_pad_type_0, strides = var_442_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_435)[name = string("op_442")]; + tensor var_443 = const()[name = string("op_443"), val = tensor([1, 24, 1, 128])]; + tensor var_444 = reshape(shape = var_443, x = var_442)[name = string("op_444")]; + string var_451_pad_type_0 = const()[name = string("op_451_pad_type_0"), val = string("valid")]; + tensor var_451_strides_0 = const()[name = string("op_451_strides_0"), val = tensor([1, 1])]; + tensor var_451_pad_0 = const()[name = string("op_451_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_451_dilations_0 = const()[name = string("op_451_dilations_0"), val = tensor([1, 1])]; + int32 var_451_groups_0 = const()[name = string("op_451_groups_0"), val = int32(1)]; + tensor var_451 = conv(dilations = var_451_dilations_0, groups = var_451_groups_0, pad = var_451_pad_0, pad_type = var_451_pad_type_0, strides = var_451_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_435)[name = string("op_451")]; + tensor var_452 = const()[name = string("op_452"), val = tensor([1, 8, 1, 128])]; + tensor var_453 = reshape(shape = var_452, x = var_451)[name = string("op_453")]; + string var_460_pad_type_0 = const()[name = string("op_460_pad_type_0"), val = string("valid")]; + tensor var_460_strides_0 = const()[name = string("op_460_strides_0"), val = tensor([1, 1])]; + tensor var_460_pad_0 = const()[name = string("op_460_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_460_dilations_0 = const()[name = string("op_460_dilations_0"), val = tensor([1, 1])]; + int32 var_460_groups_0 = const()[name = string("op_460_groups_0"), val = int32(1)]; + tensor var_460 = conv(dilations = var_460_dilations_0, groups = var_460_groups_0, pad = var_460_pad_0, pad_type = var_460_pad_type_0, strides = var_460_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_435)[name = string("op_460")]; + tensor var_461 = const()[name = string("op_461"), val = tensor([1, 8, 1, 128])]; + tensor var_462 = reshape(shape = var_461, x = var_460)[name = string("op_462")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_444)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_444)[name = string("x2_5")]; + tensor var_476_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_476_cast_fp16")]; + tensor var_477_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_477_cast_fp16")]; + tensor var_478_cast_fp16 = sub(x = var_476_cast_fp16, y = var_477_cast_fp16)[name = string("op_478_cast_fp16")]; + tensor var_479_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_479_cast_fp16")]; + tensor var_480_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_480_cast_fp16")]; + tensor var_481_cast_fp16 = add(x = var_479_cast_fp16, y = var_480_cast_fp16)[name = string("op_481_cast_fp16")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_47, interleave = rotated_5_interleave_0, values = (var_478_cast_fp16, var_481_cast_fp16))[name = string("rotated_5_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_453)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_453)[name = string("x2_7")]; + tensor var_497_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_497_cast_fp16")]; + tensor var_498_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_498_cast_fp16")]; + tensor var_499_cast_fp16 = sub(x = var_497_cast_fp16, y = var_498_cast_fp16)[name = string("op_499_cast_fp16")]; + tensor var_500_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_500_cast_fp16")]; + tensor var_501_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_501_cast_fp16")]; + tensor var_502_cast_fp16 = add(x = var_500_cast_fp16, y = var_501_cast_fp16)[name = string("op_502_cast_fp16")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_47, interleave = rotated_7_interleave_0, values = (var_499_cast_fp16, var_502_cast_fp16))[name = string("rotated_7_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; + tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_322, concat_11_values3_0))[name = string("concat_11")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_15)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([29])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([30])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_322, concat_15_values3_0))[name = string("concat_15")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_462, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; + tensor var_522_begin_0 = const()[name = string("op_522_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_522_end_0 = const()[name = string("op_522_end_0"), val = tensor([2, 8, 1024, 128])]; + tensor var_522_end_mask_0 = const()[name = string("op_522_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = coreml_update_state_17)[name = string("op_522_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_522_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_524_begin_0 = const()[name = string("op_524_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_524_end_0 = const()[name = string("op_524_end_0"), val = tensor([30, 8, 1024, 128])]; + tensor var_524_end_mask_0 = const()[name = string("op_524_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_524_cast_fp16 = slice_by_index(begin = var_524_begin_0, end = var_524_end_0, end_mask = var_524_end_mask_0, x = coreml_update_state_17)[name = string("op_524_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_524_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_533 = const()[name = string("op_533"), val = tensor([1, 3, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_533, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_537 = const()[name = string("op_537"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_7_cast_fp16 = reshape(shape = var_537, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_540 = const()[name = string("op_540"), val = tensor([1, 3, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_540, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_544 = const()[name = string("op_544"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_7_cast_fp16 = reshape(shape = var_544, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; + bool var_547_transpose_x_1 = const()[name = string("op_547_transpose_x_1"), val = bool(false)]; + bool var_547_transpose_y_1 = const()[name = string("op_547_transpose_y_1"), val = bool(true)]; + tensor var_547_cast_fp16 = matmul(transpose_x = var_547_transpose_x_1, transpose_y = var_547_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_547_cast_fp16")]; + fp16 var_548_to_fp16 = const()[name = string("op_548_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_559_axes_0 = const()[name = string("op_559_axes_0"), val = tensor([-1])]; + bool var_559_keep_dims_0 = const()[name = string("op_559_keep_dims_0"), val = bool(true)]; + tensor var_559_cast_fp16 = reduce_sum(axes = var_559_axes_0, keep_dims = var_559_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_559_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_559_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_562_perm_0 = const()[name = string("op_562_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_564 = const()[name = string("op_564"), val = tensor([1, 1, 3072])]; + tensor var_562_cast_fp16 = transpose(perm = var_562_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_22")]; + tensor input_19_cast_fp16 = reshape(shape = var_564, x = var_562_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726366016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735803264))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_575_axes_0 = const()[name = string("op_575_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735999936)))]; + tensor var_575_cast_fp16 = layer_norm(axes = var_575_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_575_cast_fp16")]; + tensor var_582 = const()[name = string("op_582"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_583 = transpose(perm = var_582, x = var_575_cast_fp16)[name = string("transpose_21")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_583)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_605_axes_0 = const()[name = string("op_605_axes_0"), val = tensor([2])]; + tensor var_605 = squeeze(axes = var_605_axes_0, x = hidden_states_15)[name = string("op_605")]; + tensor var_606 = const()[name = string("op_606"), val = tensor([0, 2, 1])]; + tensor var_607 = transpose(perm = var_606, x = var_605)[name = string("transpose_20")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_607)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_615_axes_0 = const()[name = string("op_615_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736006144)))]; + tensor var_615_cast_fp16 = layer_norm(axes = var_615_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_615_cast_fp16")]; + tensor var_618 = const()[name = string("op_618"), val = tensor([0, 2, 1])]; + tensor var_620_axes_0 = const()[name = string("op_620_axes_0"), val = tensor([2])]; + tensor var_619 = transpose(perm = var_618, x = var_615_cast_fp16)[name = string("transpose_19")]; + tensor var_620 = expand_dims(axes = var_620_axes_0, x = var_619)[name = string("op_620")]; + string var_627_pad_type_0 = const()[name = string("op_627_pad_type_0"), val = string("valid")]; + tensor var_627_strides_0 = const()[name = string("op_627_strides_0"), val = tensor([1, 1])]; + tensor var_627_pad_0 = const()[name = string("op_627_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_627_dilations_0 = const()[name = string("op_627_dilations_0"), val = tensor([1, 1])]; + int32 var_627_groups_0 = const()[name = string("op_627_groups_0"), val = int32(1)]; + tensor var_627 = conv(dilations = var_627_dilations_0, groups = var_627_groups_0, pad = var_627_pad_0, pad_type = var_627_pad_type_0, strides = var_627_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_620)[name = string("op_627")]; + tensor var_628 = const()[name = string("op_628"), val = tensor([1, 24, 1, 128])]; + tensor var_629 = reshape(shape = var_628, x = var_627)[name = string("op_629")]; + string var_636_pad_type_0 = const()[name = string("op_636_pad_type_0"), val = string("valid")]; + tensor var_636_strides_0 = const()[name = string("op_636_strides_0"), val = tensor([1, 1])]; + tensor var_636_pad_0 = const()[name = string("op_636_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_636_dilations_0 = const()[name = string("op_636_dilations_0"), val = tensor([1, 1])]; + int32 var_636_groups_0 = const()[name = string("op_636_groups_0"), val = int32(1)]; + tensor var_636 = conv(dilations = var_636_dilations_0, groups = var_636_groups_0, pad = var_636_pad_0, pad_type = var_636_pad_type_0, strides = var_636_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_620)[name = string("op_636")]; + tensor var_637 = const()[name = string("op_637"), val = tensor([1, 8, 1, 128])]; + tensor var_638 = reshape(shape = var_637, x = var_636)[name = string("op_638")]; + string var_645_pad_type_0 = const()[name = string("op_645_pad_type_0"), val = string("valid")]; + tensor var_645_strides_0 = const()[name = string("op_645_strides_0"), val = tensor([1, 1])]; + tensor var_645_pad_0 = const()[name = string("op_645_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_645_dilations_0 = const()[name = string("op_645_dilations_0"), val = tensor([1, 1])]; + int32 var_645_groups_0 = const()[name = string("op_645_groups_0"), val = int32(1)]; + tensor var_645 = conv(dilations = var_645_dilations_0, groups = var_645_groups_0, pad = var_645_pad_0, pad_type = var_645_pad_type_0, strides = var_645_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_620)[name = string("op_645")]; + tensor var_646 = const()[name = string("op_646"), val = tensor([1, 8, 1, 128])]; + tensor var_647 = reshape(shape = var_646, x = var_645)[name = string("op_647")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_629)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_629)[name = string("x2_9")]; + tensor var_661_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_661_cast_fp16")]; + tensor var_662_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_662_cast_fp16")]; + tensor var_663_cast_fp16 = sub(x = var_661_cast_fp16, y = var_662_cast_fp16)[name = string("op_663_cast_fp16")]; + tensor var_664_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_664_cast_fp16")]; + tensor var_665_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_665_cast_fp16")]; + tensor var_666_cast_fp16 = add(x = var_664_cast_fp16, y = var_665_cast_fp16)[name = string("op_666_cast_fp16")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_47, interleave = rotated_9_interleave_0, values = (var_663_cast_fp16, var_666_cast_fp16))[name = string("rotated_9_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_638)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_638)[name = string("x2_11")]; + tensor var_682_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_682_cast_fp16")]; + tensor var_683_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_683_cast_fp16")]; + tensor var_684_cast_fp16 = sub(x = var_682_cast_fp16, y = var_683_cast_fp16)[name = string("op_684_cast_fp16")]; + tensor var_685_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_685_cast_fp16")]; + tensor var_686_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_686_cast_fp16")]; + tensor var_687_cast_fp16 = add(x = var_685_cast_fp16, y = var_686_cast_fp16)[name = string("op_687_cast_fp16")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11_cast_fp16 = concat(axis = var_47, interleave = rotated_11_interleave_0, values = (var_684_cast_fp16, var_687_cast_fp16))[name = string("rotated_11_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; + tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_322, concat_19_values3_0))[name = string("concat_19")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([30])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([31])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_322, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_647, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; + tensor var_707_begin_0 = const()[name = string("op_707_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_707_end_0 = const()[name = string("op_707_end_0"), val = tensor([3, 8, 1024, 128])]; + tensor var_707_end_mask_0 = const()[name = string("op_707_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_707_cast_fp16 = slice_by_index(begin = var_707_begin_0, end = var_707_end_0, end_mask = var_707_end_mask_0, x = coreml_update_state_19)[name = string("op_707_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_707_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_709_begin_0 = const()[name = string("op_709_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_709_end_0 = const()[name = string("op_709_end_0"), val = tensor([31, 8, 1024, 128])]; + tensor var_709_end_mask_0 = const()[name = string("op_709_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_709_cast_fp16 = slice_by_index(begin = var_709_begin_0, end = var_709_end_0, end_mask = var_709_end_mask_0, x = coreml_update_state_19)[name = string("op_709_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_709_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_718 = const()[name = string("op_718"), val = tensor([1, 3, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_718, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_722 = const()[name = string("op_722"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_11_cast_fp16 = reshape(shape = var_722, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_725 = const()[name = string("op_725"), val = tensor([1, 3, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_725, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor var_729 = const()[name = string("op_729"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_11_cast_fp16 = reshape(shape = var_729, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + bool var_732_transpose_x_1 = const()[name = string("op_732_transpose_x_1"), val = bool(false)]; + bool var_732_transpose_y_1 = const()[name = string("op_732_transpose_y_1"), val = bool(true)]; + tensor var_732_cast_fp16 = matmul(transpose_x = var_732_transpose_x_1, transpose_y = var_732_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_732_cast_fp16")]; + fp16 var_733_to_fp16 = const()[name = string("op_733_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_732_cast_fp16, y = var_733_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_744_axes_0 = const()[name = string("op_744_axes_0"), val = tensor([-1])]; + bool var_744_keep_dims_0 = const()[name = string("op_744_keep_dims_0"), val = bool(true)]; + tensor var_744_cast_fp16 = reduce_sum(axes = var_744_axes_0, keep_dims = var_744_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_744_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_744_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_747_perm_0 = const()[name = string("op_747_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_749 = const()[name = string("op_749"), val = tensor([1, 1, 3072])]; + tensor var_747_cast_fp16 = transpose(perm = var_747_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_18")]; + tensor input_33_cast_fp16 = reshape(shape = var_749, x = var_747_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736012352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745449600))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_760_axes_0 = const()[name = string("op_760_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745646272)))]; + tensor var_760_cast_fp16 = layer_norm(axes = var_760_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_760_cast_fp16")]; + tensor var_767 = const()[name = string("op_767"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_768 = transpose(perm = var_767, x = var_760_cast_fp16)[name = string("transpose_17")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_768)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_790_axes_0 = const()[name = string("op_790_axes_0"), val = tensor([2])]; + tensor var_790 = squeeze(axes = var_790_axes_0, x = hidden_states_23)[name = string("op_790")]; + tensor var_791 = const()[name = string("op_791"), val = tensor([0, 2, 1])]; + tensor var_792 = transpose(perm = var_791, x = var_790)[name = string("transpose_16")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_792)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_800_axes_0 = const()[name = string("op_800_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745652480)))]; + tensor var_800_cast_fp16 = layer_norm(axes = var_800_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_800_cast_fp16")]; + tensor var_803 = const()[name = string("op_803"), val = tensor([0, 2, 1])]; + tensor var_805_axes_0 = const()[name = string("op_805_axes_0"), val = tensor([2])]; + tensor var_804 = transpose(perm = var_803, x = var_800_cast_fp16)[name = string("transpose_15")]; + tensor var_805 = expand_dims(axes = var_805_axes_0, x = var_804)[name = string("op_805")]; + string var_812_pad_type_0 = const()[name = string("op_812_pad_type_0"), val = string("valid")]; + tensor var_812_strides_0 = const()[name = string("op_812_strides_0"), val = tensor([1, 1])]; + tensor var_812_pad_0 = const()[name = string("op_812_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_812_dilations_0 = const()[name = string("op_812_dilations_0"), val = tensor([1, 1])]; + int32 var_812_groups_0 = const()[name = string("op_812_groups_0"), val = int32(1)]; + tensor var_812 = conv(dilations = var_812_dilations_0, groups = var_812_groups_0, pad = var_812_pad_0, pad_type = var_812_pad_type_0, strides = var_812_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_805)[name = string("op_812")]; + tensor var_813 = const()[name = string("op_813"), val = tensor([1, 24, 1, 128])]; + tensor var_814 = reshape(shape = var_813, x = var_812)[name = string("op_814")]; + string var_821_pad_type_0 = const()[name = string("op_821_pad_type_0"), val = string("valid")]; + tensor var_821_strides_0 = const()[name = string("op_821_strides_0"), val = tensor([1, 1])]; + tensor var_821_pad_0 = const()[name = string("op_821_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_821_dilations_0 = const()[name = string("op_821_dilations_0"), val = tensor([1, 1])]; + int32 var_821_groups_0 = const()[name = string("op_821_groups_0"), val = int32(1)]; + tensor var_821 = conv(dilations = var_821_dilations_0, groups = var_821_groups_0, pad = var_821_pad_0, pad_type = var_821_pad_type_0, strides = var_821_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_805)[name = string("op_821")]; + tensor var_822 = const()[name = string("op_822"), val = tensor([1, 8, 1, 128])]; + tensor var_823 = reshape(shape = var_822, x = var_821)[name = string("op_823")]; + string var_830_pad_type_0 = const()[name = string("op_830_pad_type_0"), val = string("valid")]; + tensor var_830_strides_0 = const()[name = string("op_830_strides_0"), val = tensor([1, 1])]; + tensor var_830_pad_0 = const()[name = string("op_830_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_830_dilations_0 = const()[name = string("op_830_dilations_0"), val = tensor([1, 1])]; + int32 var_830_groups_0 = const()[name = string("op_830_groups_0"), val = int32(1)]; + tensor var_830 = conv(dilations = var_830_dilations_0, groups = var_830_groups_0, pad = var_830_pad_0, pad_type = var_830_pad_type_0, strides = var_830_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_805)[name = string("op_830")]; + tensor var_831 = const()[name = string("op_831"), val = tensor([1, 8, 1, 128])]; + tensor var_832 = reshape(shape = var_831, x = var_830)[name = string("op_832")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_814)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_814)[name = string("x2_13")]; + tensor var_846_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_846_cast_fp16")]; + tensor var_847_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_847_cast_fp16")]; + tensor var_848_cast_fp16 = sub(x = var_846_cast_fp16, y = var_847_cast_fp16)[name = string("op_848_cast_fp16")]; + tensor var_849_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_849_cast_fp16")]; + tensor var_850_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_850_cast_fp16")]; + tensor var_851_cast_fp16 = add(x = var_849_cast_fp16, y = var_850_cast_fp16)[name = string("op_851_cast_fp16")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13_cast_fp16 = concat(axis = var_47, interleave = rotated_13_interleave_0, values = (var_848_cast_fp16, var_851_cast_fp16))[name = string("rotated_13_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_823)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_823)[name = string("x2_15")]; + tensor var_867_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_867_cast_fp16")]; + tensor var_868_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_868_cast_fp16")]; + tensor var_869_cast_fp16 = sub(x = var_867_cast_fp16, y = var_868_cast_fp16)[name = string("op_869_cast_fp16")]; + tensor var_870_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_870_cast_fp16")]; + tensor var_871_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_871_cast_fp16")]; + tensor var_872_cast_fp16 = add(x = var_870_cast_fp16, y = var_871_cast_fp16)[name = string("op_872_cast_fp16")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15_cast_fp16 = concat(axis = var_47, interleave = rotated_15_interleave_0, values = (var_869_cast_fp16, var_872_cast_fp16))[name = string("rotated_15_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_322, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([31])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([32])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; + tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; + tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; + int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; + bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; + tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_322, concat_31_values3_0))[name = string("concat_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_832, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; + tensor var_892_begin_0 = const()[name = string("op_892_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_892_end_0 = const()[name = string("op_892_end_0"), val = tensor([4, 8, 1024, 128])]; + tensor var_892_end_mask_0 = const()[name = string("op_892_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_892_cast_fp16 = slice_by_index(begin = var_892_begin_0, end = var_892_end_0, end_mask = var_892_end_mask_0, x = coreml_update_state_21)[name = string("op_892_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_892_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_894_begin_0 = const()[name = string("op_894_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_894_end_0 = const()[name = string("op_894_end_0"), val = tensor([32, 8, 1024, 128])]; + tensor var_894_end_mask_0 = const()[name = string("op_894_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_894_cast_fp16 = slice_by_index(begin = var_894_begin_0, end = var_894_end_0, end_mask = var_894_end_mask_0, x = coreml_update_state_21)[name = string("op_894_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_894_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_903 = const()[name = string("op_903"), val = tensor([1, 3, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_903, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_907 = const()[name = string("op_907"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_15_cast_fp16 = reshape(shape = var_907, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_910 = const()[name = string("op_910"), val = tensor([1, 3, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_910, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_914 = const()[name = string("op_914"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_15_cast_fp16 = reshape(shape = var_914, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")]; + bool var_917_transpose_x_1 = const()[name = string("op_917_transpose_x_1"), val = bool(false)]; + bool var_917_transpose_y_1 = const()[name = string("op_917_transpose_y_1"), val = bool(true)]; + tensor var_917_cast_fp16 = matmul(transpose_x = var_917_transpose_x_1, transpose_y = var_917_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_917_cast_fp16")]; + fp16 var_918_to_fp16 = const()[name = string("op_918_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_917_cast_fp16, y = var_918_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_929_axes_0 = const()[name = string("op_929_axes_0"), val = tensor([-1])]; + bool var_929_keep_dims_0 = const()[name = string("op_929_keep_dims_0"), val = bool(true)]; + tensor var_929_cast_fp16 = reduce_sum(axes = var_929_axes_0, keep_dims = var_929_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_929_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_929_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_932_perm_0 = const()[name = string("op_932_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_934 = const()[name = string("op_934"), val = tensor([1, 1, 3072])]; + tensor var_932_cast_fp16 = transpose(perm = var_932_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_14")]; + tensor input_47_cast_fp16 = reshape(shape = var_934, x = var_932_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745658688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755095936))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_945_axes_0 = const()[name = string("op_945_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755292608)))]; + tensor var_945_cast_fp16 = layer_norm(axes = var_945_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_945_cast_fp16")]; + tensor var_952 = const()[name = string("op_952"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_953 = transpose(perm = var_952, x = var_945_cast_fp16)[name = string("transpose_13")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_953)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_975_axes_0 = const()[name = string("op_975_axes_0"), val = tensor([2])]; + tensor var_975 = squeeze(axes = var_975_axes_0, x = hidden_states_31)[name = string("op_975")]; + tensor var_976 = const()[name = string("op_976"), val = tensor([0, 2, 1])]; + tensor var_977 = transpose(perm = var_976, x = var_975)[name = string("transpose_12")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_977)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_985_axes_0 = const()[name = string("op_985_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755298816)))]; + tensor var_985_cast_fp16 = layer_norm(axes = var_985_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_985_cast_fp16")]; + tensor var_988 = const()[name = string("op_988"), val = tensor([0, 2, 1])]; + tensor var_990_axes_0 = const()[name = string("op_990_axes_0"), val = tensor([2])]; + tensor var_989 = transpose(perm = var_988, x = var_985_cast_fp16)[name = string("transpose_11")]; + tensor var_990 = expand_dims(axes = var_990_axes_0, x = var_989)[name = string("op_990")]; + string var_997_pad_type_0 = const()[name = string("op_997_pad_type_0"), val = string("valid")]; + tensor var_997_strides_0 = const()[name = string("op_997_strides_0"), val = tensor([1, 1])]; + tensor var_997_pad_0 = const()[name = string("op_997_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_997_dilations_0 = const()[name = string("op_997_dilations_0"), val = tensor([1, 1])]; + int32 var_997_groups_0 = const()[name = string("op_997_groups_0"), val = int32(1)]; + tensor var_997 = conv(dilations = var_997_dilations_0, groups = var_997_groups_0, pad = var_997_pad_0, pad_type = var_997_pad_type_0, strides = var_997_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_990)[name = string("op_997")]; + tensor var_998 = const()[name = string("op_998"), val = tensor([1, 24, 1, 128])]; + tensor var_999 = reshape(shape = var_998, x = var_997)[name = string("op_999")]; + string var_1006_pad_type_0 = const()[name = string("op_1006_pad_type_0"), val = string("valid")]; + tensor var_1006_strides_0 = const()[name = string("op_1006_strides_0"), val = tensor([1, 1])]; + tensor var_1006_pad_0 = const()[name = string("op_1006_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1006_dilations_0 = const()[name = string("op_1006_dilations_0"), val = tensor([1, 1])]; + int32 var_1006_groups_0 = const()[name = string("op_1006_groups_0"), val = int32(1)]; + tensor var_1006 = conv(dilations = var_1006_dilations_0, groups = var_1006_groups_0, pad = var_1006_pad_0, pad_type = var_1006_pad_type_0, strides = var_1006_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_990)[name = string("op_1006")]; + tensor var_1007 = const()[name = string("op_1007"), val = tensor([1, 8, 1, 128])]; + tensor var_1008 = reshape(shape = var_1007, x = var_1006)[name = string("op_1008")]; + string var_1015_pad_type_0 = const()[name = string("op_1015_pad_type_0"), val = string("valid")]; + tensor var_1015_strides_0 = const()[name = string("op_1015_strides_0"), val = tensor([1, 1])]; + tensor var_1015_pad_0 = const()[name = string("op_1015_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1015_dilations_0 = const()[name = string("op_1015_dilations_0"), val = tensor([1, 1])]; + int32 var_1015_groups_0 = const()[name = string("op_1015_groups_0"), val = int32(1)]; + tensor var_1015 = conv(dilations = var_1015_dilations_0, groups = var_1015_groups_0, pad = var_1015_pad_0, pad_type = var_1015_pad_type_0, strides = var_1015_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_990)[name = string("op_1015")]; + tensor var_1016 = const()[name = string("op_1016"), val = tensor([1, 8, 1, 128])]; + tensor var_1017 = reshape(shape = var_1016, x = var_1015)[name = string("op_1017")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_999)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_999)[name = string("x2_17")]; + tensor var_1031_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1031_cast_fp16")]; + tensor var_1032_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1032_cast_fp16")]; + tensor var_1033_cast_fp16 = sub(x = var_1031_cast_fp16, y = var_1032_cast_fp16)[name = string("op_1033_cast_fp16")]; + tensor var_1034_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1034_cast_fp16")]; + tensor var_1035_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1035_cast_fp16")]; + tensor var_1036_cast_fp16 = add(x = var_1034_cast_fp16, y = var_1035_cast_fp16)[name = string("op_1036_cast_fp16")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17_cast_fp16 = concat(axis = var_47, interleave = rotated_17_interleave_0, values = (var_1033_cast_fp16, var_1036_cast_fp16))[name = string("rotated_17_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1008)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1008)[name = string("x2_19")]; + tensor var_1052_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1052_cast_fp16")]; + tensor var_1053_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1053_cast_fp16")]; + tensor var_1054_cast_fp16 = sub(x = var_1052_cast_fp16, y = var_1053_cast_fp16)[name = string("op_1054_cast_fp16")]; + tensor var_1055_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1055_cast_fp16")]; + tensor var_1056_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1056_cast_fp16")]; + tensor var_1057_cast_fp16 = add(x = var_1055_cast_fp16, y = var_1056_cast_fp16)[name = string("op_1057_cast_fp16")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19_cast_fp16 = concat(axis = var_47, interleave = rotated_19_interleave_0, values = (var_1054_cast_fp16, var_1057_cast_fp16))[name = string("rotated_19_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; + tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_322, concat_35_values3_0))[name = string("concat_35")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([32])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([33])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_322, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1017, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; + tensor var_1077_begin_0 = const()[name = string("op_1077_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1077_end_0 = const()[name = string("op_1077_end_0"), val = tensor([5, 8, 1024, 128])]; + tensor var_1077_end_mask_0 = const()[name = string("op_1077_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1077_cast_fp16 = slice_by_index(begin = var_1077_begin_0, end = var_1077_end_0, end_mask = var_1077_end_mask_0, x = coreml_update_state_23)[name = string("op_1077_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1077_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1079_begin_0 = const()[name = string("op_1079_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_1079_end_0 = const()[name = string("op_1079_end_0"), val = tensor([33, 8, 1024, 128])]; + tensor var_1079_end_mask_0 = const()[name = string("op_1079_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1079_cast_fp16 = slice_by_index(begin = var_1079_begin_0, end = var_1079_end_0, end_mask = var_1079_end_mask_0, x = coreml_update_state_23)[name = string("op_1079_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1079_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1088 = const()[name = string("op_1088"), val = tensor([1, 3, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1088, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1092 = const()[name = string("op_1092"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_19_cast_fp16 = reshape(shape = var_1092, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1095 = const()[name = string("op_1095"), val = tensor([1, 3, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1095, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1099 = const()[name = string("op_1099"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_19_cast_fp16 = reshape(shape = var_1099, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")]; + bool var_1102_transpose_x_1 = const()[name = string("op_1102_transpose_x_1"), val = bool(false)]; + bool var_1102_transpose_y_1 = const()[name = string("op_1102_transpose_y_1"), val = bool(true)]; + tensor var_1102_cast_fp16 = matmul(transpose_x = var_1102_transpose_x_1, transpose_y = var_1102_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1102_cast_fp16")]; + fp16 var_1103_to_fp16 = const()[name = string("op_1103_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_17_cast_fp16 = mul(x = var_1102_cast_fp16, y = var_1103_to_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1114_axes_0 = const()[name = string("op_1114_axes_0"), val = tensor([-1])]; + bool var_1114_keep_dims_0 = const()[name = string("op_1114_keep_dims_0"), val = bool(true)]; + tensor var_1114_cast_fp16 = reduce_sum(axes = var_1114_axes_0, keep_dims = var_1114_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1114_cast_fp16")]; + tensor attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1114_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1117_perm_0 = const()[name = string("op_1117_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1119 = const()[name = string("op_1119"), val = tensor([1, 1, 3072])]; + tensor var_1117_cast_fp16 = transpose(perm = var_1117_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_10")]; + tensor input_61_cast_fp16 = reshape(shape = var_1119, x = var_1117_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755305024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764742272))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1130_axes_0 = const()[name = string("op_1130_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764938944)))]; + tensor var_1130_cast_fp16 = layer_norm(axes = var_1130_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1130_cast_fp16")]; + tensor var_1137 = const()[name = string("op_1137"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1138 = transpose(perm = var_1137, x = var_1130_cast_fp16)[name = string("transpose_9")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1138)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1160_axes_0 = const()[name = string("op_1160_axes_0"), val = tensor([2])]; + tensor var_1160 = squeeze(axes = var_1160_axes_0, x = hidden_states_39)[name = string("op_1160")]; + tensor var_1161 = const()[name = string("op_1161"), val = tensor([0, 2, 1])]; + tensor var_1162 = transpose(perm = var_1161, x = var_1160)[name = string("transpose_8")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1162)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1170_axes_0 = const()[name = string("op_1170_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764945152)))]; + tensor var_1170_cast_fp16 = layer_norm(axes = var_1170_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1170_cast_fp16")]; + tensor var_1173 = const()[name = string("op_1173"), val = tensor([0, 2, 1])]; + tensor var_1175_axes_0 = const()[name = string("op_1175_axes_0"), val = tensor([2])]; + tensor var_1174 = transpose(perm = var_1173, x = var_1170_cast_fp16)[name = string("transpose_7")]; + tensor var_1175 = expand_dims(axes = var_1175_axes_0, x = var_1174)[name = string("op_1175")]; + string var_1182_pad_type_0 = const()[name = string("op_1182_pad_type_0"), val = string("valid")]; + tensor var_1182_strides_0 = const()[name = string("op_1182_strides_0"), val = tensor([1, 1])]; + tensor var_1182_pad_0 = const()[name = string("op_1182_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1182_dilations_0 = const()[name = string("op_1182_dilations_0"), val = tensor([1, 1])]; + int32 var_1182_groups_0 = const()[name = string("op_1182_groups_0"), val = int32(1)]; + tensor var_1182 = conv(dilations = var_1182_dilations_0, groups = var_1182_groups_0, pad = var_1182_pad_0, pad_type = var_1182_pad_type_0, strides = var_1182_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1175)[name = string("op_1182")]; + tensor var_1183 = const()[name = string("op_1183"), val = tensor([1, 24, 1, 128])]; + tensor var_1184 = reshape(shape = var_1183, x = var_1182)[name = string("op_1184")]; + string var_1191_pad_type_0 = const()[name = string("op_1191_pad_type_0"), val = string("valid")]; + tensor var_1191_strides_0 = const()[name = string("op_1191_strides_0"), val = tensor([1, 1])]; + tensor var_1191_pad_0 = const()[name = string("op_1191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1191_dilations_0 = const()[name = string("op_1191_dilations_0"), val = tensor([1, 1])]; + int32 var_1191_groups_0 = const()[name = string("op_1191_groups_0"), val = int32(1)]; + tensor var_1191 = conv(dilations = var_1191_dilations_0, groups = var_1191_groups_0, pad = var_1191_pad_0, pad_type = var_1191_pad_type_0, strides = var_1191_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1175)[name = string("op_1191")]; + tensor var_1192 = const()[name = string("op_1192"), val = tensor([1, 8, 1, 128])]; + tensor var_1193 = reshape(shape = var_1192, x = var_1191)[name = string("op_1193")]; + string var_1200_pad_type_0 = const()[name = string("op_1200_pad_type_0"), val = string("valid")]; + tensor var_1200_strides_0 = const()[name = string("op_1200_strides_0"), val = tensor([1, 1])]; + tensor var_1200_pad_0 = const()[name = string("op_1200_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1200_dilations_0 = const()[name = string("op_1200_dilations_0"), val = tensor([1, 1])]; + int32 var_1200_groups_0 = const()[name = string("op_1200_groups_0"), val = int32(1)]; + tensor var_1200 = conv(dilations = var_1200_dilations_0, groups = var_1200_groups_0, pad = var_1200_pad_0, pad_type = var_1200_pad_type_0, strides = var_1200_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1175)[name = string("op_1200")]; + tensor var_1201 = const()[name = string("op_1201"), val = tensor([1, 8, 1, 128])]; + tensor var_1202 = reshape(shape = var_1201, x = var_1200)[name = string("op_1202")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1184)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1184)[name = string("x2_21")]; + tensor var_1216_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1216_cast_fp16")]; + tensor var_1217_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1217_cast_fp16")]; + tensor var_1218_cast_fp16 = sub(x = var_1216_cast_fp16, y = var_1217_cast_fp16)[name = string("op_1218_cast_fp16")]; + tensor var_1219_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1219_cast_fp16")]; + tensor var_1220_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1220_cast_fp16")]; + tensor var_1221_cast_fp16 = add(x = var_1219_cast_fp16, y = var_1220_cast_fp16)[name = string("op_1221_cast_fp16")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21_cast_fp16 = concat(axis = var_47, interleave = rotated_21_interleave_0, values = (var_1218_cast_fp16, var_1221_cast_fp16))[name = string("rotated_21_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1193)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1193)[name = string("x2_23")]; + tensor var_1237_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1237_cast_fp16")]; + tensor var_1238_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1238_cast_fp16")]; + tensor var_1239_cast_fp16 = sub(x = var_1237_cast_fp16, y = var_1238_cast_fp16)[name = string("op_1239_cast_fp16")]; + tensor var_1240_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1240_cast_fp16")]; + tensor var_1241_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1241_cast_fp16")]; + tensor var_1242_cast_fp16 = add(x = var_1240_cast_fp16, y = var_1241_cast_fp16)[name = string("op_1242_cast_fp16")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23_cast_fp16 = concat(axis = var_47, interleave = rotated_23_interleave_0, values = (var_1239_cast_fp16, var_1242_cast_fp16))[name = string("rotated_23_cast_fp16")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_322, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([33])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([34])]; + int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; + bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; + tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; + tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; + tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; + int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; + bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; + tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_322, concat_47_values3_0))[name = string("concat_47")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1202, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; + tensor var_1262_begin_0 = const()[name = string("op_1262_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1262_end_0 = const()[name = string("op_1262_end_0"), val = tensor([6, 8, 1024, 128])]; + tensor var_1262_end_mask_0 = const()[name = string("op_1262_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1262_cast_fp16 = slice_by_index(begin = var_1262_begin_0, end = var_1262_end_0, end_mask = var_1262_end_mask_0, x = coreml_update_state_25)[name = string("op_1262_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1262_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = string("op_1264_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_1264_end_0 = const()[name = string("op_1264_end_0"), val = tensor([34, 8, 1024, 128])]; + tensor var_1264_end_mask_0 = const()[name = string("op_1264_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = coreml_update_state_25)[name = string("op_1264_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1264_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1273 = const()[name = string("op_1273"), val = tensor([1, 3, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1273, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1277 = const()[name = string("op_1277"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_23_cast_fp16 = reshape(shape = var_1277, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1280 = const()[name = string("op_1280"), val = tensor([1, 3, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1280, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_1284 = const()[name = string("op_1284"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_23_cast_fp16 = reshape(shape = var_1284, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")]; + bool var_1287_transpose_x_1 = const()[name = string("op_1287_transpose_x_1"), val = bool(false)]; + bool var_1287_transpose_y_1 = const()[name = string("op_1287_transpose_y_1"), val = bool(true)]; + tensor var_1287_cast_fp16 = matmul(transpose_x = var_1287_transpose_x_1, transpose_y = var_1287_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1287_cast_fp16")]; + fp16 var_1288_to_fp16 = const()[name = string("op_1288_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_21_cast_fp16 = mul(x = var_1287_cast_fp16, y = var_1288_to_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1299_axes_0 = const()[name = string("op_1299_axes_0"), val = tensor([-1])]; + bool var_1299_keep_dims_0 = const()[name = string("op_1299_keep_dims_0"), val = bool(true)]; + tensor var_1299_cast_fp16 = reduce_sum(axes = var_1299_axes_0, keep_dims = var_1299_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1299_cast_fp16")]; + tensor attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1299_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_1302_perm_0 = const()[name = string("op_1302_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1304 = const()[name = string("op_1304"), val = tensor([1, 1, 3072])]; + tensor var_1302_cast_fp16 = transpose(perm = var_1302_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_6")]; + tensor input_75_cast_fp16 = reshape(shape = var_1304, x = var_1302_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764951360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774388608))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1315_axes_0 = const()[name = string("op_1315_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774585280)))]; + tensor var_1315_cast_fp16 = layer_norm(axes = var_1315_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1315_cast_fp16")]; + tensor var_1322 = const()[name = string("op_1322"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1323 = transpose(perm = var_1322, x = var_1315_cast_fp16)[name = string("transpose_5")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1323)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1345_axes_0 = const()[name = string("op_1345_axes_0"), val = tensor([2])]; + tensor var_1345 = squeeze(axes = var_1345_axes_0, x = hidden_states_47)[name = string("op_1345")]; + tensor var_1346 = const()[name = string("op_1346"), val = tensor([0, 2, 1])]; + tensor var_1347 = transpose(perm = var_1346, x = var_1345)[name = string("transpose_4")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1347)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1355_axes_0 = const()[name = string("op_1355_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774591488)))]; + tensor var_1355_cast_fp16 = layer_norm(axes = var_1355_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1355_cast_fp16")]; + tensor var_1358 = const()[name = string("op_1358"), val = tensor([0, 2, 1])]; + tensor var_1360_axes_0 = const()[name = string("op_1360_axes_0"), val = tensor([2])]; + tensor var_1359 = transpose(perm = var_1358, x = var_1355_cast_fp16)[name = string("transpose_3")]; + tensor var_1360 = expand_dims(axes = var_1360_axes_0, x = var_1359)[name = string("op_1360")]; + string var_1367_pad_type_0 = const()[name = string("op_1367_pad_type_0"), val = string("valid")]; + tensor var_1367_strides_0 = const()[name = string("op_1367_strides_0"), val = tensor([1, 1])]; + tensor var_1367_pad_0 = const()[name = string("op_1367_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1367_dilations_0 = const()[name = string("op_1367_dilations_0"), val = tensor([1, 1])]; + int32 var_1367_groups_0 = const()[name = string("op_1367_groups_0"), val = int32(1)]; + tensor var_1367 = conv(dilations = var_1367_dilations_0, groups = var_1367_groups_0, pad = var_1367_pad_0, pad_type = var_1367_pad_type_0, strides = var_1367_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1360)[name = string("op_1367")]; + tensor var_1368 = const()[name = string("op_1368"), val = tensor([1, 24, 1, 128])]; + tensor var_1369 = reshape(shape = var_1368, x = var_1367)[name = string("op_1369")]; + string var_1376_pad_type_0 = const()[name = string("op_1376_pad_type_0"), val = string("valid")]; + tensor var_1376_strides_0 = const()[name = string("op_1376_strides_0"), val = tensor([1, 1])]; + tensor var_1376_pad_0 = const()[name = string("op_1376_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1376_dilations_0 = const()[name = string("op_1376_dilations_0"), val = tensor([1, 1])]; + int32 var_1376_groups_0 = const()[name = string("op_1376_groups_0"), val = int32(1)]; + tensor var_1376 = conv(dilations = var_1376_dilations_0, groups = var_1376_groups_0, pad = var_1376_pad_0, pad_type = var_1376_pad_type_0, strides = var_1376_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1360)[name = string("op_1376")]; + tensor var_1377 = const()[name = string("op_1377"), val = tensor([1, 8, 1, 128])]; + tensor var_1378 = reshape(shape = var_1377, x = var_1376)[name = string("op_1378")]; + string var_1385_pad_type_0 = const()[name = string("op_1385_pad_type_0"), val = string("valid")]; + tensor var_1385_strides_0 = const()[name = string("op_1385_strides_0"), val = tensor([1, 1])]; + tensor var_1385_pad_0 = const()[name = string("op_1385_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1385_dilations_0 = const()[name = string("op_1385_dilations_0"), val = tensor([1, 1])]; + int32 var_1385_groups_0 = const()[name = string("op_1385_groups_0"), val = int32(1)]; + tensor var_1385 = conv(dilations = var_1385_dilations_0, groups = var_1385_groups_0, pad = var_1385_pad_0, pad_type = var_1385_pad_type_0, strides = var_1385_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1360)[name = string("op_1385")]; + tensor var_1386 = const()[name = string("op_1386"), val = tensor([1, 8, 1, 128])]; + tensor var_1387 = reshape(shape = var_1386, x = var_1385)[name = string("op_1387")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 24, 1, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1369)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 24, 1, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1369)[name = string("x2_25")]; + tensor var_1401_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1401_cast_fp16")]; + tensor var_1402_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1402_cast_fp16")]; + tensor var_1403_cast_fp16 = sub(x = var_1401_cast_fp16, y = var_1402_cast_fp16)[name = string("op_1403_cast_fp16")]; + tensor var_1404_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1404_cast_fp16")]; + tensor var_1405_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1405_cast_fp16")]; + tensor var_1406_cast_fp16 = add(x = var_1404_cast_fp16, y = var_1405_cast_fp16)[name = string("op_1406_cast_fp16")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25_cast_fp16 = concat(axis = var_47, interleave = rotated_25_interleave_0, values = (var_1403_cast_fp16, var_1406_cast_fp16))[name = string("rotated_25_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1378)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1378)[name = string("x2")]; + tensor var_1422_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1422_cast_fp16")]; + tensor var_1423_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1423_cast_fp16")]; + tensor var_1424_cast_fp16 = sub(x = var_1422_cast_fp16, y = var_1423_cast_fp16)[name = string("op_1424_cast_fp16")]; + tensor var_1425_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1425_cast_fp16")]; + tensor var_1426_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1426_cast_fp16")]; + tensor var_1427_cast_fp16 = add(x = var_1425_cast_fp16, y = var_1426_cast_fp16)[name = string("op_1427_cast_fp16")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated_cast_fp16 = concat(axis = var_47, interleave = rotated_interleave_0, values = (var_1424_cast_fp16, var_1427_cast_fp16))[name = string("rotated_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_322, concat_51_values3_0))[name = string("concat_51")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([34])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([35])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; + tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; + tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_322, concat_55_values3_0))[name = string("concat_55")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1387, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; + tensor var_1447_begin_0 = const()[name = string("op_1447_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_1447_end_0 = const()[name = string("op_1447_end_0"), val = tensor([7, 8, 1024, 128])]; + tensor var_1447_end_mask_0 = const()[name = string("op_1447_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1447_cast_fp16 = slice_by_index(begin = var_1447_begin_0, end = var_1447_end_0, end_mask = var_1447_end_mask_0, x = coreml_update_state_27)[name = string("op_1447_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1447_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1449_begin_0 = const()[name = string("op_1449_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_1449_end_0 = const()[name = string("op_1449_end_0"), val = tensor([35, 8, 1024, 128])]; + tensor var_1449_end_mask_0 = const()[name = string("op_1449_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1449_cast_fp16 = slice_by_index(begin = var_1449_begin_0, end = var_1449_end_0, end_mask = var_1449_end_mask_0, x = coreml_update_state_27)[name = string("op_1449_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1449_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1458 = const()[name = string("op_1458"), val = tensor([1, 3, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1458, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1462 = const()[name = string("op_1462"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_cast_fp16 = reshape(shape = var_1462, x = x_181_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1465 = const()[name = string("op_1465"), val = tensor([1, 3, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1465, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_1469 = const()[name = string("op_1469"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_cast_fp16 = reshape(shape = var_1469, x = x_187_cast_fp16)[name = string("value_states_cast_fp16")]; + bool var_1472_transpose_x_1 = const()[name = string("op_1472_transpose_x_1"), val = bool(false)]; + bool var_1472_transpose_y_1 = const()[name = string("op_1472_transpose_y_1"), val = bool(true)]; + tensor var_1472_cast_fp16 = matmul(transpose_x = var_1472_transpose_x_1, transpose_y = var_1472_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_cast_fp16)[name = string("op_1472_cast_fp16")]; + fp16 var_1473_to_fp16 = const()[name = string("op_1473_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_1472_cast_fp16, y = var_1473_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_1484_axes_0 = const()[name = string("op_1484_axes_0"), val = tensor([-1])]; + bool var_1484_keep_dims_0 = const()[name = string("op_1484_keep_dims_0"), val = bool(true)]; + tensor var_1484_cast_fp16 = reduce_sum(axes = var_1484_axes_0, keep_dims = var_1484_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1484_cast_fp16")]; + tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1484_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1487_perm_0 = const()[name = string("op_1487_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1489 = const()[name = string("op_1489"), val = tensor([1, 1, 3072])]; + tensor var_1487_cast_fp16 = transpose(perm = var_1487_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_2")]; + tensor input_89_cast_fp16 = reshape(shape = var_1489, x = var_1487_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774597696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784034944))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1500_axes_0 = const()[name = string("op_1500_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784231616)))]; + tensor var_1500_cast_fp16 = layer_norm(axes = var_1500_axes_0, epsilon = var_42_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1500_cast_fp16")]; + tensor var_1507 = const()[name = string("op_1507"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1508 = transpose(perm = var_1507, x = var_1500_cast_fp16)[name = string("transpose_1")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1508)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states")]; + tensor gate_states = silu(x = input_95)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_1530_axes_0 = const()[name = string("op_1530_axes_0"), val = tensor([2])]; + tensor var_1530 = squeeze(axes = var_1530_axes_0, x = hidden_states_1)[name = string("op_1530")]; + tensor var_1531 = const()[name = string("op_1531"), val = tensor([0, 2, 1])]; + tensor var_1532 = transpose(perm = var_1531, x = var_1530)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_53_cast_fp16, y = var_1532)[name = string("op_1533_cast_fp16")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (output_hidden_states); + func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9437312))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9633984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12779776))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12845376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15991168))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16056768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41222656))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41747008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66912896))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67437248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92603136))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92799808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102237056))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102433728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105579520))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105645120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108790912))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108856512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134022400))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134546752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159712640))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160236992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185402880))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185599552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195036800))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195233472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198379264))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198444864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201590656))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201656256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226822144))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227346496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252512384))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253036736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278202624))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278399296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287836544))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288033216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291179008))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291244608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294390400))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294456000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319621888))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320146240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345312128))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345836480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371002368))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371199040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380636288))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380832960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383978752))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384044352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387190144))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387255744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412421632))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412945984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438111872))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438636224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463802112))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473436032))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473632704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476778496))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476844096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479989888))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480055488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505221376))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505745728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530911616))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531435968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556601856))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556798528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566235776))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566432448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569578240))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569643840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572789632))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572855232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598021120))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598545472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(623711360))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(624235712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649401600))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; + int32 var_42 = const()[name = string("op_42"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_234_axis_0 = const()[name = string("op_234_axis_0"), val = int32(1)]; + int32 var_234_batch_dims_0 = const()[name = string("op_234_batch_dims_0"), val = int32(0)]; + bool var_234_validate_indices_0 = const()[name = string("op_234_validate_indices_0"), val = bool(false)]; + tensor var_53_to_fp16 = const()[name = string("op_53_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(683152768)))]; + tensor var_234_cast_fp16 = gather(axis = var_234_axis_0, batch_dims = var_234_batch_dims_0, indices = select_0, validate_indices = var_234_validate_indices_0, x = var_53_to_fp16)[name = string("op_234_cast_fp16")]; + tensor var_235 = const()[name = string("op_235"), val = tensor([1, 64, 1, 128])]; + tensor cos_1_cast_fp16 = reshape(shape = var_235, x = var_234_cast_fp16)[name = string("cos_1_cast_fp16")]; + int32 var_239_axis_0 = const()[name = string("op_239_axis_0"), val = int32(1)]; + int32 var_239_batch_dims_0 = const()[name = string("op_239_batch_dims_0"), val = int32(0)]; + bool var_239_validate_indices_0 = const()[name = string("op_239_validate_indices_0"), val = bool(false)]; + tensor var_48_to_fp16 = const()[name = string("op_48_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649598272)))]; + tensor var_239_cast_fp16 = gather(axis = var_239_axis_0, batch_dims = var_239_batch_dims_0, indices = select_0, validate_indices = var_239_validate_indices_0, x = var_48_to_fp16)[name = string("op_239_cast_fp16")]; + tensor var_240 = const()[name = string("op_240"), val = tensor([1, 64, 1, 128])]; + tensor sin_1_cast_fp16 = reshape(shape = var_240, x = var_239_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_250_axes_0 = const()[name = string("op_250_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716707264)))]; + fp16 var_44_to_fp16 = const()[name = string("op_44_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_250_cast_fp16 = layer_norm(axes = var_250_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_250_cast_fp16")]; + tensor var_254 = const()[name = string("op_254"), val = tensor([0, 2, 1])]; + tensor var_256_axes_0 = const()[name = string("op_256_axes_0"), val = tensor([2])]; + tensor var_255 = transpose(perm = var_254, x = var_250_cast_fp16)[name = string("transpose_50")]; + tensor var_256 = expand_dims(axes = var_256_axes_0, x = var_255)[name = string("op_256")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_256)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_256)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_256)[name = string("value_states_1")]; + tensor var_276 = const()[name = string("op_276"), val = tensor([1, 24, 128, 64])]; + tensor var_277 = reshape(shape = var_276, x = query_states_1)[name = string("op_277")]; + tensor var_278 = const()[name = string("op_278"), val = tensor([0, 1, 3, 2])]; + tensor var_280 = const()[name = string("op_280"), val = tensor([1, 8, 128, 64])]; + tensor var_281 = reshape(shape = var_280, x = key_states_1)[name = string("op_281")]; + tensor var_282 = const()[name = string("op_282"), val = tensor([0, 1, 3, 2])]; + tensor var_284 = const()[name = string("op_284"), val = tensor([1, 8, 128, 64])]; + tensor var_285 = reshape(shape = var_284, x = value_states_1)[name = string("op_285")]; + tensor var_286 = const()[name = string("op_286"), val = tensor([0, 1, 3, 2])]; + tensor var_288 = const()[name = string("op_288"), val = tensor([0, 2, 1, 3])]; + tensor var_290 = const()[name = string("op_290"), val = tensor([0, 2, 1, 3])]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_1 = transpose(perm = var_278, x = var_277)[name = string("transpose_49")]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; + tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 64, 64])]; + tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_5 = transpose(perm = var_288, x = cos_1_cast_fp16)[name = string("transpose_48")]; + tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; + tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 64, 64])]; + tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_5 = transpose(perm = var_290, x = sin_1_cast_fp16)[name = string("transpose_47")]; + tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; + tensor var_304 = mul(x = x1_1, y = cos_7)[name = string("op_304")]; + tensor var_305 = mul(x = x2_1, y = sin_7)[name = string("op_305")]; + tensor var_306 = sub(x = var_304, y = var_305)[name = string("op_306")]; + tensor var_307 = mul(x = x2_1, y = cos_7)[name = string("op_307")]; + tensor var_308 = mul(x = x1_1, y = sin_7)[name = string("op_308")]; + tensor var_309 = add(x = var_307, y = var_308)[name = string("op_309")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1 = concat(axis = var_42, interleave = rotated_1_interleave_0, values = (var_306, var_309))[name = string("rotated_1")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_5 = transpose(perm = var_282, x = var_281)[name = string("transpose_46")]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; + tensor var_325 = mul(x = x1_3, y = cos_7)[name = string("op_325")]; + tensor var_326 = mul(x = x2_3, y = sin_7)[name = string("op_326")]; + tensor var_327 = sub(x = var_325, y = var_326)[name = string("op_327")]; + tensor var_328 = mul(x = x2_3, y = cos_7)[name = string("op_328")]; + tensor var_329 = mul(x = x1_3, y = sin_7)[name = string("op_329")]; + tensor var_330 = add(x = var_328, y = var_329)[name = string("op_330")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3 = concat(axis = var_42, interleave = rotated_3_interleave_0, values = (var_327, var_330))[name = string("rotated_3")]; + tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; + tensor var_339 = add(x = current_pos, y = seq_length_1)[name = string("op_339")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_339, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([28])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([29])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_339, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3 = transpose(perm = var_286, x = var_285)[name = string("transpose_45")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; + tensor var_353_begin_0 = const()[name = string("op_353_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_353_end_0 = const()[name = string("op_353_end_0"), val = tensor([1, 8, 1024, 128])]; + tensor var_353_end_mask_0 = const()[name = string("op_353_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_353_cast_fp16 = slice_by_index(begin = var_353_begin_0, end = var_353_end_0, end_mask = var_353_end_mask_0, x = coreml_update_state_15)[name = string("op_353_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_353_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_355_begin_0 = const()[name = string("op_355_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_355_end_0 = const()[name = string("op_355_end_0"), val = tensor([29, 8, 1024, 128])]; + tensor var_355_end_mask_0 = const()[name = string("op_355_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_355_cast_fp16 = slice_by_index(begin = var_355_begin_0, end = var_355_end_0, end_mask = var_355_end_mask_0, x = coreml_update_state_15)[name = string("op_355_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_355_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_364 = const()[name = string("op_364"), val = tensor([1, 3, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_364, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_368 = const()[name = string("op_368"), val = tensor([1, -1, 1024, 128])]; + tensor var_369_cast_fp16 = reshape(shape = var_368, x = x_13_cast_fp16)[name = string("op_369_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_371 = const()[name = string("op_371"), val = tensor([1, 3, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_371, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + bool var_378_transpose_x_0 = const()[name = string("op_378_transpose_x_0"), val = bool(false)]; + bool var_378_transpose_y_0 = const()[name = string("op_378_transpose_y_0"), val = bool(true)]; + tensor var_378_cast_fp16 = matmul(transpose_x = var_378_transpose_x_0, transpose_y = var_378_transpose_y_0, x = rotated_1, y = var_369_cast_fp16)[name = string("op_378_cast_fp16")]; + fp16 var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_378_cast_fp16, y = var_379_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_390_axes_0 = const()[name = string("op_390_axes_0"), val = tensor([-1])]; + bool var_390_keep_dims_0 = const()[name = string("op_390_keep_dims_0"), val = bool(true)]; + tensor var_390_cast_fp16 = reduce_sum(axes = var_390_axes_0, keep_dims = var_390_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_390_cast_fp16")]; + tensor var_391_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_390_cast_fp16)[name = string("op_391_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([24, 64, 1024])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_391_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([24, 1024, 128])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 24, 64, 128])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_394_perm_0 = const()[name = string("op_394_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_396 = const()[name = string("op_396"), val = tensor([1, 64, 3072])]; + tensor var_394_cast_fp16 = transpose(perm = var_394_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_44")]; + tensor input_5_cast_fp16 = reshape(shape = var_396, x = var_394_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716713472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726150720))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726347392)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_407_axes_0 = const()[name = string("op_407_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726353600)))]; + tensor var_407_cast_fp16 = layer_norm(axes = var_407_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_407_cast_fp16")]; + tensor var_414 = const()[name = string("op_414"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_415 = transpose(perm = var_414, x = var_407_cast_fp16)[name = string("transpose_43")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_415)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_437_axes_0 = const()[name = string("op_437_axes_0"), val = tensor([2])]; + tensor var_437 = squeeze(axes = var_437_axes_0, x = hidden_states_7)[name = string("op_437")]; + tensor var_438 = const()[name = string("op_438"), val = tensor([0, 2, 1])]; + tensor var_439 = transpose(perm = var_438, x = var_437)[name = string("transpose_42")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_439)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_447_axes_0 = const()[name = string("op_447_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726359808)))]; + tensor var_447_cast_fp16 = layer_norm(axes = var_447_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_447_cast_fp16")]; + tensor var_451 = const()[name = string("op_451"), val = tensor([0, 2, 1])]; + tensor var_453_axes_0 = const()[name = string("op_453_axes_0"), val = tensor([2])]; + tensor var_452 = transpose(perm = var_451, x = var_447_cast_fp16)[name = string("transpose_41")]; + tensor var_453 = expand_dims(axes = var_453_axes_0, x = var_452)[name = string("op_453")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_453)[name = string("query_states_5")]; + string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; + tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; + tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; + int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; + tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_453)[name = string("key_states_7")]; + string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; + tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; + tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; + int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; + tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_453)[name = string("value_states_7")]; + tensor var_473 = const()[name = string("op_473"), val = tensor([1, 24, 128, 64])]; + tensor var_474 = reshape(shape = var_473, x = query_states_5)[name = string("op_474")]; + tensor var_475 = const()[name = string("op_475"), val = tensor([0, 1, 3, 2])]; + tensor var_477 = const()[name = string("op_477"), val = tensor([1, 8, 128, 64])]; + tensor var_478 = reshape(shape = var_477, x = key_states_7)[name = string("op_478")]; + tensor var_479 = const()[name = string("op_479"), val = tensor([0, 1, 3, 2])]; + tensor var_481 = const()[name = string("op_481"), val = tensor([1, 8, 128, 64])]; + tensor var_482 = reshape(shape = var_481, x = value_states_7)[name = string("op_482")]; + tensor var_483 = const()[name = string("op_483"), val = tensor([0, 1, 3, 2])]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_29 = transpose(perm = var_475, x = var_474)[name = string("transpose_40")]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; + tensor var_501 = mul(x = x1_5, y = cos_7)[name = string("op_501")]; + tensor var_502 = mul(x = x2_5, y = sin_7)[name = string("op_502")]; + tensor var_503 = sub(x = var_501, y = var_502)[name = string("op_503")]; + tensor var_504 = mul(x = x2_5, y = cos_7)[name = string("op_504")]; + tensor var_505 = mul(x = x1_5, y = sin_7)[name = string("op_505")]; + tensor var_506 = add(x = var_504, y = var_505)[name = string("op_506")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5 = concat(axis = var_42, interleave = rotated_5_interleave_0, values = (var_503, var_506))[name = string("rotated_5")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_33 = transpose(perm = var_479, x = var_478)[name = string("transpose_39")]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; + tensor var_522 = mul(x = x1_7, y = cos_7)[name = string("op_522")]; + tensor var_523 = mul(x = x2_7, y = sin_7)[name = string("op_523")]; + tensor var_524 = sub(x = var_522, y = var_523)[name = string("op_524")]; + tensor var_525 = mul(x = x2_7, y = cos_7)[name = string("op_525")]; + tensor var_526 = mul(x = x1_7, y = sin_7)[name = string("op_526")]; + tensor var_527 = add(x = var_525, y = var_526)[name = string("op_527")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7 = concat(axis = var_42, interleave = rotated_7_interleave_0, values = (var_524, var_527))[name = string("rotated_7")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_339, concat_21_values3_0))[name = string("concat_21")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_15)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([29])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([30])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; + tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; + tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; + int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; + bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; + tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_339, concat_25_values3_0))[name = string("concat_25")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_9 = transpose(perm = var_483, x = var_482)[name = string("transpose_38")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; + tensor var_550_begin_0 = const()[name = string("op_550_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_550_end_0 = const()[name = string("op_550_end_0"), val = tensor([2, 8, 1024, 128])]; + tensor var_550_end_mask_0 = const()[name = string("op_550_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = coreml_update_state_17)[name = string("op_550_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_550_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_552_begin_0 = const()[name = string("op_552_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_552_end_0 = const()[name = string("op_552_end_0"), val = tensor([30, 8, 1024, 128])]; + tensor var_552_end_mask_0 = const()[name = string("op_552_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_552_cast_fp16 = slice_by_index(begin = var_552_begin_0, end = var_552_end_0, end_mask = var_552_end_mask_0, x = coreml_update_state_17)[name = string("op_552_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_552_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_561 = const()[name = string("op_561"), val = tensor([1, 3, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_561, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_565 = const()[name = string("op_565"), val = tensor([1, -1, 1024, 128])]; + tensor var_566_cast_fp16 = reshape(shape = var_565, x = x_41_cast_fp16)[name = string("op_566_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_568 = const()[name = string("op_568"), val = tensor([1, 3, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_568, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + bool var_575_transpose_x_0 = const()[name = string("op_575_transpose_x_0"), val = bool(false)]; + bool var_575_transpose_y_0 = const()[name = string("op_575_transpose_y_0"), val = bool(true)]; + tensor var_575_cast_fp16 = matmul(transpose_x = var_575_transpose_x_0, transpose_y = var_575_transpose_y_0, x = rotated_5, y = var_566_cast_fp16)[name = string("op_575_cast_fp16")]; + fp16 var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_3_cast_fp16 = mul(x = var_575_cast_fp16, y = var_576_to_fp16)[name = string("attn_weights_3_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_587_axes_0 = const()[name = string("op_587_axes_0"), val = tensor([-1])]; + bool var_587_keep_dims_0 = const()[name = string("op_587_keep_dims_0"), val = bool(true)]; + tensor var_587_cast_fp16 = reduce_sum(axes = var_587_axes_0, keep_dims = var_587_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_587_cast_fp16")]; + tensor var_588_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_587_cast_fp16)[name = string("op_588_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([24, 64, 1024])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_588_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([24, 1024, 128])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 24, 64, 128])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_591_perm_0 = const()[name = string("op_591_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_593 = const()[name = string("op_593"), val = tensor([1, 64, 3072])]; + tensor var_591_cast_fp16 = transpose(perm = var_591_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_37")]; + tensor input_19_cast_fp16 = reshape(shape = var_593, x = var_591_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726366016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735803264))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_604_axes_0 = const()[name = string("op_604_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735999936)))]; + tensor var_604_cast_fp16 = layer_norm(axes = var_604_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_604_cast_fp16")]; + tensor var_611 = const()[name = string("op_611"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_612 = transpose(perm = var_611, x = var_604_cast_fp16)[name = string("transpose_36")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_612)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_634_axes_0 = const()[name = string("op_634_axes_0"), val = tensor([2])]; + tensor var_634 = squeeze(axes = var_634_axes_0, x = hidden_states_15)[name = string("op_634")]; + tensor var_635 = const()[name = string("op_635"), val = tensor([0, 2, 1])]; + tensor var_636 = transpose(perm = var_635, x = var_634)[name = string("transpose_35")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_636)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_644_axes_0 = const()[name = string("op_644_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736006144)))]; + tensor var_644_cast_fp16 = layer_norm(axes = var_644_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_644_cast_fp16")]; + tensor var_648 = const()[name = string("op_648"), val = tensor([0, 2, 1])]; + tensor var_650_axes_0 = const()[name = string("op_650_axes_0"), val = tensor([2])]; + tensor var_649 = transpose(perm = var_648, x = var_644_cast_fp16)[name = string("transpose_34")]; + tensor var_650 = expand_dims(axes = var_650_axes_0, x = var_649)[name = string("op_650")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_650)[name = string("query_states_9")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_650)[name = string("key_states_13")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_650)[name = string("value_states_13")]; + tensor var_670 = const()[name = string("op_670"), val = tensor([1, 24, 128, 64])]; + tensor var_671 = reshape(shape = var_670, x = query_states_9)[name = string("op_671")]; + tensor var_672 = const()[name = string("op_672"), val = tensor([0, 1, 3, 2])]; + tensor var_674 = const()[name = string("op_674"), val = tensor([1, 8, 128, 64])]; + tensor var_675 = reshape(shape = var_674, x = key_states_13)[name = string("op_675")]; + tensor var_676 = const()[name = string("op_676"), val = tensor([0, 1, 3, 2])]; + tensor var_678 = const()[name = string("op_678"), val = tensor([1, 8, 128, 64])]; + tensor var_679 = reshape(shape = var_678, x = value_states_13)[name = string("op_679")]; + tensor var_680 = const()[name = string("op_680"), val = tensor([0, 1, 3, 2])]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_57 = transpose(perm = var_672, x = var_671)[name = string("transpose_33")]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; + tensor var_698 = mul(x = x1_9, y = cos_7)[name = string("op_698")]; + tensor var_699 = mul(x = x2_9, y = sin_7)[name = string("op_699")]; + tensor var_700 = sub(x = var_698, y = var_699)[name = string("op_700")]; + tensor var_701 = mul(x = x2_9, y = cos_7)[name = string("op_701")]; + tensor var_702 = mul(x = x1_9, y = sin_7)[name = string("op_702")]; + tensor var_703 = add(x = var_701, y = var_702)[name = string("op_703")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9 = concat(axis = var_42, interleave = rotated_9_interleave_0, values = (var_700, var_703))[name = string("rotated_9")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_61 = transpose(perm = var_676, x = var_675)[name = string("transpose_32")]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; + tensor var_719 = mul(x = x1_11, y = cos_7)[name = string("op_719")]; + tensor var_720 = mul(x = x2_11, y = sin_7)[name = string("op_720")]; + tensor var_721 = sub(x = var_719, y = var_720)[name = string("op_721")]; + tensor var_722 = mul(x = x2_11, y = cos_7)[name = string("op_722")]; + tensor var_723 = mul(x = x1_11, y = sin_7)[name = string("op_723")]; + tensor var_724 = add(x = var_722, y = var_723)[name = string("op_724")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11 = concat(axis = var_42, interleave = rotated_11_interleave_0, values = (var_721, var_724))[name = string("rotated_11")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_339, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([30])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([31])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_339, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15 = transpose(perm = var_680, x = var_679)[name = string("transpose_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; + tensor var_747_begin_0 = const()[name = string("op_747_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_747_end_0 = const()[name = string("op_747_end_0"), val = tensor([3, 8, 1024, 128])]; + tensor var_747_end_mask_0 = const()[name = string("op_747_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_747_cast_fp16 = slice_by_index(begin = var_747_begin_0, end = var_747_end_0, end_mask = var_747_end_mask_0, x = coreml_update_state_19)[name = string("op_747_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_747_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_749_begin_0 = const()[name = string("op_749_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_749_end_0 = const()[name = string("op_749_end_0"), val = tensor([31, 8, 1024, 128])]; + tensor var_749_end_mask_0 = const()[name = string("op_749_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_749_cast_fp16 = slice_by_index(begin = var_749_begin_0, end = var_749_end_0, end_mask = var_749_end_mask_0, x = coreml_update_state_19)[name = string("op_749_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_749_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_758 = const()[name = string("op_758"), val = tensor([1, 3, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_758, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_762 = const()[name = string("op_762"), val = tensor([1, -1, 1024, 128])]; + tensor var_763_cast_fp16 = reshape(shape = var_762, x = x_69_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_765 = const()[name = string("op_765"), val = tensor([1, 3, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_765, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_772_transpose_x_0 = const()[name = string("op_772_transpose_x_0"), val = bool(false)]; + bool var_772_transpose_y_0 = const()[name = string("op_772_transpose_y_0"), val = bool(true)]; + tensor var_772_cast_fp16 = matmul(transpose_x = var_772_transpose_x_0, transpose_y = var_772_transpose_y_0, x = rotated_9, y = var_763_cast_fp16)[name = string("op_772_cast_fp16")]; + fp16 var_773_to_fp16 = const()[name = string("op_773_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_772_cast_fp16, y = var_773_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_784_axes_0 = const()[name = string("op_784_axes_0"), val = tensor([-1])]; + bool var_784_keep_dims_0 = const()[name = string("op_784_keep_dims_0"), val = bool(true)]; + tensor var_784_cast_fp16 = reduce_sum(axes = var_784_axes_0, keep_dims = var_784_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor var_785_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_784_cast_fp16)[name = string("op_785_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([24, 64, 1024])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_785_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([24, 1024, 128])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 24, 64, 128])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_788_perm_0 = const()[name = string("op_788_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_790 = const()[name = string("op_790"), val = tensor([1, 64, 3072])]; + tensor var_788_cast_fp16 = transpose(perm = var_788_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_30")]; + tensor input_33_cast_fp16 = reshape(shape = var_790, x = var_788_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736012352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745449600))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_801_axes_0 = const()[name = string("op_801_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745646272)))]; + tensor var_801_cast_fp16 = layer_norm(axes = var_801_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_801_cast_fp16")]; + tensor var_808 = const()[name = string("op_808"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_809 = transpose(perm = var_808, x = var_801_cast_fp16)[name = string("transpose_29")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_809)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_831_axes_0 = const()[name = string("op_831_axes_0"), val = tensor([2])]; + tensor var_831 = squeeze(axes = var_831_axes_0, x = hidden_states_23)[name = string("op_831")]; + tensor var_832 = const()[name = string("op_832"), val = tensor([0, 2, 1])]; + tensor var_833 = transpose(perm = var_832, x = var_831)[name = string("transpose_28")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_833)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_841_axes_0 = const()[name = string("op_841_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745652480)))]; + tensor var_841_cast_fp16 = layer_norm(axes = var_841_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_841_cast_fp16")]; + tensor var_845 = const()[name = string("op_845"), val = tensor([0, 2, 1])]; + tensor var_847_axes_0 = const()[name = string("op_847_axes_0"), val = tensor([2])]; + tensor var_846 = transpose(perm = var_845, x = var_841_cast_fp16)[name = string("transpose_27")]; + tensor var_847 = expand_dims(axes = var_847_axes_0, x = var_846)[name = string("op_847")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_847)[name = string("query_states_13")]; + string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; + tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; + tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; + int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; + tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_847)[name = string("key_states_19")]; + string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; + tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; + tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; + int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; + tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_847)[name = string("value_states_19")]; + tensor var_867 = const()[name = string("op_867"), val = tensor([1, 24, 128, 64])]; + tensor var_868 = reshape(shape = var_867, x = query_states_13)[name = string("op_868")]; + tensor var_869 = const()[name = string("op_869"), val = tensor([0, 1, 3, 2])]; + tensor var_871 = const()[name = string("op_871"), val = tensor([1, 8, 128, 64])]; + tensor var_872 = reshape(shape = var_871, x = key_states_19)[name = string("op_872")]; + tensor var_873 = const()[name = string("op_873"), val = tensor([0, 1, 3, 2])]; + tensor var_875 = const()[name = string("op_875"), val = tensor([1, 8, 128, 64])]; + tensor var_876 = reshape(shape = var_875, x = value_states_19)[name = string("op_876")]; + tensor var_877 = const()[name = string("op_877"), val = tensor([0, 1, 3, 2])]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_85 = transpose(perm = var_869, x = var_868)[name = string("transpose_26")]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; + tensor var_895 = mul(x = x1_13, y = cos_7)[name = string("op_895")]; + tensor var_896 = mul(x = x2_13, y = sin_7)[name = string("op_896")]; + tensor var_897 = sub(x = var_895, y = var_896)[name = string("op_897")]; + tensor var_898 = mul(x = x2_13, y = cos_7)[name = string("op_898")]; + tensor var_899 = mul(x = x1_13, y = sin_7)[name = string("op_899")]; + tensor var_900 = add(x = var_898, y = var_899)[name = string("op_900")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13 = concat(axis = var_42, interleave = rotated_13_interleave_0, values = (var_897, var_900))[name = string("rotated_13")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_89 = transpose(perm = var_873, x = var_872)[name = string("transpose_25")]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")]; + tensor var_916 = mul(x = x1_15, y = cos_7)[name = string("op_916")]; + tensor var_917 = mul(x = x2_15, y = sin_7)[name = string("op_917")]; + tensor var_918 = sub(x = var_916, y = var_917)[name = string("op_918")]; + tensor var_919 = mul(x = x2_15, y = cos_7)[name = string("op_919")]; + tensor var_920 = mul(x = x1_15, y = sin_7)[name = string("op_920")]; + tensor var_921 = add(x = var_919, y = var_920)[name = string("op_921")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15 = concat(axis = var_42, interleave = rotated_15_interleave_0, values = (var_918, var_921))[name = string("rotated_15")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_339, concat_57_values3_0))[name = string("concat_57")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([31])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([32])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; + tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; + tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; + int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; + bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; + tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_339, concat_61_values3_0))[name = string("concat_61")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_21 = transpose(perm = var_877, x = var_876)[name = string("transpose_24")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; + tensor var_944_begin_0 = const()[name = string("op_944_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_944_end_0 = const()[name = string("op_944_end_0"), val = tensor([4, 8, 1024, 128])]; + tensor var_944_end_mask_0 = const()[name = string("op_944_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = coreml_update_state_21)[name = string("op_944_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_944_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_946_begin_0 = const()[name = string("op_946_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_946_end_0 = const()[name = string("op_946_end_0"), val = tensor([32, 8, 1024, 128])]; + tensor var_946_end_mask_0 = const()[name = string("op_946_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_946_cast_fp16 = slice_by_index(begin = var_946_begin_0, end = var_946_end_0, end_mask = var_946_end_mask_0, x = coreml_update_state_21)[name = string("op_946_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_946_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_955 = const()[name = string("op_955"), val = tensor([1, 3, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_955, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_959 = const()[name = string("op_959"), val = tensor([1, -1, 1024, 128])]; + tensor var_960_cast_fp16 = reshape(shape = var_959, x = x_97_cast_fp16)[name = string("op_960_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_962 = const()[name = string("op_962"), val = tensor([1, 3, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_962, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + bool var_969_transpose_x_0 = const()[name = string("op_969_transpose_x_0"), val = bool(false)]; + bool var_969_transpose_y_0 = const()[name = string("op_969_transpose_y_0"), val = bool(true)]; + tensor var_969_cast_fp16 = matmul(transpose_x = var_969_transpose_x_0, transpose_y = var_969_transpose_y_0, x = rotated_13, y = var_960_cast_fp16)[name = string("op_969_cast_fp16")]; + fp16 var_970_to_fp16 = const()[name = string("op_970_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_7_cast_fp16 = mul(x = var_969_cast_fp16, y = var_970_to_fp16)[name = string("attn_weights_7_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_981_axes_0 = const()[name = string("op_981_axes_0"), val = tensor([-1])]; + bool var_981_keep_dims_0 = const()[name = string("op_981_keep_dims_0"), val = bool(true)]; + tensor var_981_cast_fp16 = reduce_sum(axes = var_981_axes_0, keep_dims = var_981_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_981_cast_fp16")]; + tensor var_982_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_981_cast_fp16)[name = string("op_982_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([24, 64, 1024])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_982_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([24, 1024, 128])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 24, 64, 128])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_985_perm_0 = const()[name = string("op_985_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_987 = const()[name = string("op_987"), val = tensor([1, 64, 3072])]; + tensor var_985_cast_fp16 = transpose(perm = var_985_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_23")]; + tensor input_47_cast_fp16 = reshape(shape = var_987, x = var_985_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745658688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755095936))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_998_axes_0 = const()[name = string("op_998_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755292608)))]; + tensor var_998_cast_fp16 = layer_norm(axes = var_998_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_998_cast_fp16")]; + tensor var_1005 = const()[name = string("op_1005"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_1006 = transpose(perm = var_1005, x = var_998_cast_fp16)[name = string("transpose_22")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1006)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1028_axes_0 = const()[name = string("op_1028_axes_0"), val = tensor([2])]; + tensor var_1028 = squeeze(axes = var_1028_axes_0, x = hidden_states_31)[name = string("op_1028")]; + tensor var_1029 = const()[name = string("op_1029"), val = tensor([0, 2, 1])]; + tensor var_1030 = transpose(perm = var_1029, x = var_1028)[name = string("transpose_21")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1030)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1038_axes_0 = const()[name = string("op_1038_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755298816)))]; + tensor var_1038_cast_fp16 = layer_norm(axes = var_1038_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1042 = const()[name = string("op_1042"), val = tensor([0, 2, 1])]; + tensor var_1044_axes_0 = const()[name = string("op_1044_axes_0"), val = tensor([2])]; + tensor var_1043 = transpose(perm = var_1042, x = var_1038_cast_fp16)[name = string("transpose_20")]; + tensor var_1044 = expand_dims(axes = var_1044_axes_0, x = var_1043)[name = string("op_1044")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1044)[name = string("query_states_17")]; + string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; + tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; + tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; + int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; + tensor key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1044)[name = string("key_states_25")]; + string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; + tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; + tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; + int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; + tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1044)[name = string("value_states_25")]; + tensor var_1064 = const()[name = string("op_1064"), val = tensor([1, 24, 128, 64])]; + tensor var_1065 = reshape(shape = var_1064, x = query_states_17)[name = string("op_1065")]; + tensor var_1066 = const()[name = string("op_1066"), val = tensor([0, 1, 3, 2])]; + tensor var_1068 = const()[name = string("op_1068"), val = tensor([1, 8, 128, 64])]; + tensor var_1069 = reshape(shape = var_1068, x = key_states_25)[name = string("op_1069")]; + tensor var_1070 = const()[name = string("op_1070"), val = tensor([0, 1, 3, 2])]; + tensor var_1072 = const()[name = string("op_1072"), val = tensor([1, 8, 128, 64])]; + tensor var_1073 = reshape(shape = var_1072, x = value_states_25)[name = string("op_1073")]; + tensor var_1074 = const()[name = string("op_1074"), val = tensor([0, 1, 3, 2])]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_113 = transpose(perm = var_1066, x = var_1065)[name = string("transpose_19")]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")]; + tensor var_1092 = mul(x = x1_17, y = cos_7)[name = string("op_1092")]; + tensor var_1093 = mul(x = x2_17, y = sin_7)[name = string("op_1093")]; + tensor var_1094 = sub(x = var_1092, y = var_1093)[name = string("op_1094")]; + tensor var_1095 = mul(x = x2_17, y = cos_7)[name = string("op_1095")]; + tensor var_1096 = mul(x = x1_17, y = sin_7)[name = string("op_1096")]; + tensor var_1097 = add(x = var_1095, y = var_1096)[name = string("op_1097")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17 = concat(axis = var_42, interleave = rotated_17_interleave_0, values = (var_1094, var_1097))[name = string("rotated_17")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_117 = transpose(perm = var_1070, x = var_1069)[name = string("transpose_18")]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")]; + tensor var_1113 = mul(x = x1_19, y = cos_7)[name = string("op_1113")]; + tensor var_1114 = mul(x = x2_19, y = sin_7)[name = string("op_1114")]; + tensor var_1115 = sub(x = var_1113, y = var_1114)[name = string("op_1115")]; + tensor var_1116 = mul(x = x2_19, y = cos_7)[name = string("op_1116")]; + tensor var_1117 = mul(x = x1_19, y = sin_7)[name = string("op_1117")]; + tensor var_1118 = add(x = var_1116, y = var_1117)[name = string("op_1118")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19 = concat(axis = var_42, interleave = rotated_19_interleave_0, values = (var_1115, var_1118))[name = string("rotated_19")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_339, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([32])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([33])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_339, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_27 = transpose(perm = var_1074, x = var_1073)[name = string("transpose_17")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; + tensor var_1141_begin_0 = const()[name = string("op_1141_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1141_end_0 = const()[name = string("op_1141_end_0"), val = tensor([5, 8, 1024, 128])]; + tensor var_1141_end_mask_0 = const()[name = string("op_1141_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1141_cast_fp16 = slice_by_index(begin = var_1141_begin_0, end = var_1141_end_0, end_mask = var_1141_end_mask_0, x = coreml_update_state_23)[name = string("op_1141_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1141_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1143_begin_0 = const()[name = string("op_1143_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_1143_end_0 = const()[name = string("op_1143_end_0"), val = tensor([33, 8, 1024, 128])]; + tensor var_1143_end_mask_0 = const()[name = string("op_1143_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1143_cast_fp16 = slice_by_index(begin = var_1143_begin_0, end = var_1143_end_0, end_mask = var_1143_end_mask_0, x = coreml_update_state_23)[name = string("op_1143_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1143_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1152 = const()[name = string("op_1152"), val = tensor([1, 3, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1152, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1156 = const()[name = string("op_1156"), val = tensor([1, -1, 1024, 128])]; + tensor var_1157_cast_fp16 = reshape(shape = var_1156, x = x_125_cast_fp16)[name = string("op_1157_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1159 = const()[name = string("op_1159"), val = tensor([1, 3, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1159, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + bool var_1166_transpose_x_0 = const()[name = string("op_1166_transpose_x_0"), val = bool(false)]; + bool var_1166_transpose_y_0 = const()[name = string("op_1166_transpose_y_0"), val = bool(true)]; + tensor var_1166_cast_fp16 = matmul(transpose_x = var_1166_transpose_x_0, transpose_y = var_1166_transpose_y_0, x = rotated_17, y = var_1157_cast_fp16)[name = string("op_1166_cast_fp16")]; + fp16 var_1167_to_fp16 = const()[name = string("op_1167_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_1166_cast_fp16, y = var_1167_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1178_axes_0 = const()[name = string("op_1178_axes_0"), val = tensor([-1])]; + bool var_1178_keep_dims_0 = const()[name = string("op_1178_keep_dims_0"), val = bool(true)]; + tensor var_1178_cast_fp16 = reduce_sum(axes = var_1178_axes_0, keep_dims = var_1178_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1178_cast_fp16")]; + tensor var_1179_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1178_cast_fp16)[name = string("op_1179_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([24, 64, 1024])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1179_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([24, 1024, 128])]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 24, 64, 128])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor var_1182_perm_0 = const()[name = string("op_1182_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1184 = const()[name = string("op_1184"), val = tensor([1, 64, 3072])]; + tensor var_1182_cast_fp16 = transpose(perm = var_1182_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_16")]; + tensor input_61_cast_fp16 = reshape(shape = var_1184, x = var_1182_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755305024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764742272))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1195_axes_0 = const()[name = string("op_1195_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764938944)))]; + tensor var_1195_cast_fp16 = layer_norm(axes = var_1195_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1195_cast_fp16")]; + tensor var_1202 = const()[name = string("op_1202"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1203 = transpose(perm = var_1202, x = var_1195_cast_fp16)[name = string("transpose_15")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1203)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1225_axes_0 = const()[name = string("op_1225_axes_0"), val = tensor([2])]; + tensor var_1225 = squeeze(axes = var_1225_axes_0, x = hidden_states_39)[name = string("op_1225")]; + tensor var_1226 = const()[name = string("op_1226"), val = tensor([0, 2, 1])]; + tensor var_1227 = transpose(perm = var_1226, x = var_1225)[name = string("transpose_14")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1227)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1235_axes_0 = const()[name = string("op_1235_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764945152)))]; + tensor var_1235_cast_fp16 = layer_norm(axes = var_1235_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1235_cast_fp16")]; + tensor var_1239 = const()[name = string("op_1239"), val = tensor([0, 2, 1])]; + tensor var_1241_axes_0 = const()[name = string("op_1241_axes_0"), val = tensor([2])]; + tensor var_1240 = transpose(perm = var_1239, x = var_1235_cast_fp16)[name = string("transpose_13")]; + tensor var_1241 = expand_dims(axes = var_1241_axes_0, x = var_1240)[name = string("op_1241")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1241)[name = string("query_states_21")]; + string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; + tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; + tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; + int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; + tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1241)[name = string("key_states_31")]; + string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")]; + tensor value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor([1, 1])]; + tensor value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor([1, 1])]; + int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)]; + tensor value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1241)[name = string("value_states_31")]; + tensor var_1261 = const()[name = string("op_1261"), val = tensor([1, 24, 128, 64])]; + tensor var_1262 = reshape(shape = var_1261, x = query_states_21)[name = string("op_1262")]; + tensor var_1263 = const()[name = string("op_1263"), val = tensor([0, 1, 3, 2])]; + tensor var_1265 = const()[name = string("op_1265"), val = tensor([1, 8, 128, 64])]; + tensor var_1266 = reshape(shape = var_1265, x = key_states_31)[name = string("op_1266")]; + tensor var_1267 = const()[name = string("op_1267"), val = tensor([0, 1, 3, 2])]; + tensor var_1269 = const()[name = string("op_1269"), val = tensor([1, 8, 128, 64])]; + tensor var_1270 = reshape(shape = var_1269, x = value_states_31)[name = string("op_1270")]; + tensor var_1271 = const()[name = string("op_1271"), val = tensor([0, 1, 3, 2])]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_141 = transpose(perm = var_1263, x = var_1262)[name = string("transpose_12")]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")]; + tensor var_1289 = mul(x = x1_21, y = cos_7)[name = string("op_1289")]; + tensor var_1290 = mul(x = x2_21, y = sin_7)[name = string("op_1290")]; + tensor var_1291 = sub(x = var_1289, y = var_1290)[name = string("op_1291")]; + tensor var_1292 = mul(x = x2_21, y = cos_7)[name = string("op_1292")]; + tensor var_1293 = mul(x = x1_21, y = sin_7)[name = string("op_1293")]; + tensor var_1294 = add(x = var_1292, y = var_1293)[name = string("op_1294")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21 = concat(axis = var_42, interleave = rotated_21_interleave_0, values = (var_1291, var_1294))[name = string("rotated_21")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_145 = transpose(perm = var_1267, x = var_1266)[name = string("transpose_11")]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")]; + tensor var_1310 = mul(x = x1_23, y = cos_7)[name = string("op_1310")]; + tensor var_1311 = mul(x = x2_23, y = sin_7)[name = string("op_1311")]; + tensor var_1312 = sub(x = var_1310, y = var_1311)[name = string("op_1312")]; + tensor var_1313 = mul(x = x2_23, y = cos_7)[name = string("op_1313")]; + tensor var_1314 = mul(x = x1_23, y = sin_7)[name = string("op_1314")]; + tensor var_1315 = add(x = var_1313, y = var_1314)[name = string("op_1315")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23 = concat(axis = var_42, interleave = rotated_23_interleave_0, values = (var_1312, var_1315))[name = string("rotated_23")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_339, concat_93_values3_0))[name = string("concat_93")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([33])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([34])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; + tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; + tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; + int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; + bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; + tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_339, concat_97_values3_0))[name = string("concat_97")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_33 = transpose(perm = var_1271, x = var_1270)[name = string("transpose_10")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; + tensor var_1338_begin_0 = const()[name = string("op_1338_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1338_end_0 = const()[name = string("op_1338_end_0"), val = tensor([6, 8, 1024, 128])]; + tensor var_1338_end_mask_0 = const()[name = string("op_1338_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1338_cast_fp16 = slice_by_index(begin = var_1338_begin_0, end = var_1338_end_0, end_mask = var_1338_end_mask_0, x = coreml_update_state_25)[name = string("op_1338_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1338_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = string("op_1340_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_1340_end_0 = const()[name = string("op_1340_end_0"), val = tensor([34, 8, 1024, 128])]; + tensor var_1340_end_mask_0 = const()[name = string("op_1340_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = coreml_update_state_25)[name = string("op_1340_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1340_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1349 = const()[name = string("op_1349"), val = tensor([1, 3, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1349, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1353 = const()[name = string("op_1353"), val = tensor([1, -1, 1024, 128])]; + tensor var_1354_cast_fp16 = reshape(shape = var_1353, x = x_153_cast_fp16)[name = string("op_1354_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1356 = const()[name = string("op_1356"), val = tensor([1, 3, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1356, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + bool var_1363_transpose_x_0 = const()[name = string("op_1363_transpose_x_0"), val = bool(false)]; + bool var_1363_transpose_y_0 = const()[name = string("op_1363_transpose_y_0"), val = bool(true)]; + tensor var_1363_cast_fp16 = matmul(transpose_x = var_1363_transpose_x_0, transpose_y = var_1363_transpose_y_0, x = rotated_21, y = var_1354_cast_fp16)[name = string("op_1363_cast_fp16")]; + fp16 var_1364_to_fp16 = const()[name = string("op_1364_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_11_cast_fp16 = mul(x = var_1363_cast_fp16, y = var_1364_to_fp16)[name = string("attn_weights_11_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1375_axes_0 = const()[name = string("op_1375_axes_0"), val = tensor([-1])]; + bool var_1375_keep_dims_0 = const()[name = string("op_1375_keep_dims_0"), val = bool(true)]; + tensor var_1375_cast_fp16 = reduce_sum(axes = var_1375_axes_0, keep_dims = var_1375_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1375_cast_fp16")]; + tensor var_1376_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1375_cast_fp16)[name = string("op_1376_cast_fp16")]; + tensor concat_102 = const()[name = string("concat_102"), val = tensor([24, 64, 1024])]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1376_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor concat_103 = const()[name = string("concat_103"), val = tensor([24, 1024, 128])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 24, 64, 128])]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor var_1379_perm_0 = const()[name = string("op_1379_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1381 = const()[name = string("op_1381"), val = tensor([1, 64, 3072])]; + tensor var_1379_cast_fp16 = transpose(perm = var_1379_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_9")]; + tensor input_75_cast_fp16 = reshape(shape = var_1381, x = var_1379_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764951360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774388608))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1392_axes_0 = const()[name = string("op_1392_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774585280)))]; + tensor var_1392_cast_fp16 = layer_norm(axes = var_1392_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1392_cast_fp16")]; + tensor var_1399 = const()[name = string("op_1399"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1400 = transpose(perm = var_1399, x = var_1392_cast_fp16)[name = string("transpose_8")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1400)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1422_axes_0 = const()[name = string("op_1422_axes_0"), val = tensor([2])]; + tensor var_1422 = squeeze(axes = var_1422_axes_0, x = hidden_states_47)[name = string("op_1422")]; + tensor var_1423 = const()[name = string("op_1423"), val = tensor([0, 2, 1])]; + tensor var_1424 = transpose(perm = var_1423, x = var_1422)[name = string("transpose_7")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1424)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1432_axes_0 = const()[name = string("op_1432_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774591488)))]; + tensor var_1432_cast_fp16 = layer_norm(axes = var_1432_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1432_cast_fp16")]; + tensor var_1436 = const()[name = string("op_1436"), val = tensor([0, 2, 1])]; + tensor var_1438_axes_0 = const()[name = string("op_1438_axes_0"), val = tensor([2])]; + tensor var_1437 = transpose(perm = var_1436, x = var_1432_cast_fp16)[name = string("transpose_6")]; + tensor var_1438 = expand_dims(axes = var_1438_axes_0, x = var_1437)[name = string("op_1438")]; + string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; + tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; + tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; + int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; + tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1438)[name = string("query_states_25")]; + string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; + tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; + tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; + int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; + tensor key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1438)[name = string("key_states_37")]; + string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; + tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; + tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; + int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; + tensor value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1438)[name = string("value_states_37")]; + tensor var_1458 = const()[name = string("op_1458"), val = tensor([1, 24, 128, 64])]; + tensor var_1459 = reshape(shape = var_1458, x = query_states_25)[name = string("op_1459")]; + tensor var_1460 = const()[name = string("op_1460"), val = tensor([0, 1, 3, 2])]; + tensor var_1462 = const()[name = string("op_1462"), val = tensor([1, 8, 128, 64])]; + tensor var_1463 = reshape(shape = var_1462, x = key_states_37)[name = string("op_1463")]; + tensor var_1464 = const()[name = string("op_1464"), val = tensor([0, 1, 3, 2])]; + tensor var_1466 = const()[name = string("op_1466"), val = tensor([1, 8, 128, 64])]; + tensor var_1467 = reshape(shape = var_1466, x = value_states_37)[name = string("op_1467")]; + tensor var_1468 = const()[name = string("op_1468"), val = tensor([0, 1, 3, 2])]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 24, 64, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_169 = transpose(perm = var_1460, x = var_1459)[name = string("transpose_5")]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 24, 64, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")]; + tensor var_1486 = mul(x = x1_25, y = cos_7)[name = string("op_1486")]; + tensor var_1487 = mul(x = x2_25, y = sin_7)[name = string("op_1487")]; + tensor var_1488 = sub(x = var_1486, y = var_1487)[name = string("op_1488")]; + tensor var_1489 = mul(x = x2_25, y = cos_7)[name = string("op_1489")]; + tensor var_1490 = mul(x = x1_25, y = sin_7)[name = string("op_1490")]; + tensor var_1491 = add(x = var_1489, y = var_1490)[name = string("op_1491")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25 = concat(axis = var_42, interleave = rotated_25_interleave_0, values = (var_1488, var_1491))[name = string("rotated_25")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_173 = transpose(perm = var_1464, x = var_1463)[name = string("transpose_4")]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_173)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_173)[name = string("x2")]; + tensor var_1507 = mul(x = x1, y = cos_7)[name = string("op_1507")]; + tensor var_1508 = mul(x = x2, y = sin_7)[name = string("op_1508")]; + tensor var_1509 = sub(x = var_1507, y = var_1508)[name = string("op_1509")]; + tensor var_1510 = mul(x = x2, y = cos_7)[name = string("op_1510")]; + tensor var_1511 = mul(x = x1, y = sin_7)[name = string("op_1511")]; + tensor var_1512 = add(x = var_1510, y = var_1511)[name = string("op_1512")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated = concat(axis = var_42, interleave = rotated_interleave_0, values = (var_1509, var_1512))[name = string("rotated")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_339, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([34])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([35])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_339, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_39 = transpose(perm = var_1468, x = var_1467)[name = string("transpose_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; + tensor var_1535_begin_0 = const()[name = string("op_1535_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_1535_end_0 = const()[name = string("op_1535_end_0"), val = tensor([7, 8, 1024, 128])]; + tensor var_1535_end_mask_0 = const()[name = string("op_1535_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1535_cast_fp16 = slice_by_index(begin = var_1535_begin_0, end = var_1535_end_0, end_mask = var_1535_end_mask_0, x = coreml_update_state_27)[name = string("op_1535_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1535_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1537_begin_0 = const()[name = string("op_1537_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_1537_end_0 = const()[name = string("op_1537_end_0"), val = tensor([35, 8, 1024, 128])]; + tensor var_1537_end_mask_0 = const()[name = string("op_1537_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1537_cast_fp16 = slice_by_index(begin = var_1537_begin_0, end = var_1537_end_0, end_mask = var_1537_end_mask_0, x = coreml_update_state_27)[name = string("op_1537_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1537_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1546 = const()[name = string("op_1546"), val = tensor([1, 3, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1546, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1550 = const()[name = string("op_1550"), val = tensor([1, -1, 1024, 128])]; + tensor var_1551_cast_fp16 = reshape(shape = var_1550, x = x_181_cast_fp16)[name = string("op_1551_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1553 = const()[name = string("op_1553"), val = tensor([1, 3, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1553, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_1560_transpose_x_0 = const()[name = string("op_1560_transpose_x_0"), val = bool(false)]; + bool var_1560_transpose_y_0 = const()[name = string("op_1560_transpose_y_0"), val = bool(true)]; + tensor var_1560_cast_fp16 = matmul(transpose_x = var_1560_transpose_x_0, transpose_y = var_1560_transpose_y_0, x = rotated_25, y = var_1551_cast_fp16)[name = string("op_1560_cast_fp16")]; + fp16 var_1561_to_fp16 = const()[name = string("op_1561_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_cast_fp16 = mul(x = var_1560_cast_fp16, y = var_1561_to_fp16)[name = string("attn_weights_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_1572_axes_0 = const()[name = string("op_1572_axes_0"), val = tensor([-1])]; + bool var_1572_keep_dims_0 = const()[name = string("op_1572_keep_dims_0"), val = bool(true)]; + tensor var_1572_cast_fp16 = reduce_sum(axes = var_1572_axes_0, keep_dims = var_1572_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1572_cast_fp16")]; + tensor var_1573_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1572_cast_fp16)[name = string("op_1573_cast_fp16")]; + tensor concat_120 = const()[name = string("concat_120"), val = tensor([24, 64, 1024])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1573_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor concat_121 = const()[name = string("concat_121"), val = tensor([24, 1024, 128])]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 24, 64, 128])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor var_1576_perm_0 = const()[name = string("op_1576_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1578 = const()[name = string("op_1578"), val = tensor([1, 64, 3072])]; + tensor var_1576_cast_fp16 = transpose(perm = var_1576_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_2")]; + tensor input_89_cast_fp16 = reshape(shape = var_1578, x = var_1576_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774597696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784034944))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1589_axes_0 = const()[name = string("op_1589_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784231616)))]; + tensor var_1589_cast_fp16 = layer_norm(axes = var_1589_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1589_cast_fp16")]; + tensor var_1596 = const()[name = string("op_1596"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1597 = transpose(perm = var_1596, x = var_1589_cast_fp16)[name = string("transpose_1")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1597)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states")]; + tensor gate_states = silu(x = input_95)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_1619_axes_0 = const()[name = string("op_1619_axes_0"), val = tensor([2])]; + tensor var_1619 = squeeze(axes = var_1619_axes_0, x = hidden_states_1)[name = string("op_1619")]; + tensor var_1620 = const()[name = string("op_1620"), val = tensor([0, 2, 1])]; + tensor var_1621 = transpose(perm = var_1620, x = var_1619)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_53_cast_fp16, y = var_1621)[name = string("op_1622_cast_fp16")]; + } -> (output_hidden_states); +} \ No newline at end of file