diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,10505 @@ +{ + "metadata": { + "ParamSize": 867, + "ParamBytes": 6619494912.0, + "BitsPerParam": 4.3451667303689145 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 503439360, + "records": [ + { + "name": "language_model.model.embed_tokens.q_weight", + "shape": [ + 262208, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 503439360, + "byteOffset": 0 + } + ], + "md5sum": "ead30219d0421f04ab408552ee845a92" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 62929920, + "records": [ + { + "name": "language_model.model.embed_tokens.q_scale", + "shape": [ + 262208, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 62929920, + "byteOffset": 0 + } + ], + "md5sum": "885ec15c62b36f400e2fa10ac1479175" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "afd8f5ff1b4b6bf3d3d766d8a8a2cf08" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33185280, + "records": [ + { + "name": "language_model.model.layers.0.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 7680 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 29498880 + } + ], + "md5sum": "0b55f9cb61f4cae5548ab2d2bd2e9255" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.0.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.0.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "74f998eb2b4ae14017d1c25ede0398b9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "df822705d1b79000017ef82f6c895172" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "b4f304aad94c48e8da06e16a262c46d1" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.1.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.1.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "a7c5712341513696c0a9d03f7871c45e" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "0619ec49914dd505f390e2e69834ae4b" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "3d93e16b23d77b683ea49bffbacdf704" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.2.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.2.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.2.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "fbde2261e9126ad8bcc990271a47694e" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "50a9b8c0a885208d46e3bb4374e6fe83" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f7d8a2d37625cccec331d6672410fd6d" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.3.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.3.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "f072fe010bd84462490f23591c47201f" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "288b799ed8e578c84b7e6b67b6891183" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33424384, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.3.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.4.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 29491712 + }, + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29492224 + } + ], + "md5sum": "898590bc5ebf6d6a2df04286d92932df" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "2c4b2cbc43eba9a21c25851058522599" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "248d57aed86302d195ee682c2e6dac72" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 26304512, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 8355840 + }, + { + "name": "language_model.model.layers.4.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 9338880 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 9339392 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 17203712 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 18186752 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.10.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22610432 + }, + { + "name": "language_model.model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22618112 + } + ], + "md5sum": "93d83ddb8ef9e02d9f4a906364a73dd3" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.10.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.10.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "cbee960bb7c4c48daa9156f86954991d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "09fe65d1a90415a56b27c5e135bb213d" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "73c77ead8fcf554605df5f7ecf9fc7c3" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.11.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.11.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "e80af94bf30289efedb40409b798f350" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "5e439dd9302d4a13da2f2b90575054c9" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "a7aeddaf22d9aaf6255b0d20bc8bdb01" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.12.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.12.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.12.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "476e2693741695d3b424396a91d7b8b6" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "25ba5840d77168f11dad4e679c004982" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "0480f8218f10dc7e65a57339aafef20e" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.13.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.13.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "1e627eb2ed0a2652d004143c1353d0f4" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "be3bc8eade0fb9a6392fc3a3c93b9b7c" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "3f4bb1ff4c288496a1e7b8aa11df7447" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.13.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.14.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.14.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "2f708e4c3ef0c5484790bde802e125d3" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "ded4385d608322b170c3aa4eb2a5475c" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 26542592, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.14.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 26051072 + } + ], + "md5sum": "5fe0e2a4ecc50307a9c37730f690194e" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33424384, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7373312 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11305472 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11796992 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19661312 + }, + { + "name": "language_model.model.layers.15.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20644352 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20644864 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28509184 + }, + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29492224 + } + ], + "md5sum": "33f56706cec7168f552216af118400fc" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "6f6cc80a4b81976615d0c3b4c003feca" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33208320, + "records": [ + { + "name": "language_model.model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 3694080 + }, + { + "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 3701760 + }, + { + "name": "language_model.model.layers.5.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 3709440 + }, + { + "name": "language_model.model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 3717120 + } + ], + "md5sum": "fa94df2df398acc63d1b649aed3a79be" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "1353b93438782e93ed9500ef37bacf59" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.5.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.5.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "bd2f0d39e3563919194f4102685ff2b3" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "781cd922156cad827ee5b0e806ee77be" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "b1716079762aaaf9a178fc91edf2ad0f" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.6.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.6.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.6.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "e4e163b9be800966a6558b594b38e721" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "9b3f9c8ef2bc541da2868ede9ab3f64c" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "97aadbed5def3030bab55779a0d58af9" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.7.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.7.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "23bfefb47a1817ec46fc6c521fdd3e66" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "80d320b3232d2b78c69aa1f5e257106c" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "9f099a574f79e23b80f3611322322917" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.7.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.8.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.8.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "d1f707c630c031d967372c5224990110" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "6a2019e48e90831b1cb3e4c94018a109" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "62ec157cfc80db0d14525bf738d66a7a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.8.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.9.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "21f381379a74211e448a1a6a2a0c616e" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.9.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.9.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "87c0914ec9a604f418d3e10f2103b45e" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "ad4f868423d60e100358ef0870e7bc95" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 33208320, + "records": [ + { + "name": "language_model.model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 3694080 + }, + { + "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 3701760 + }, + { + "name": "language_model.model.layers.16.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 3709440 + }, + { + "name": "language_model.model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 3717120 + } + ], + "md5sum": "9f3ed00f35adf73436ca3dc8d8844870" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "c2a458b0f33d55fc6eafc7d899cb78fb" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.16.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.16.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "4c8c6317c800fd2138fc7134fce9af05" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "4c20e80aee02935235c609e9eea6f107" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "4b7981ab5cd36a0a2aabb1392b4ccf83" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.17.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.17.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.17.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "4d5b6ac4df1b223cae9712349ec937d1" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "88b00a27a47a707fd41c45fd0126f1c4" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "c816da3aa729b87d2a1c93ccd66b8664" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.18.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.18.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.18.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.18.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "d168ae90193d5eae047e17515f2b2eb0" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "94eec85e2312810d10987a147798501f" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "d95ee6f803fc6a1e0690ed15dacfec52" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.18.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.18.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.18.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.18.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.18.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.19.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.19.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "cedd3c28c35f185669dbce5ff782d294" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "7d29a7d6d8a2ce2ac4229af3e2fdb65d" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "2d47f320e2a5292fe1d82e3f35d9aa54" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.19.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.19.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.19.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.19.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.19.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.19.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.20.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "ff97d38a08a1f6c3b71a7f1e671d5cae" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.20.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.20.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.20.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.20.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.20.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.20.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.20.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "8dace36175b2a0b0c5f08768d9d804b4" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "4d4e16491935d1869664e119f01a659a" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "a1265acd674b560ee2e12361a89e31d1" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.21.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.21.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.21.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.21.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.21.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.21.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "da6e6cba53caa8f27e542ae33fb7238d" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "7a52ea24d4fc1e67559210cc15b250af" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "3d2271e86e5218bd58513c68437a0c7f" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.21.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.22.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.22.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.22.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.22.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.22.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "3c209924d19e1682c6013fcaf00dac04" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "84fffaa1f00f202e82330808eb401fc3" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "2bcf08dd17d8998d53041c823fd5a4d3" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.22.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.22.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.22.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.23.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.23.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.23.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.23.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "cabb3a85bf0225f4a6eb3ee32bfb436e" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "114263b9364041e11952c7558b03646a" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "bd81959ec5557259257ee86d1244857d" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.23.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.23.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.23.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.23.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.23.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.24.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.24.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "a55839f59155618dc85e674cd080314a" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "21d7e05d81db19bbcc534648acf26b36" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "5437fb33102b2c93b7c02277fcb8a468" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.24.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.24.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.24.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.24.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.24.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.24.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.25.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "d60f3b5ea2e7282b3c345d59061b3b04" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.25.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.25.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.25.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.25.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.25.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.25.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.25.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "f25475fa21cbbaeddd251e20517cdd87" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "4a76d1cb7f3facc1a8724caaa3ae47ce" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 29983744, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.26.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.26.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7864832 + }, + { + "name": "language_model.model.layers.26.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11796992 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 12288512 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 20152832 + }, + { + "name": "language_model.model.layers.26.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.26.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 21136384 + }, + { + "name": "language_model.model.layers.26.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 29000704 + } + ], + "md5sum": "56b4387000e8e51a9c6e56168aa93c45" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "653e8b5a2355f5c9d032c3084dfe5c49" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "b274bb7e08b0098345e41d3091785ee7" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "946fae092a98ddbfadccc8ece69c4d3c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 32502784, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.26.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.26.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 8125440 + }, + { + "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 8133120 + }, + { + "name": "language_model.model.layers.27.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 8140800 + }, + { + "name": "language_model.model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 8148480 + }, + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 11834880 + }, + { + "name": "language_model.model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 19207680 + }, + { + "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 19215360 + }, + { + "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 19223040 + }, + { + "name": "language_model.model.layers.27.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 19230720 + }, + { + "name": "language_model.model.layers.27.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 19231232 + }, + { + "name": "language_model.model.layers.27.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 23163392 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 23654912 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 31519232 + }, + { + "name": "language_model.model.layers.27.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 32502272 + } + ], + "md5sum": "a8256f98bfacd7f8164416bbbc3147c0" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "d8685f353f802247ff11df2ec5d2dada" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "5e0ed3ba421923fd1974090e4bf98e30" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.27.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.27.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.27.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.27.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.28.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.28.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.28.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.28.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.28.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "2a492c87378aa203a172a8dae5db59dd" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "598782986b9db661fe057576c47113ba" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "55b86d308813dcc7e2679ce05552bef7" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.28.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.28.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.28.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.28.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.28.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.29.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.29.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.29.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "908ba045cfac82b30de91a9467bcb33e" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "554c75f5f51b202e3783503e6043dbee" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "596d5537e1bd48bc03ff7e67458581ac" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.29.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.29.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.29.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.29.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.29.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.29.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.29.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.30.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "5510a40e68963bbc84ea16465c43218e" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.30.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.30.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.30.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.30.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.30.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.30.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.30.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.30.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "1f6eded35e3270ebf5db435a1664fcda" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "5309f29050b081a0b67630da5579a516" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "c9c7f992d49d93f32704faf37560fa22" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.31.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.31.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.31.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.31.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.31.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.31.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.31.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "0c587173958a45f6ea069eea592dde8f" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "fc5a84552c6ad2b82c7f5d94ef6b407b" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f241a932c093e7fe6217cae6499eb6e3" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.32.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.32.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.32.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.32.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.32.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.32.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "e6239f6f3318b21340d90cb84586be70" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "e631b7ab93324872d1d59d93a4f86a0c" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "010fabbd3df69efa9aedd8dc2c6664ee" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.32.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.32.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.32.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.32.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.33.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.33.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.33.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.33.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.33.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "c24617cd0a6f1e0f7396dbcbec6d3cde" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "b0fc037529cee6db8e11d26ba4a0b08a" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "e809d8aada554295219cefb6f995bf26" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.33.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.33.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.33.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.33.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.33.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.34.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.34.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.34.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "5b662c4132a9180ade6ec0103dcad7ba" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "037b93dd6edef20a5de16a8a1952b146" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f8511eae8c6c89fc36ab39bdc450fb3c" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.34.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.34.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.34.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.34.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.34.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.34.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.34.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.35.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "c9dff1c21a56eb9e8980cb0dbf6a0ad4" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.35.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.35.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.35.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.35.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.35.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.35.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.35.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.35.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "d53125beefa318fb62b5235e385704e4" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.35.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.36.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "a22dfd7e0e7f7c2b7e704d4275b3a65a" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f2e3440f7ecd2b352ed2b98d1f887067" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.36.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.36.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.36.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.36.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.36.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.36.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.36.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "820cd36ac8675d8ef54ceed4d7b3d629" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "400017b83e7e2db23f29cef1b4545bba" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 32932864, + "records": [ + { + "name": "language_model.model.layers.36.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.36.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.37.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11796480 + }, + { + "name": "language_model.model.layers.37.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11796992 + }, + { + "name": "language_model.model.layers.37.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15729152 + }, + { + "name": "language_model.model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 16220672 + }, + { + "name": "language_model.model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 24084992 + }, + { + "name": "language_model.model.layers.37.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 25068032 + }, + { + "name": "language_model.model.layers.37.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 25068544 + } + ], + "md5sum": "768b302964e2c54eb77101383c5c67ee" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "45bf5664eb45e585b662532fb31289fd" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "4a1b5a807aff3ba01387a3027fe29e46" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "1b01c3a937d16ade4c404bd8830438b0" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 33485824, + "records": [ + { + "name": "language_model.model.layers.37.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.37.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 983040 + }, + { + "name": "language_model.model.layers.37.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 4915200 + }, + { + "name": "language_model.model.layers.37.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 5406720 + }, + { + "name": "language_model.model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 5414400 + }, + { + "name": "language_model.model.layers.37.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 9100800 + }, + { + "name": "language_model.model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 9108480 + }, + { + "name": "language_model.model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 9116160 + }, + { + "name": "language_model.model.layers.38.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 9123840 + }, + { + "name": "language_model.model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 9131520 + }, + { + "name": "language_model.model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 12817920 + }, + { + "name": "language_model.model.layers.38.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 20190720 + }, + { + "name": "language_model.model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 20198400 + }, + { + "name": "language_model.model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 20206080 + }, + { + "name": "language_model.model.layers.38.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20213760 + }, + { + "name": "language_model.model.layers.38.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 20214272 + }, + { + "name": "language_model.model.layers.38.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 24146432 + }, + { + "name": "language_model.model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24637952 + }, + { + "name": "language_model.model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32502272 + }, + { + "name": "language_model.model.layers.38.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33485312 + } + ], + "md5sum": "22e1a7981b0bc7adcc6270a763b3e473" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "43859a0b0d23c1f9398943b70aded7b6" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "a9c17213ec10623d8bdeea2aea6e8300" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.38.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.38.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.38.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.38.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.39.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.39.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.39.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.39.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.39.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "dc5577e75604227be31a06b944a347c6" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "a2ec80be48fd9c81cbfecf7c3fb66fd4" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "92873ce8b1742681275d308cc284c568" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.39.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.39.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.39.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.39.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.39.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.40.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.40.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.40.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "b5f3d49d21b15387b4345ec8208f30aa" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "b259c05fe5ee110b505edc7222f9be89" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "1bb3181268b8cbb14a57083633a27a26" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.40.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.40.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.40.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.40.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.40.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.40.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.40.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.41.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "eb3c9eb295dff256a7dbc4982f4f6e37" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.41.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.41.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.41.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.41.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.41.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.41.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.41.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.41.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "cd70f818c33dc72d7693896de5900c3d" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.41.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.42.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "adb5846c5483e3373057c279283b8868" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "4588b8a21015923b9da423c1d21da198" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.42.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.42.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.42.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.42.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.42.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.42.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.42.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.42.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.42.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "cfbebdb141a7817c12450a800541f2c2" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "16f033ca20a7c01473289243ae49523c" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f22a763c30797635cc16d11f6e23bcbd" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.42.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.42.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.43.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.43.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.43.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.43.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.43.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.43.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.43.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.43.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "c776fe2993013a5b4f84f43f2b1073d5" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "faa62347cede42df45f42d2c68a52c88" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "62fd41ff833fc7664e9c99c3b6191b88" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.43.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.43.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.43.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.43.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.44.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.44.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.44.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.44.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.44.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.44.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.44.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "461c2ea035be9229692748db679a7652" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "296ff4f9d7fd951421959d3db2340df9" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "ec99ebb85c7b07e3e5ee7ce272e07529" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.44.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.44.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.44.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.44.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.44.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.45.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.45.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.45.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.45.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.45.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "2ff961eef30fc8629761ac316ca5a930" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "39a57c19299e2cb05992ca665b5348a1" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f64d2cbda9c58e7fd7c9a8cf053006c5" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.45.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.45.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.45.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.45.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.45.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.45.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.45.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.46.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "174b8da767654df4687af438dfd03fe2" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.46.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.46.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.46.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.46.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.46.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.46.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.46.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.46.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.46.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.46.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "9ffeb07b80fdd409195a020ec6da1acd" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.46.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.47.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "84789ff620e20cc29870cd68389c7836" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "7e48014863f4dd9beb1cd03157b0c533" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.47.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.47.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.47.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.47.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.47.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.47.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.47.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.47.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.47.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "177cbfddefe95f21f681c91d8a90843e" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 4431360, + "records": [ + { + "name": "language_model.model.layers.47.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.47.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.norm.weight", + "shape": [ + 3840 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7680, + "byteOffset": 4423680 + } + ], + "md5sum": "a604ba7dbd311247e25554f194d51245" + } + ] +} \ No newline at end of file