{ "metadata": { "ParamSize": 805, "ParamBytes": 43115020288.0, "BitsPerParam": 4.836979883651508 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131137536, "records": [ { "name": "lm_head.q_weight", "shape": [ 32016, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131137536, "byteOffset": 0 } ], "md5sum": "0f573cb9e05fa0794376371f6fe559da" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e4954c99b7195ff90795e2ab53458ac9" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "99467331511ae936516bc48f0e0754ee" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "06162130e87e6fbf1b175eefafcd51cc" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4a4570a59e89824fc0d9bd7a96152f6f" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31105024, "records": [ { "name": "lm_head.q_scale", "shape": [ 32016, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16392192, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16392192 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 16408576 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 31088640 } ], "md5sum": "43567b5eeaad6fe27075a4f69ce5d422" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2410c0e27d07be455dbc83443118e75b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "103785ec358c0937687a9b8ed0488b05" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "768b9766d2457fd122f972d0b374b507" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "09bbba6c0cea9e800795d943670fe51f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "87667d475eb4b98f506ed9da0bc40f3b" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 131137536, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32016, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131137536, "byteOffset": 0 } ], "md5sum": "fc5e1eb714e755bf93da3c0a3d0a4a27" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 18907136 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "ecc8908bb2733df864be668b19d1a9ce" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e65f0d22147263c9ec0b98ca90c99e1e" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d703739d06c64bb941abaa17f8bcd874" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2a810ebb4be5c9d2fe1ee291301e0e72" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9111ff1a331d1f4da3c99a728e8c5624" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31105024, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32016, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16392192, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16392192 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 16408576 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 31088640 } ], "md5sum": "f0dcb36ccb9b75c58407ef71c932de33" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aa74bfc2f73a0ca93d947eff31a99ff9" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3768b1eb862c1c60eab87dc06210d1a4" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "09fa84362409d757323582994a5153b3" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fe154d5d4e02723e5f0aac268469594d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "012cba8af5fc8a142c687222a5c549d7" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "56cd54f1407258c134a5f97ca62dd5eb" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "c53c32baca97805eecc823fc14cbe1c6" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c7ccbaea410e0654c675594c1d7bda60" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dce7c86ca8dbb8d5f17f8194deefaaf3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "74a227352ac7317202ac5292ab13da65" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3ff04d8658d5545e3173a7b86c00aef8" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0a288b2e3cdd00c8e7497903beba9a6e" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5df5e2122177701d0e52af0663f497c6" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5ec584f04a677313c03b5045516cf2c5" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "56ab781c9b3ce9f2dfe9f4a7070c9b3f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28327936 } ], "md5sum": "26a57936c68b5f07ffa5a764f4e6a896" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a83c922f59779db786a40e9ef96ec97e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0a4f3feb69eb041526a73148ab9d04e1" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dcb312b3d342a518e25203cee3277e9d" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f9ac7f6b03cb7d19b37b958a6e729b56" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7d737c76324f9df1dfc58a6535ce561e" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e6c6531121810b908ee65b57b1a6b4ef" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "2bdad7cf8ff5c73c067e29bd0d4cb8f2" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2474df857fee37c3fee643eee20ac46c" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "565ee9bfb37f175ff28533cc361060d8" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9791b151c58ce2216d5320ccffbb62a" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1b78a5b0d801181de5ce3ee1e0f5f886" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1bded7b25d48ab88602babca02ed3df9" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "95e9c741bdc80c03091aeb9170c71fa4" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5c52fc0abbfceb4c5ac3713b71064d4a" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2066f2275dc826b92d8a689b87036c13" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9a25f2dc9ad5245a8f856f9ac52f6ad0" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a1dc6784ebe11884429e84211f48e23a" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1fd3a4118321bdb5178e87b5f3cff4d4" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "d7b69c5f55af40639821f816a5a271b3" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7f9036dd082360ae02c1cc44f5d7793b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a7e2c7c4e9922f796edd08d567b562c5" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "da90a9990e5d4e169f725d9dd683aa5a" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e5e4e3d694d51a192df26b72d73231cb" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6e2f06eac789c5d601293664ee98bc01" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2ab73aefe3547c817980723d689ddf59" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d5a07431e81cef8e455072b47cb04677" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "630d6c08de09b18ebeaed11038bff14e" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "d1f403425729e5096d2780d08cb76426" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8ff0123722c76318068ad3387387d4bd" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "022e297d677027e49d2b24c621f65603" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "cfc3b404f88edb3df445adfe3735b821" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "66d2158dc54e4574b89da1582ca27b76" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "901444b4a65718586c33443ffe0681ea" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b39876b85cef005925ce8ac577565f07" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a390dbc40b34676d027d0d5f8f1f9758" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "98ca827f33e697c848da4fc8d3649c6d" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "b546ac9d2794b8dddf39682a63fd9589" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b55f2139d4c1092f25ceac014019cc23" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0e4f6466cc7260e8a84f787609dd9543" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f6d24c6a98349038200cf4e8257294a0" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c65a0326a0c9c203c9ce124ff6d39e5a" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "50d339377be6c26ceea20f66b8c92f1c" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "08f5debb87d350cb140e1d27aa0f2025" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "10adda684f61ac1be21458d85c906cda" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9663f636eda80665198097953f496838" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "a8fa31567791361b67451e59b720bc8a" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bf9d043facceefb224b565f6273c9b9f" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "26c2bb819b1265fae99e8f09b08bdc45" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "4c3a59f78096d9bd0ed4c18a0e4a7a88" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "196f2386f3f8f1069e36be9e4f7890ad" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "db7a5e94b70bd73370a94785006b9bb3" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "94d267da4b6a029baf6b73d9311e19a2" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c323f169c6a850844187f3c4702771ee" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a034185eb1b3f594b321ea5763fb8c31" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "9a31a9d9b8d99cafe3cd737929490df2" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aafd6ce19b9c8be58536f54d33f753c1" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "47b1995281b125cf2e96125661fa0fa8" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5b7f4d497d6c405cc01446c3702b2d3b" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4acfd98f9eb1fe6c93133a21a3b9325a" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "367473ca4cd4dce5d515aaffa4f97bec" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4f2fffce4f8c7859ef40f49b9c9f6dcc" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "b894bb0c1396f0321f7a8b59a75ac30e" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ad83b1f83adafcaa465e7acd91efdeec" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9718f6ff6636b657adbd326ab717588b" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "63418c1b75b917840a37cf87dd72f94d" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e2df14083bbe4619fb613ff5c38442f5" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "dd136a9e9d7354580977178f81ff0182" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b28713c029735640346149ace9d7aee0" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e3ba06ce50bc5970228793f079328f4c" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aac5c744d5a20d780d6d54e09f70aef4" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a43ecd7bab14edc0ebb308a5d234a822" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "70cd0abe595be43f10d0c3c1aa18ecbb" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "5d451ccfe2f82622fce6aafd0e99964e" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3c61e0350736b0badd6eb91b043fce15" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4e6abb3186670c352bb6e4356b10eb36" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4965b9c2c1903178058cad4ba03e00c2" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "65817641c5f4b50e21f39b9aa1410638" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "941b1cb2833ecd20614641dd96aedd2a" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f39421bb762aab4c28b61974e550b44c" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "dfec3db5f6f49a761332842ff7a7a6f3" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bda328c1cf39fac95d943c7b65fa8137" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2202a9cc4147659af0c1215872b92201" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ad0051e7a0d7e5d2e2dd8170e92151e7" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "38021ee61d527d941d676fa108be15b0" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "d1d9a75a5427c36b5a705c0e6df8bc23" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e7fd19281a71d1f973a2abe4b16e9e8" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c76af95e39f01d9ac3f564aaa565044c" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b532983f277d5c38bcc0b7067cabe522" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e16da84b894561309c2313870c6af5ea" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "61c08013902e09a85b6fab9c831925b1" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9a3542a6843bb351f6fb6c1780242f6d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "ff4ea0b9c868f51599291e67b10e6b8b" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d5b57164988ad36e571cab8371462856" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3c09ab8fa2dc10ab59ab67ac596aa7ff" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6b83fade58e701bc41c1a0b56c5ed682" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "91a8b0e3fb7ef44bd578c775d3654cb1" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "17f134ee80846f5b0134ee2079320961" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0bfd04c3452c7d22754d7e0389d971be" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c1b3b64e1b1412775e882118c9ca88ed" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32555008, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28327936 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 28344320 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 32538624 } ], "md5sum": "df49e40d37fdbc3c658a45f2848ec8f8" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f5c12608be73266f3115a4384433a68b" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bae17929fc88e29ac002cbd8ea9db7d4" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1bdd9aa5b253c7b011f60c548f7b3ec8" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f5b21553e26278dc6cfeca61b174193a" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6b90708e64301aa5b80e3fa7b1a38808" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "732a8498f31336d66e63455211f1c92f" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e05bd8e58dc045400533d69741f1d992" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9450ea4597f2c94bf7ddc5f3b7da442d" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "90cd6136acb8ead3847b77c8f738bde4" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "70f553f8a228a6ed408c8da0590843da" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2ec0ced34a59a40d76c1e6586d21dc34" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "458b5a1a9c39304d245bc8d81774c45d" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a44e34997ba8fbdf231cd1cc7db89d52" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "81e02c3be384aeca84de84f9e9ee977a" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a37d1921858c4c0c3db2c0012d15ae4b" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f129cd91d61b9e1fd9608ad6681b4aca" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1c40f0917388a8b2f4520eb9892041d6" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4ae0c0f0015658ae66ee5b9ac69ab2ef" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d6881da135118d0baafc589cfee75f92" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f3e8ecea6ae5449bcd722a956f3d88e2" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9ef7d3ad0f95a4b9cf8a64327b5afdd" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "464c79eb46f363150ed50ad1079900c9" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b595e301b849782bf69b65f20b2476ee" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bfb9aff7edb21a3124767d0a41bf15fd" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a60dda7f0e679ff12c189d8dae9db00a" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a378bbf86d9d0ff4d57f658a8d1a1c25" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "ec77fad89498f9f02c98fa23758cb2af" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "04de1010b9fce8aa8c9b63733fc8a06a" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8a330c9a9198ad9c760369147aa129a8" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "6dfb0d9227de5c556c403f2633a5e165" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6f3cca1fcc83735e8ecadc3f68c4d477" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0dbaca80fb98fb96dbd4a5c36adc5210" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cff70d8122c8f92aebacd7cc77772d6a" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "380f73d33c0c34e9e6705141fe15e8b9" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "addc73d1bb503aba1415b5b2675921a2" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "77dc33566f120a45006f508a63d9310f" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0b21fc3f8d463b371f6978b72b73e090" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a0dae5d3c8252e53c478dbafc2eb7611" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1912f42209705cc8c5e342ba7962a099" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ff91d8886dd1d546c0f0d642eb2a0181" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4a4cd726b0f31d1072bbaf0f2bdf9739" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "967652e884e5514f54833aec3f5250e0" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ff5ba3e20cb6108a280cb791def522e6" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7671d044168277ae8b950bb4750c779d" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "661e3b2d989c2ea320876d5b25b747a8" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4d1a466a881a179510bebf0befd9f7b7" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f89538547ad2ec808ecf4e90502bfd7b" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "202f075122aef0c2fa30f1185b974cf3" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2ad62111b204d666b8642df40bf89738" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2423e725ccdc293e71447c3b7a836ac6" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "90603cde66d9f6beb6d6a7c039663945" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "96b9bb49cf6768beaaa34077511cb144" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "58a52c8edf6aaf903cf07deddcedc45c" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "d5f0c6138d18d408167f855ffb1999c5" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d2f225aef788ecd344d388f3653e4b20" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b373c8735896437d7e0071af18a93647" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "465620fb85732bff5516016f77ac63b0" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "55d9c7ce663ab0f3627afb66dfa6dba6" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4db99c21715797a707f461a0e485528b" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a3c73a9136938be0f481fb567db33cb7" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "e746803c4e31d07b93797cc29dd55f2f" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "868f86fdd4b29154f08380294eb087ea" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "348c9deec664110cc774c6a4a77cdef5" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "64a0ccbc712ebb9957c3a7bec5dbeb28" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bf70714b68612c0169d13a200ccf05b0" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "a85b09ea58c2f5f337f2b84e3caea00b" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ee205d5d11d795af7bd1cd4f238d624e" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "286a3c74fe4aff5176db3bbb51bdee41" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "78313257e5d6073a58c96772d7b0dcd1" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e553455c63681e12cf940c810a380853" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "47ef1ca127bd5e4f1cc2aa5c395a5e30" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "d2ad13096163129d7cffcc7f8c739d85" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d62c2a17a5da9bc1e2df8d3b43ea3ae2" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d5581da786635309d3ba4bf2de3f0c18" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "30c7754eae81f2124d7b06a9f743f176" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a46a93217361907685a5a15f2364408f" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f1cc51d5df2786ea22e58067cc323473" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4281d95321f78f8d056d518f4eea03cb" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "816bac1ef59ca7c5f13221dbd58f3fe3" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "384393d0c0c598da23b36533353033a8" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a62d5aa67197cad241c09b07743598d4" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dca4becea6d7f786213693f8252846ae" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bdf44ff59482ee7d5f4e16b93686b560" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "02915b84908171bed4aea0c45f705536" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8faa8716585e1dea36a38db92f72347a" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "956f256634b05088b98e5b4f68018dd1" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "081a78867ba45eca7ce95f366872d316" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3562429ff4aedd5660602197055fe6a6" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cbf32d1fb443e3a7189287b7d6d6e058" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a27144ff478e6e452cf7acddab435129" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "60539a25d0a907862f343e7f1e27e8e6" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4d5988332dc72c8d045f12b494a694c0" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bbdbc9811b3de559c6704f048aa227a7" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8596f39412dcad3a9ddbd2345022f1d0" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "da0ccb653d7774a6f479cb911cf8a1b2" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "fe2705afe7754e04a82c271cfc4e7d08" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f64de3e656ed6c1f1aead5435502da44" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bfca12ccf7445da364669bc92f0cb04d" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0b7d47f97d4632404bc92ad2dbb87de5" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5aec8838ac119f9fd194ca751174d6d7" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7693e86b88826509923b34fa9eb1f66e" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "34cfcb81d448cc573e7ae345405825d0" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c969c59774840ab8dab8248399be65d3" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "674e86191f4eb0f24d0b4a673626c1d1" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cdfff93b4553b88bf627a1abccdddba4" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "99dec44b65862327fccdf2529141240c" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6064069eb9bff920360f0b4ce899cc69" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2ab957c8f4e84a2fd05be76e80880b5f" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0e9eecc1e552b845995c4680593d635a" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "73aaeaac38e1be64624064457e62be78" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "abb1be8b141467b006401ad54c7ca27c" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3773a784c0ec31b15c1c35b91fceaf52" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2b159cb8fbde32521e8bfa0dac83808d" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "ce0d42438aa56114dcb07cd232771939" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fb9551dc0848dab2030a2937b79e8339" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cfbb4623faeeff4dd0545ef3ade0cfe1" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ac0ddf42d35dde3f8851e9bb2737c7fe" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e2197d1d10b95ba1926cfe8f8f093816" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b48499377d4aa2fac066cb8f404f78a5" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a530bc642ac71b26240cd2f5264d8c72" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e48bdfc193a51c95b961990f252cf349" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fae8e8eddfcb8d368a8cae2f3c73abe4" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8b1319b5a402d70e679662a2a1895bca" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5d673f6a69258aa230a03b8d10642d82" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "637910a52cbac7f024f2d7819bf58b3b" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f7c21f3e560998a7edb43c2b2305f9e3" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4f08b1180418b74f9aec731dc6e3db9a" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9d179350f161de76f1c63dbfb43d5fa1" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "a1130041fd939eeeac4b1f63a847dc6d" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "04752ba225d400a275a5e1b8896f48da" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e328baaa1f492af093a6d472d2b8efce" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "b610a7d662f132e668e1be1c4520948a" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "73af0eaf07e614782d1c264a89c2c2fe" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "78774f23ba9c07665f4a24af3d4a733b" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f598e2f3c2cabc1e8d47ec9a2127d30d" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "36beb5a991b29380f03607099caf52f5" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "55f7a4bfd9c657db621f75ccf27d537d" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "5c93b69c55b1551d479f2bf4d696f48b" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0988dc088328d6567af359cb6a3e7eac" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0d146e7652e0a0e2a4e4111990b3c026" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "251b756238241c81f605c5944b5591e0" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4a9c80b2e1977c801cdbe60687ae2745" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5c670d55527381320ee7fa27e295e896" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5bc8e8a1e6cf4fc852bc478b5d9ba48e" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "bad2ea20fb44bb0b205db2dcd39a3e79" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "579f6c04cab75bb251fb24972e39e75e" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7a5fbb4e92176ebf47b2ef129dd8e090" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b7f271b307c72bfb9bfa5ade541af9a7" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cc6e469c4c129a271bf918506444aeb1" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "c1c725e3136551fb5092e563b50ac235" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eee7fbc7e4be98989923ded30393e789" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c69dcab96b61713aa36923c3c8aef544" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca4883e969a46e6ab14bf5f18b646182" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2070e47f26e48f50333a02644729ccd8" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f8dc0c5838a9d13dc567a7b6415c6252" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "3e591efb8cf4fc449651d64ddd698665" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "abfb86c9dab795d0cd3c8e18c7f74e10" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "317d870a868acc6d7cad27a5ce6ae507" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2cc63b5f5fd63a900a8f3f08b2c8a5a8" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "44287434836aa7b0bb126412e7ed2f85" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d845add9847fb1c4a194d720fce5268e" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3cd70f841169a517b74dd3fd70b1073b" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "e529a91c17e3770c4b8c9b5a799695dc" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d49c4865d45a82c583ca4ae806e1ba17" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6d2fae6d35a075c941e7ebe480efb92a" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5450a48935d9c38413eb55f53b9d96c8" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "30b4d32673e23bea9dea6b7b43ff9a54" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6df4c238895cc021eaa1de3b3bedbe68" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "619bc46f71ad12be213623ce03be4473" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "807af6ef4c62936cabb8a3e4d3b4df9c" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b2da474c31781c9a39ce651e4a0d8a31" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "49fea7b24d0bdae6a638f9ed691e0008" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "db8529b2cf74aa54dd916c186bb5657e" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c5d3a38fb1699150bdef37fcdfc022d0" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "11abeed7c5e007e21b19133d9399e005" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "998534ad5dff1d1675b871e7823777d9" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "5c0a059c8a3923c161b09410e6884a2c" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0dc08981a460a7fd809c11504c90b8bc" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8c51a5c65ad5761d197f518448772ca8" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "968270e3a91656ed7d98e808c87ebfae" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5e80eafcafc5ca781e568ce1ad3872e5" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a63f9f4891eb888ada9cca6a19b4e346" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ac212e4271421005af14132b2bbae738" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "8d930aae8259202b923f6fc8de5ee879" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e9899cf8417cc32ea923da5d07864425" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c81468f856468b0ab5c7e116d4a26e1e" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "811077fd297dfd69e7fb59ddb3b8f9f7" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "975ad5db032f349b05a05c2573fa2c40" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0df61a316f9071261f280f163ec2545e" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "5a5163e224f85c595675a69e2e6c04e3" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9072ef157479f943d868b2cee8b4858e" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "014ee7128ac8828c5618ed6983dddb2c" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ab7d52fe349a5ed9d3995a639c8f200f" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fed05e4b4db893d119cd7d322eb7a7f5" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b1e290d372bc61835c8560a910fe8d9b" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "843088601a68c6909a06407dad331354" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bacf079c5e3a401ea12b10e08aa5dcd2" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bec9bbb78ad7a0b4e2941e32599e4856" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "20ad50656bf1277fe4c5b0a904f08d49" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c992ebf7056dc3d2a1563863516b6635" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fc9de11188942681d5106f0771f00b79" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "b31173ccae8f75cbe1cb73d8f44e4815" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3b4ff3e03180123a805aa8439da090a4" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d62d124d6bc0f7685d40582294d33943" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c14e7464564527f317fd50a860af66c5" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e74e5ce21eeba0679b8c31df207443c" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2d11402dd644423ebe98cee485027768" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b575b616d0a7fce55b92435a63a14bce" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7d1f6a4e54c78770368aba42f8906c4b" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ba3d0e9aa0b03a159c51560d46da5f4c" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "ef8a8840af60b4a1d5eb2c5beeffeee3" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "77e5896f997dc1db329dde0a62a2dace" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0061d2a27578f0b06f2e2888e049675c" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "54dc452ee5001386e15776ff90f865e1" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "90cb6ecdf84905ced6e672b1ec6ac747" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "347e9051db6332b33dcb0b9cc07e78e4" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6ce0aa351ec9a17b1e111d3baf651e82" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dddfff218f70e295d2e634f74a77a0b4" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c32dee158eca2ecb776ac000436a2f78" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "7a812a31f1edf56cb203fcc428c64808" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a94ba3305cd7adf871facc4b1d1c9462" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5181a0f7d9b103c8f16eabe7bea8ed30" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "df768951d4bcc581389a530f7af02a97" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "552cc5c82bc0201c9f4d66c9dba8f9ce" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2dfa4fe7b95ac23c872c74ca388c0ba9" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1091f1996bc838b9f497ccc86a76934f" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "57b8d413132e63bd8d530f24a7d53406" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7cd0e86a1c3f6a81b1009a0cea327277" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "478116668d001cd6b7d790c0d04bb119" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "917c87857f004fc9e90b8f83cbfcb37f" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ba3691fc198475d340ed4a8af540abf2" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "85f708dfeb0f711a52f5f4ee535f13ae" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a3347b44d5c5d0c6d830acc0ede0bbf7" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8eaf58bd343ae56e60b48b722127e2e8" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "352ac896ce229602b49a67a18b669a2c" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "caf41e1b2816295f631a28abbc8ca384" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6923824ad76583a3fa58b11764def663" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "12282f470e2c1690fbcdc89c36d01d56" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fe072a04347d3c3808ec8fcc452d1dd7" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "29a6470407174bd1bf882548efd0ba4f" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ef2e5f251e78e5bbc69e6df919e48f6a" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1b0b2f048318ef4fcbc70fa21d4a1502" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "794b63b9ddf1f86743ea86c8610f2516" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aefc87d910fbe976128f5a56aae5c3a5" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "77704d86724c62795582683375ee88e9" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ab2bdad9e1499d09010e0518aa003834" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3709872fbf936f9f5a5e1e61dc3c4e70" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "43b1f20639ccada7743713379089c125" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ac14d029525a73e3eefa32988132d50f" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "d566cf81f4e6c139b3a6b1de2f18dadf" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bb9863f3b56f15c35c756c6d2ddaa7b0" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5b9297657aabc2f185067c46dcd1a4b0" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0399a3cc0828f2343bbf5f659fed0042" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3770f0e593b785b19826ab235ef12625" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7e72ec0a9f93cafb4f5d687c33f2f0f3" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "e8e71e953a05cd2c591840503bef22e0" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1d158f4798a0346420dbd7fb4e2e4a7f" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "428d22e873e8e80d4929e1e5f0b474db" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "de6899c5833df72581194481cf20953a" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e4d012241750faca137d26708729ba43" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f3da47602f4644c3de5a85b445d09227" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1b73388e7337101a8dc0cc982e6c77b4" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "51762c49ed0d27797a7c9531a80aece0" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "069a575e483bd8a9f66fdc7888e1cf80" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "073a2ba979ab190fd27978af4b59c437" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c3807876bc9592b240aa1dffbb126cf3" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ab80f2f33d6b5d7ecda8fa36279f57ca" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "e1713a73260d1f770eac607262d9e15f" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d1600190391c258f180ac6aab8143d5e" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c5794ff3b8f6109cb6487ea451270e40" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "18dd100bde8e955624dd96f12522cae8" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "915cf730e51e5645ac64a9bfc589b7c0" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "68eb9187a48fe48406b18db2a9098343" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4b01d999c08ca07a5c12d35e18533026" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bf73b488366d1d6593b6d4cca7464db4" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ee1170dbd41363fc5ef5034735c40b16" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "972896a1b1cc640b31b91297bc0d5848" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cfd86229635c6ec7980962ed7605851f" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "64609c6147f4b44c7430f609bc7d7e0b" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "6a19bc5a92058d493e83699a2b7831be" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b937d334121dcc021d917c45271b35d7" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "801fe5e5e1f5a88f969f716d21d2553f" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4cbc13282268820cf83d36bc3561b0fa" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "25d1ace4eb99f712fad808c1bf392410" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3b6f969268188a0fd33ac0841f779e04" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "e3c194c9b91901a4406eaeaf79484814" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "50928fbd6ad5aabeb87976918194ed59" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4936b9d42fe47c90ac2dd75ed2a46b66" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d325d7ce7f880e9041f7e1b6b1d23a85" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b4015611cbe2f2ad32b450b94caad6a4" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2585424cc3adcee0d929c6c0c1201cdf" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fa6b8541fced3afe44a5bbbf0ac695f3" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f5cd085a323d8e402e57e9fadc6c0a1c" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d168de54840b7305d4437956fe99a56f" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "6467435e2cf67b87f6833d5b6d16393c" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fabff9e4e66cf558d50fb8ea65235a20" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8e69707be5e05e6e188370be3268bab3" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "7f19b8fc6ea6acf6f753b2b31465b717" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5ef0dfe9009c9312a919ac6ae720d42e" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "28ba2a1b4d7aadbf0a69a0f4dee0b053" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6bcac9c529ac7b671a430049bfa485eb" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d6156c212d652038819c44a010668513" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "77b61721b64b08396657a57e00569949" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "eb5501a44f659c9ee29db10129e2dcaa" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d483decd4cba60b481c86731b5ff890d" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dcc5407aced3c68086d0f2aaed8070e6" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "46c844b25954fd65f80433b8e31790d8" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6655d312d0b33868312b9e87be81bf93" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "03f138c249cdbcd0b7a39348b78c1d47" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ac2fab83ebf2d021f3238b54a04e527f" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "eb4362e91e5956577f5d29cdae257547" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "701a635378f53caaa8c850294eea6fb8" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "b749cb4e4b07e0661e9e215c7ad06ce5" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e66e8788116cbc6806a234e2bbced2db" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "58c3f8542d494d00e0fcda3a9d67c56e" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "8eee56d77fa3a06c708900f6ffbe4a2d" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "25fe50523fa13231e4b81a0a38ebade3" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "26e0815584496e9e589dc4c570c1d0a4" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "003aa87762899a50b28970c32f9a85f3" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "45b7fbfcae0df5f235661533527673d4" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fb00df0094eae3d479b60d99ca7eb859" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "04afa83301d05a2648ff004cb3ef4387" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5670e618d23db1bf10a8e0e41d9b961d" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4f3dda3ac69a3c8cdc22dc086cf2e3a8" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5da369fa68e755fcf3d0473e30f1b5e0" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f09f7a109e17e6ac570d1bc4ae538851" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7ed57bc9b4c1c584de7dd017dc4df64f" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "57793d52d5f491e26af14ab320248a32" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "96bdaeec987c07205111b130b28ad680" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "af02d581936233b02cc9fea7d4815a9e" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cedad09ef938237c653c442be6ca7a30" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "93d9296934a39aabe8282db3d8712234" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2a1aa9cf48b668ce4672bd843e10d96f" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "17c492a7b9753c4e634b4c3decc0ac2c" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7259cc9dd94cb0d9d053a1f13812fe57" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "330b093d2a83933df3bbb1e9a7764969" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8da4d15193d6fff0610cc8221e0e371a" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce9c13ab101e4a3cbdfa816ca929f751" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c6b4cee72e69eafadc2688cc400ca836" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "28857c29ed00513fff3d00137b3eb694" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4b333a88e5ce479329405746629e07fb" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "494ab320443e3a8152bb73342cd82986" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c6b2c550137d78c3692dc07bdaba042c" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "20e8ae8ccb9637e9724abbf272d623ed" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2fef0d82bf87abad4ace4fbd9ee6c1f4" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "39c0e4b24c55abeae9c1e72b9cb99d22" } ] }