diff --git a/mlc-chat-config.json b/mlc-chat-config.json index e2c4b76bce096bb03fff260cdbb163ad82b36f41..0f20c3b418c4ea647520e42f4069e91c5442ed02 100644 --- a/mlc-chat-config.json +++ b/mlc-chat-config.json @@ -1,7 +1,7 @@ { "version": "0.1.0", "model_type": "qwen2", - "quantization": "q4f32_1", + "quantization": "q0f32", "model_config": { "hidden_act": "silu", "hidden_size": 1536, diff --git a/ndarray-cache-b16.json b/ndarray-cache-b16.json index ceed32587686e9855b25a69941ba443ba1e77748..99f672aa1db6e053503f39e432964a76954dfcf0 100644 --- a/ndarray-cache-b16.json +++ b/ndarray-cache-b16.json @@ -1,136 +1,92 @@ { "metadata": { - "ParamSize": 313, - "ParamBytes": 1111169024.0, - "BitsPerParam": 5.002201462167321 + "ParamSize": 199, + "ParamBytes": 7108352000.0, + "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", - "nbytes": 116686848, + "nbytes": 466747392, "records": [ { - "name": "lm_head.q_weight", + "name": "lm_head.weight", "shape": [ 151936, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 116686848, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 466747392, "byteOffset": 0 } ], - "md5sum": "3eba7944239eea590b63e87f00016a6b" + "md5sum": "e376b8cb28d4376590f01ad535d6071d" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", - "nbytes": 116686848, + "nbytes": 466747392, "records": [ { - "name": "model.embed_tokens.q_weight", + "name": "model.embed_tokens.weight", "shape": [ 151936, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 116686848, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 466747392, "byteOffset": 0 } ], - "md5sum": "be8d442fc5234135620521550a8148fa" + "md5sum": "6a2ea36ede6c428672115e3a3d8ff0d2" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", - "nbytes": 29174784, + "nbytes": 55050240, "records": [ { - "name": "lm_head.q_scale", - "shape": [ - 151936, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 14585856, - "byteOffset": 0 - }, - { - "name": "model.embed_tokens.q_scale", - "shape": [ - 151936, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 14585856, - "byteOffset": 14585856 - }, - { - "name": "model.layers.0.input_layernorm.weight", + "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 29171712 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "51e65235b4e65bd14f51a603e8ef7b49" + "md5sum": "9463e4903f97fd312a8f057495aa1239" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", - "nbytes": 33212416, + "nbytes": 27535360, "records": [ { - "name": "model.layers.0.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 0 - }, - { - "name": "model.layers.0.mlp.down_proj.q_scale", + "name": "model.layers.0.input_layernorm.weight", "shape": [ - 1536, - 280 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 6881280 - }, - { - "name": "model.layers.0.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 7741440 + "nbytes": 3072, + "byteOffset": 0 }, { - "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "name": "model.layers.0.mlp.down_proj.weight", "shape": [ - 17920, - 48 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 21504000 + "nbytes": 27525120, + "byteOffset": 3072 }, { "name": "model.layers.0.post_attention_layernorm.weight", @@ -140,7 +96,7 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 23224320 + "byteOffset": 27528192 }, { "name": "model.layers.0.self_attn.c_attn.bias", @@ -150,113 +106,161 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 23227392 - }, + "byteOffset": 27531264 + } + ], + "md5sum": "1431994b9042b3fd664eae27a814f0dd" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.0.self_attn.c_attn.q_weight", + "name": "model.layers.1.mlp.down_proj.weight", "shape": [ - 2048, - 192 + 1536, + 8960 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23231488 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "22267d86aa57d6448950d6b1fd5d3a1b" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.0.self_attn.c_attn.q_scale", + "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ - 2048, - 48 + 17920, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 24804352 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "e545b60a4a590e0d1c5e3b9140db35f3" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.0.self_attn.o_proj.q_weight", + "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 1536, - 192 + 8960 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 25000960 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "8e38f424f7c0d358cd84cfd6d524534b" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.0.self_attn.o_proj.q_scale", + "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ - 1536, - 48 + 17920, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 26180608 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "631ad4762294e6034a11b0b66bae13b5" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.1.input_layernorm.weight", + "name": "model.layers.11.mlp.down_proj.weight", "shape": [ - 1536 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 26328064 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "4c542b91b3dc431e62e87024c4b79779" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.1.mlp.down_proj.q_weight", + "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ - 1536, - 1120 + 17920, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 26331136 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "59cea6a143c3e50522e6a7ab56fd1302" + "md5sum": "da04ac4500d98575c04bc5229b7fcafb" }, { - "dataPath": "params_shard_4.bin", + "dataPath": "params_shard_10.bin", "format": "raw-shard", - "nbytes": 27191296, + "nbytes": 33060864, "records": [ { - "name": "model.layers.1.mlp.down_proj.q_scale", + "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 860160 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "name": "model.layers.1.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 14622720 + "nbytes": 3072, + "byteOffset": 11010048 }, { "name": "model.layers.1.post_attention_layernorm.weight", @@ -266,7 +270,7 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 16343040 + "byteOffset": 11013120 }, { "name": "model.layers.1.self_attn.c_attn.bias", @@ -276,51 +280,29 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 16346112 - }, - { - "name": "model.layers.1.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 16350208 + "byteOffset": 11016192 }, { - "name": "model.layers.1.self_attn.c_attn.q_scale", + "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17923072 - }, - { - "name": "model.layers.1.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 18119680 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.1.self_attn.o_proj.q_scale", + "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 19299328 + "nbytes": 4718592, + "byteOffset": 17311744 }, { "name": "model.layers.10.input_layernorm.weight", @@ -330,59 +312,7 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 19446784 - }, - { - "name": "model.layers.10.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 19449856 - }, - { - "name": "model.layers.10.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 860160, - "byteOffset": 26331136 - } - ], - "md5sum": "13283f546f5e1640e0d179f91c737060" - }, - { - "dataPath": "params_shard_5.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.10.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.10.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "byteOffset": 22030336 }, { "name": "model.layers.10.post_attention_layernorm.weight", @@ -392,7 +322,7 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 22033408 }, { "name": "model.layers.10.self_attn.c_attn.bias", @@ -402,2759 +332,1985 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.10.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "byteOffset": 22036480 }, { - "name": "model.layers.10.self_attn.c_attn.q_scale", + "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 22040576 }, { - "name": "model.layers.10.self_attn.o_proj.q_weight", + "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.10.self_attn.o_proj.q_scale", + "name": "model.layers.11.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.11.input_layernorm.weight", + "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 33053696 }, { - "name": "model.layers.11.mlp.down_proj.q_weight", + "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 33056768 + } + ], + "md5sum": "f1dfff1bdbfc338078cd03dcad4f25ad" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.11.mlp.down_proj.q_scale", + "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 1536, - 280 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 27525120, + "byteOffset": 0 } ], - "md5sum": "f80a070c11451222f8169d453336a19c" + "md5sum": "b8e570d8afef1127f3e3bd074074d74f" }, { - "dataPath": "params_shard_6.bin", + "dataPath": "params_shard_12.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 55050240, "records": [ { - "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 17920, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, "byteOffset": 0 - }, + } + ], + "md5sum": "e7a70f4c903318c16326ef7bcd6181d6" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "name": "model.layers.13.mlp.down_proj.weight", "shape": [ - 17920, - 48 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "a4eab6c623efb1460907f9e4bfbc9ee5" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.11.post_attention_layernorm.weight", + "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "127d9f5e010997c546676c086891a0ad" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.11.self_attn.c_attn.bias", + "name": "model.layers.14.mlp.down_proj.weight", "shape": [ - 2048 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "64c5a74a548ff008fdb5d95636e4fd63" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.11.self_attn.c_attn.q_weight", + "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + 17920, + 1536 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "99334446e606608b2aaa2c25815f3f04" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 33060864, + "records": [ { - "name": "model.layers.11.self_attn.c_attn.q_scale", + "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 0 }, { - "name": "model.layers.11.self_attn.o_proj.q_weight", + "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.11.self_attn.o_proj.q_scale", + "name": "model.layers.12.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 11010048 }, { - "name": "model.layers.12.input_layernorm.weight", + "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 11013120 }, { - "name": "model.layers.12.mlp.down_proj.q_weight", + "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.12.mlp.down_proj.q_scale", + "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "114d2917404e99a37c39aacfa57a9a50" - }, - { - "dataPath": "params_shard_7.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 11020288 + }, { - "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "name": "model.layers.13.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.12.post_attention_layernorm.weight", + "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 22033408 }, { - "name": "model.layers.12.self_attn.c_attn.bias", + "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 + "byteOffset": 22036480 }, { - "name": "model.layers.12.self_attn.c_attn.q_weight", + "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.12.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 22040576 }, { - "name": "model.layers.12.self_attn.o_proj.q_weight", + "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.12.self_attn.o_proj.q_scale", + "name": "model.layers.14.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.13.input_layernorm.weight", + "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 33053696 }, { - "name": "model.layers.13.mlp.down_proj.q_weight", + "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 33056768 + } + ], + "md5sum": "f94fe101baadd653c719760c39936366" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.13.mlp.down_proj.q_scale", + "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 1536, - 280 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 27525120, + "byteOffset": 0 } ], - "md5sum": "dc3b5e347d2aba0026bc6cc0dcf4b005" + "md5sum": "6a3fc846bb0b5a13f659c6a1dc4c4694" }, { - "dataPath": "params_shard_8.bin", + "dataPath": "params_shard_19.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 55050240, "records": [ { - "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 17920, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, "byteOffset": 0 - }, + } + ], + "md5sum": "ad4b8262ad73b103f7d24ae8a372da04" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "name": "model.layers.16.mlp.down_proj.weight", "shape": [ - 17920, - 48 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "8a98a1a048e006dd076822db1ae73846" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.13.post_attention_layernorm.weight", + "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "72543b4e31346428357244f70b615d12" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.13.self_attn.c_attn.bias", + "name": "model.layers.17.mlp.down_proj.weight", "shape": [ - 2048 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "284d2e9aa13b996ea3297594b10df1f1" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.13.self_attn.c_attn.q_weight", + "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ - 2048, - 192 + 17920, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "a150d9ebc7ec5c2915ef543348a130cf" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33060864, + "records": [ { - "name": "model.layers.13.self_attn.c_attn.q_scale", + "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 0 }, { - "name": "model.layers.13.self_attn.o_proj.q_weight", + "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.13.self_attn.o_proj.q_scale", + "name": "model.layers.15.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 11010048 }, { - "name": "model.layers.14.input_layernorm.weight", + "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 11013120 }, { - "name": "model.layers.14.mlp.down_proj.q_weight", + "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.14.mlp.down_proj.q_scale", + "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "60e7284515bb4c2999432289d45839c4" - }, - { - "dataPath": "params_shard_9.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 11020288 + }, { - "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "name": "model.layers.16.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.14.post_attention_layernorm.weight", + "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 22033408 }, { - "name": "model.layers.14.self_attn.c_attn.bias", + "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.14.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "byteOffset": 22036480 }, { - "name": "model.layers.14.self_attn.c_attn.q_scale", + "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.14.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 6291456, + "byteOffset": 22040576 }, { - "name": "model.layers.14.self_attn.o_proj.q_scale", + "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.15.input_layernorm.weight", + "name": "model.layers.17.input_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 33050624 }, { - "name": "model.layers.15.mlp.down_proj.q_weight", + "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ - 1536, - 1120 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3072, + "byteOffset": 33053696 }, { - "name": "model.layers.15.mlp.down_proj.q_scale", + "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ - 1536, - 280 + 2048 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 4096, + "byteOffset": 33056768 } ], - "md5sum": "f48e561b8d2b76519c23c5db3e8f6f16" + "md5sum": "eccb4e34a4523e8f5ddad7512bce96cd" }, { - "dataPath": "params_shard_10.bin", + "dataPath": "params_shard_25.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 27525120, "records": [ { - "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "name": "model.layers.18.mlp.down_proj.weight", "shape": [ - 17920, - 192 + 1536, + 8960 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 27525120, "byteOffset": 0 - }, + } + ], + "md5sum": "98c6b2f2b0a3fbdaa97b34ff1c44f19f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "a27c1d063ffa5747651e4090e48e7b4c" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.15.post_attention_layernorm.weight", + "name": "model.layers.19.mlp.down_proj.weight", "shape": [ - 1536 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "bce8361e10f3eba7353b8f498d779676" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.15.self_attn.c_attn.bias", + "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ - 2048 + 17920, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.15.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.15.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.15.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.15.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.16.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.16.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.16.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "252c5013974cfddd0dbba30e029f877e" - }, - { - "dataPath": "params_shard_11.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.16.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.16.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.16.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.16.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.16.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.16.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.16.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.16.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.17.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.17.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.17.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "390cb82a590d93b12129b8d3c50f73f1" - }, - { - "dataPath": "params_shard_12.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.17.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.17.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.17.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.17.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.17.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.17.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.17.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.17.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.18.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.18.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.18.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "a3495d635ea3c53c9b0bbbcef749ceb8" - }, - { - "dataPath": "params_shard_13.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.18.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.18.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.18.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.18.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.18.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.18.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.18.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.18.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.19.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.19.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.19.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "35700f4804c01da98c6241cee2ff5732" - }, - { - "dataPath": "params_shard_14.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.19.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.19.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.19.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.19.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.19.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.19.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.19.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.19.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.2.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.2.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.2.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "fc44a49e8c67d1e47c7d9f9b47c32cb5" + "md5sum": "8528ddaa61b8703c89aa81d92afff1cc" }, { - "dataPath": "params_shard_15.bin", + "dataPath": "params_shard_29.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 27525120, "records": [ { - "name": "model.layers.2.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.2.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.2.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.2.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.2.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.2.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.2.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.2.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.20.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.20.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.20.mlp.down_proj.q_scale", + "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 1536, - 280 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 27525120, + "byteOffset": 0 } ], - "md5sum": "66952571c7809812a74701771112d5ac" + "md5sum": "d33d62ce5f4294e21b34df2db479f5d3" }, { - "dataPath": "params_shard_16.bin", + "dataPath": "params_shard_30.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 55050240, "records": [ { - "name": "model.layers.20.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.20.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.20.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.20.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.20.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.20.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.20.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.21.input_layernorm.weight", - "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.21.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.21.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "451ddd6918f481774a5ddcae93fcc19a" + "md5sum": "2dd687b44fa54cc3489a523d9797e2f0" }, { - "dataPath": "params_shard_17.bin", + "dataPath": "params_shard_31.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.21.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.21.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.21.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.21.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.21.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.21.self_attn.c_attn.q_scale", + "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 2048, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.21.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.21.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.22.input_layernorm.weight", - "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.22.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 6291456, + "byteOffset": 0 }, { - "name": "model.layers.22.mlp.down_proj.q_scale", + "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 1536, - 280 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "6d6e530a4f2c516481ef459c432eab73" - }, - { - "dataPath": "params_shard_18.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.22.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "name": "model.layers.18.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 11010048 }, { - "name": "model.layers.22.post_attention_layernorm.weight", + "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11013120 }, { - "name": "model.layers.22.self_attn.c_attn.bias", + "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 + "byteOffset": 11016192 }, { - "name": "model.layers.22.self_attn.c_attn.q_weight", + "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.22.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.22.self_attn.o_proj.q_weight", + "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.22.self_attn.o_proj.q_scale", + "name": "model.layers.19.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.23.input_layernorm.weight", + "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.23.mlp.down_proj.q_weight", + "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.23.mlp.down_proj.q_scale", + "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "0e25e76a52534e27e009f1966b60e908" - }, - { - "dataPath": "params_shard_19.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "name": "model.layers.2.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.23.post_attention_layernorm.weight", + "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.23.self_attn.c_attn.bias", + "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.23.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "7cc1e4b5ba621f3e55b0dcc20acdbc09" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.23.self_attn.c_attn.q_scale", + "name": "model.layers.20.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "7050d538a01ea8409da478ecfe4b7308" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.23.self_attn.o_proj.q_weight", + "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "a1a18612a0019a522efe32bbf85b09d3" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.23.self_attn.o_proj.q_scale", + "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "449087c60ab28cead04f4d648e9c115b" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.24.input_layernorm.weight", + "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "349c72f8fe7053102705ccc87332b90d" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.24.mlp.down_proj.q_weight", + "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "b5995018e8c8653dff6649c8deb76b4b" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.24.mlp.down_proj.q_scale", + "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "0b2f3521eb647a178bf4fe69ebbefca4" + "md5sum": "c9f851e6d38fbb3193e8401ceb8a1d43" }, { - "dataPath": "params_shard_20.bin", + "dataPath": "params_shard_38.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.24.post_attention_layernorm.weight", + "name": "model.layers.20.input_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.24.self_attn.c_attn.bias", + "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.24.self_attn.c_attn.q_weight", + "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.24.self_attn.c_attn.q_scale", + "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.24.self_attn.o_proj.q_weight", + "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.24.self_attn.o_proj.q_scale", + "name": "model.layers.21.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.25.input_layernorm.weight", + "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.25.mlp.down_proj.q_weight", + "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.25.mlp.down_proj.q_scale", + "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "176c5666e37994d7f23960de639e90cc" - }, - { - "dataPath": "params_shard_21.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "name": "model.layers.22.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.25.post_attention_layernorm.weight", + "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.25.self_attn.c_attn.bias", + "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.25.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "49f162c562d76a1aa323b681ae8bf59b" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.25.self_attn.c_attn.q_scale", + "name": "model.layers.23.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "3de22bdb23771bfc14d719fa125c6be6" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.25.self_attn.o_proj.q_weight", + "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "6edf9c36cfa4e3b3b5afaa14b62fa842" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.25.self_attn.o_proj.q_scale", + "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "52a49b8afd861c3081f11803f008dba7" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.26.input_layernorm.weight", + "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "07cad64074e88e27ed0ab89a91052581" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.26.mlp.down_proj.q_weight", + "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "f373b934ab548fb2b04a729c5f22dd0b" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.26.mlp.down_proj.q_scale", + "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "ceb962974aa3ffd06a616a6c52270097" + "md5sum": "496023d69be445e33b091efe17662be2" }, { - "dataPath": "params_shard_22.bin", + "dataPath": "params_shard_45.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.26.post_attention_layernorm.weight", + "name": "model.layers.23.input_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.26.self_attn.c_attn.bias", + "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.26.self_attn.c_attn.q_weight", + "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.26.self_attn.c_attn.q_scale", + "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.26.self_attn.o_proj.q_weight", + "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.26.self_attn.o_proj.q_scale", + "name": "model.layers.24.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.27.input_layernorm.weight", + "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.27.mlp.down_proj.q_weight", + "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.27.mlp.down_proj.q_scale", + "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "ab9d7f443673b7890cfbf49008594ab9" - }, - { - "dataPath": "params_shard_23.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "name": "model.layers.25.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.27.post_attention_layernorm.weight", + "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.27.self_attn.c_attn.bias", + "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.27.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "b162a58ff40747786153f9fbf3562eb4" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.27.self_attn.c_attn.q_scale", + "name": "model.layers.26.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "caa76d72478fe6666accec3e8334136d" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.27.self_attn.o_proj.q_weight", + "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "fbfea3bf12262f3f8f987854e1902f72" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.27.self_attn.o_proj.q_scale", + "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "5ecb3464368f9a2575552ed93f8f800e" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.3.input_layernorm.weight", + "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "702514b36ca983e41199b4370005eb76" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.3.mlp.down_proj.q_weight", + "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "093b34e27d9159a6433c31b0ed2b2c39" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.3.mlp.down_proj.q_scale", + "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "61f19df02bf82992f18b0a6ec1370776" + "md5sum": "c3dea40fde4fba6e8b5ec5702df0519d" }, { - "dataPath": "params_shard_24.bin", + "dataPath": "params_shard_52.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.3.post_attention_layernorm.weight", + "name": "model.layers.26.input_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.3.self_attn.c_attn.bias", + "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.3.self_attn.c_attn.q_weight", + "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.3.self_attn.c_attn.q_scale", + "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.3.self_attn.o_proj.q_weight", + "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.3.self_attn.o_proj.q_scale", + "name": "model.layers.27.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.4.input_layernorm.weight", + "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.4.mlp.down_proj.q_weight", + "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.4.mlp.down_proj.q_scale", + "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "9bd46c03febaca7936dd5c670e87c9db" - }, - { - "dataPath": "params_shard_25.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "name": "model.layers.3.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.4.post_attention_layernorm.weight", + "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.4.self_attn.c_attn.bias", + "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.4.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "3e354febc906d34d54536ea7885c5889" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.4.self_attn.c_attn.q_scale", + "name": "model.layers.4.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "b6824e2ce69ee5ac3b231da74405c28d" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.4.self_attn.o_proj.q_weight", + "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "577fbe9d5221ad319404760818265f2f" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.4.self_attn.o_proj.q_scale", + "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "dc97e694cd68d26939b3fa7a68b6437e" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.5.input_layernorm.weight", + "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "e029aaef82109dae100ed0d76abf03f4" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.5.mlp.down_proj.q_weight", + "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "0cebedd6e22c5f3c4ad9de9a4a301b0c" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.5.mlp.down_proj.q_scale", + "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "a967b6249d494149ba4d36888fb580be" + "md5sum": "d021ceb96965eac2d6acd5040e31ff2b" }, { - "dataPath": "params_shard_26.bin", + "dataPath": "params_shard_59.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.5.post_attention_layernorm.weight", + "name": "model.layers.4.input_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.5.self_attn.c_attn.bias", + "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.5.self_attn.c_attn.q_weight", + "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.5.self_attn.c_attn.q_scale", + "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.5.self_attn.o_proj.q_weight", + "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.5.self_attn.o_proj.q_scale", + "name": "model.layers.5.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.6.input_layernorm.weight", + "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.6.mlp.down_proj.q_weight", + "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.6.mlp.down_proj.q_scale", + "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "c456c605796669a1a035b5509af8b906" - }, - { - "dataPath": "params_shard_27.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "name": "model.layers.6.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { "name": "model.layers.6.post_attention_layernorm.weight", @@ -3164,7 +2320,7 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { "name": "model.layers.6.self_attn.c_attn.bias", @@ -3174,239 +2330,213 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.6.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "ae36f42102ecf32968c4b9a5494a5632" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.6.self_attn.c_attn.q_scale", + "name": "model.layers.7.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "4f8b37541d2669f35474558755ad3595" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.6.self_attn.o_proj.q_weight", + "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "301e207ed6aae4c43a940a2cb0041422" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.6.self_attn.o_proj.q_scale", + "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "94c308f7fab6e6806497419fb2bddd14" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.7.input_layernorm.weight", + "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.7.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "125d1635c4fb0ef278b46c2b6f644c0a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.7.mlp.down_proj.q_scale", + "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 1536, - 280 + 8960 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 27525120, + "byteOffset": 0 } ], - "md5sum": "4932c078feff0216f175050579e08a9a" + "md5sum": "e77e14c76d670413b391d98877a76d86" }, { - "dataPath": "params_shard_28.bin", + "dataPath": "params_shard_65.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 55050240, "records": [ { - "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 17920, - 192 + 1536 ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, + "dtype": "bfloat16", + "format": "raw", + "nbytes": 55050240, "byteOffset": 0 - }, + } + ], + "md5sum": "7ade211e79fd496efc5cbfcad1c50060" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33060864, + "records": [ { - "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ - 17920, - 48 + 2048, + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 6291456, + "byteOffset": 0 }, { - "name": "model.layers.7.post_attention_layernorm.weight", + "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 15482880 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.7.self_attn.c_attn.bias", + "name": "model.layers.7.input_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.7.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "nbytes": 3072, + "byteOffset": 11010048 }, { - "name": "model.layers.7.self_attn.c_attn.q_scale", + "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ - 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.7.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.7.self_attn.o_proj.q_scale", + "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ - 1536, - 48 + 2048 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.8.input_layernorm.weight", + "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ + 2048, 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.8.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.8.mlp.down_proj.q_scale", + "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 1536, - 280 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "8686b64ff10449b640afc32ed7439a65" - }, - { - "dataPath": "params_shard_29.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.8.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "name": "model.layers.8.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 22030336 }, { "name": "model.layers.8.post_attention_layernorm.weight", @@ -3416,7 +2546,7 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 22033408 }, { "name": "model.layers.8.self_attn.c_attn.bias", @@ -3426,51 +2556,29 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.8.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "byteOffset": 22036480 }, { - "name": "model.layers.8.self_attn.c_attn.q_scale", + "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.8.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 6291456, + "byteOffset": 22040576 }, { - "name": "model.layers.8.self_attn.o_proj.q_scale", + "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 4718592, + "byteOffset": 28332032 }, { "name": "model.layers.9.input_layernorm.weight", @@ -3480,59 +2588,7 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.9.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.9.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "037698a9234e2d563a950ced66a97bf3" - }, - { - "dataPath": "params_shard_30.bin", - "format": "raw-shard", - "nbytes": 18589696, - "records": [ - { - "name": "model.layers.9.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.9.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "bfloat16", - "format": "raw", - "nbytes": 1720320, - "byteOffset": 13762560 + "byteOffset": 33050624 }, { "name": "model.layers.9.post_attention_layernorm.weight", @@ -3542,7 +2598,7 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { "name": "model.layers.9.self_attn.c_attn.bias", @@ -3552,51 +2608,37 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.9.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "5471237681d93c50e780f7a581d37dce" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 11013120, + "records": [ { - "name": "model.layers.9.self_attn.c_attn.q_scale", + "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.9.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 6291456, + "byteOffset": 0 }, { - "name": "model.layers.9.self_attn.o_proj.q_scale", + "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 1536, - 48 + 1536 ], "dtype": "bfloat16", "format": "raw", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 4718592, + "byteOffset": 6291456 }, { "name": "model.norm.weight", @@ -3606,10 +2648,10 @@ "dtype": "bfloat16", "format": "raw", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 11010048 } ], - "md5sum": "e5c2ee400f5502dbba790e85967f0990" + "md5sum": "f709c941037554e6d4ae56ecba08fc03" } ] } \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json index 9a2b5f2b2a8931571359a2048bfed7ce76350ed0..2a39ca334ac0ecad552baf83e4bdff0991cd6688 100644 --- a/ndarray-cache.json +++ b/ndarray-cache.json @@ -1,136 +1,92 @@ { "metadata": { - "ParamSize": 313, - "ParamBytes": 1111169024.0, - "BitsPerParam": 5.002201462167321 + "ParamSize": 199, + "ParamBytes": 7108352000.0, + "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", - "nbytes": 116686848, + "nbytes": 466747392, "records": [ { - "name": "lm_head.q_weight", + "name": "lm_head.weight", "shape": [ 151936, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 116686848, + "nbytes": 466747392, "byteOffset": 0 } ], - "md5sum": "3eba7944239eea590b63e87f00016a6b" + "md5sum": "e376b8cb28d4376590f01ad535d6071d" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", - "nbytes": 116686848, + "nbytes": 466747392, "records": [ { - "name": "model.embed_tokens.q_weight", + "name": "model.embed_tokens.weight", "shape": [ 151936, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 116686848, + "nbytes": 466747392, "byteOffset": 0 } ], - "md5sum": "be8d442fc5234135620521550a8148fa" + "md5sum": "6a2ea36ede6c428672115e3a3d8ff0d2" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", - "nbytes": 29174784, + "nbytes": 55050240, "records": [ { - "name": "lm_head.q_scale", - "shape": [ - 151936, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 14585856, - "byteOffset": 0 - }, - { - "name": "model.embed_tokens.q_scale", - "shape": [ - 151936, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 14585856, - "byteOffset": 14585856 - }, - { - "name": "model.layers.0.input_layernorm.weight", + "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 29171712 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "51e65235b4e65bd14f51a603e8ef7b49" + "md5sum": "9463e4903f97fd312a8f057495aa1239" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", - "nbytes": 33212416, + "nbytes": 27535360, "records": [ { - "name": "model.layers.0.mlp.down_proj.q_weight", + "name": "model.layers.0.input_layernorm.weight", "shape": [ - 1536, - 1120 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, + "nbytes": 3072, "byteOffset": 0 }, { - "name": "model.layers.0.mlp.down_proj.q_scale", + "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 6881280 - }, - { - "name": "model.layers.0.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 7741440 - }, - { - "name": "model.layers.0.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 21504000 + "nbytes": 27525120, + "byteOffset": 3072 }, { "name": "model.layers.0.post_attention_layernorm.weight", @@ -140,7 +96,7 @@ "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 23224320 + "byteOffset": 27528192 }, { "name": "model.layers.0.self_attn.c_attn.bias", @@ -150,113 +106,161 @@ "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 23227392 - }, + "byteOffset": 27531264 + } + ], + "md5sum": "1431994b9042b3fd664eae27a814f0dd" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.0.self_attn.c_attn.q_weight", + "name": "model.layers.1.mlp.down_proj.weight", "shape": [ - 2048, - 192 + 1536, + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23231488 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "22267d86aa57d6448950d6b1fd5d3a1b" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.0.self_attn.c_attn.q_scale", + "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ - 2048, - 48 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 24804352 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "e545b60a4a590e0d1c5e3b9140db35f3" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.0.self_attn.o_proj.q_weight", + "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 1536, - 192 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 25000960 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "8e38f424f7c0d358cd84cfd6d524534b" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.0.self_attn.o_proj.q_scale", + "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ - 1536, - 48 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 26180608 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "631ad4762294e6034a11b0b66bae13b5" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.1.input_layernorm.weight", + "name": "model.layers.11.mlp.down_proj.weight", "shape": [ - 1536 + 1536, + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 26328064 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "4c542b91b3dc431e62e87024c4b79779" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.1.mlp.down_proj.q_weight", + "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ - 1536, - 1120 + 17920, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 26331136 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "59cea6a143c3e50522e6a7ab56fd1302" + "md5sum": "da04ac4500d98575c04bc5229b7fcafb" }, { - "dataPath": "params_shard_4.bin", + "dataPath": "params_shard_10.bin", "format": "raw-shard", - "nbytes": 27191296, + "nbytes": 33060864, "records": [ { - "name": "model.layers.1.mlp.down_proj.q_scale", + "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 860160 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "name": "model.layers.1.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 14622720 + "nbytes": 3072, + "byteOffset": 11010048 }, { "name": "model.layers.1.post_attention_layernorm.weight", @@ -266,7 +270,7 @@ "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 16343040 + "byteOffset": 11013120 }, { "name": "model.layers.1.self_attn.c_attn.bias", @@ -276,3263 +280,2315 @@ "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 16346112 - }, - { - "name": "model.layers.1.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 16350208 + "byteOffset": 11016192 }, { - "name": "model.layers.1.self_attn.c_attn.q_scale", + "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17923072 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.1.self_attn.o_proj.q_weight", + "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 18119680 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.1.self_attn.o_proj.q_scale", + "name": "model.layers.10.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 19299328 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.10.input_layernorm.weight", + "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 19446784 + "byteOffset": 22033408 }, { - "name": "model.layers.10.mlp.down_proj.q_weight", + "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 19449856 + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.10.mlp.down_proj.q_scale", + "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 26331136 - } - ], - "md5sum": "13283f546f5e1640e0d179f91c737060" - }, - { - "dataPath": "params_shard_5.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "name": "model.layers.11.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.10.post_attention_layernorm.weight", + "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.10.self_attn.c_attn.bias", + "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.10.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "f1dfff1bdbfc338078cd03dcad4f25ad" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.10.self_attn.c_attn.q_scale", + "name": "model.layers.12.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "b8e570d8afef1127f3e3bd074074d74f" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.10.self_attn.o_proj.q_weight", + "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "e7a70f4c903318c16326ef7bcd6181d6" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.10.self_attn.o_proj.q_scale", + "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "a4eab6c623efb1460907f9e4bfbc9ee5" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.11.input_layernorm.weight", + "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "127d9f5e010997c546676c086891a0ad" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.11.mlp.down_proj.q_weight", + "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "64c5a74a548ff008fdb5d95636e4fd63" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.11.mlp.down_proj.q_scale", + "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "f80a070c11451222f8169d453336a19c" + "md5sum": "99334446e606608b2aaa2c25815f3f04" }, { - "dataPath": "params_shard_6.bin", + "dataPath": "params_shard_17.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.11.post_attention_layernorm.weight", + "name": "model.layers.12.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.11.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.11.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.11.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.11.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.11.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.12.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.12.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.12.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "114d2917404e99a37c39aacfa57a9a50" - }, - { - "dataPath": "params_shard_7.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.12.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.12.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.12.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.12.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.12.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.12.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.12.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.12.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.13.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.13.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.13.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "dc3b5e347d2aba0026bc6cc0dcf4b005" - }, - { - "dataPath": "params_shard_8.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.13.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.13.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.13.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.13.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.13.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.13.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.13.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.13.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.14.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.14.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.14.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "60e7284515bb4c2999432289d45839c4" - }, - { - "dataPath": "params_shard_9.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.14.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.14.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.14.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.14.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.14.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.14.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.14.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.14.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.15.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.15.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.15.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "f48e561b8d2b76519c23c5db3e8f6f16" - }, - { - "dataPath": "params_shard_10.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.15.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.15.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.15.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.15.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.15.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.15.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.15.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.15.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.16.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.16.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.16.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "252c5013974cfddd0dbba30e029f877e" - }, - { - "dataPath": "params_shard_11.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.16.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.16.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.16.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.16.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.16.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.16.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.16.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.16.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.17.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.17.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.17.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "390cb82a590d93b12129b8d3c50f73f1" - }, - { - "dataPath": "params_shard_12.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.17.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.17.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.17.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.17.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.17.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.17.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.17.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.17.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.18.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.18.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.18.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "a3495d635ea3c53c9b0bbbcef749ceb8" - }, - { - "dataPath": "params_shard_13.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.18.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.18.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.18.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.18.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.18.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.18.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.18.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, - { - "name": "model.layers.18.self_attn.o_proj.q_scale", - "shape": [ - 1536, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, - { - "name": "model.layers.19.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.19.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.19.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "35700f4804c01da98c6241cee2ff5732" - }, - { - "dataPath": "params_shard_14.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.19.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.19.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.19.post_attention_layernorm.weight", + "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11013120 }, { - "name": "model.layers.19.self_attn.c_attn.bias", + "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.19.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "byteOffset": 11016192 }, { - "name": "model.layers.19.self_attn.c_attn.q_scale", + "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.19.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.19.self_attn.o_proj.q_scale", + "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.2.input_layernorm.weight", + "name": "model.layers.13.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 - }, - { - "name": "model.layers.2.mlp.down_proj.q_weight", - "shape": [ - 1536, - 1120 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, - { - "name": "model.layers.2.mlp.down_proj.q_scale", - "shape": [ - 1536, - 280 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "fc44a49e8c67d1e47c7d9f9b47c32cb5" - }, - { - "dataPath": "params_shard_15.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ - { - "name": "model.layers.2.mlp.gate_up_proj.q_weight", - "shape": [ - 17920, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 - }, - { - "name": "model.layers.2.mlp.gate_up_proj.q_scale", - "shape": [ - 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "byteOffset": 22030336 }, { - "name": "model.layers.2.post_attention_layernorm.weight", + "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 22033408 }, { - "name": "model.layers.2.self_attn.c_attn.bias", + "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.2.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "byteOffset": 22036480 }, { - "name": "model.layers.2.self_attn.c_attn.q_scale", + "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.2.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 6291456, + "byteOffset": 22040576 }, { - "name": "model.layers.2.self_attn.o_proj.q_scale", + "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.20.input_layernorm.weight", + "name": "model.layers.14.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 33050624 }, { - "name": "model.layers.20.mlp.down_proj.q_weight", + "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ - 1536, - 1120 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 3072, + "byteOffset": 33053696 }, { - "name": "model.layers.20.mlp.down_proj.q_scale", + "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ - 1536, - 280 + 2048 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 4096, + "byteOffset": 33056768 } ], - "md5sum": "66952571c7809812a74701771112d5ac" + "md5sum": "f94fe101baadd653c719760c39936366" }, { - "dataPath": "params_shard_16.bin", + "dataPath": "params_shard_18.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 27525120, "records": [ { - "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "name": "model.layers.15.mlp.down_proj.weight", "shape": [ - 17920, - 192 + 1536, + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 27525120, "byteOffset": 0 - }, + } + ], + "md5sum": "6a3fc846bb0b5a13f659c6a1dc4c4694" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 17920, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 - }, - { - "name": "model.layers.20.post_attention_layernorm.weight", - "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 15482880 - }, - { - "name": "model.layers.20.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.20.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, - { - "name": "model.layers.20.self_attn.c_attn.q_scale", - "shape": [ - 2048, - 48 - ], - "dtype": "float32", - "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.20.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "ad4b8262ad73b103f7d24ae8a372da04" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.20.self_attn.o_proj.q_scale", + "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "8a98a1a048e006dd076822db1ae73846" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.21.input_layernorm.weight", + "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "72543b4e31346428357244f70b615d12" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.21.mlp.down_proj.q_weight", + "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "284d2e9aa13b996ea3297594b10df1f1" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.21.mlp.down_proj.q_scale", + "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "451ddd6918f481774a5ddcae93fcc19a" + "md5sum": "a150d9ebc7ec5c2915ef543348a130cf" }, { - "dataPath": "params_shard_17.bin", + "dataPath": "params_shard_24.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.21.post_attention_layernorm.weight", + "name": "model.layers.15.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.21.self_attn.c_attn.bias", + "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.21.self_attn.c_attn.q_weight", + "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.21.self_attn.c_attn.q_scale", + "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.21.self_attn.o_proj.q_weight", + "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.21.self_attn.o_proj.q_scale", + "name": "model.layers.16.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.22.input_layernorm.weight", + "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.22.mlp.down_proj.q_weight", + "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.22.mlp.down_proj.q_scale", + "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "6d6e530a4f2c516481ef459c432eab73" - }, - { - "dataPath": "params_shard_18.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "name": "model.layers.17.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.22.post_attention_layernorm.weight", + "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.22.self_attn.c_attn.bias", + "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.22.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "eccb4e34a4523e8f5ddad7512bce96cd" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.22.self_attn.c_attn.q_scale", + "name": "model.layers.18.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "98c6b2f2b0a3fbdaa97b34ff1c44f19f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.22.self_attn.o_proj.q_weight", + "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "a27c1d063ffa5747651e4090e48e7b4c" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.22.self_attn.o_proj.q_scale", + "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "bce8361e10f3eba7353b8f498d779676" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.23.input_layernorm.weight", + "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "8528ddaa61b8703c89aa81d92afff1cc" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.23.mlp.down_proj.q_weight", + "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "d33d62ce5f4294e21b34df2db479f5d3" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.23.mlp.down_proj.q_scale", + "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "0e25e76a52534e27e009f1966b60e908" + "md5sum": "2dd687b44fa54cc3489a523d9797e2f0" }, { - "dataPath": "params_shard_19.bin", + "dataPath": "params_shard_31.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.23.post_attention_layernorm.weight", + "name": "model.layers.18.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.23.self_attn.c_attn.bias", + "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.23.self_attn.c_attn.q_weight", + "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.23.self_attn.c_attn.q_scale", + "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.23.self_attn.o_proj.q_weight", + "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.23.self_attn.o_proj.q_scale", + "name": "model.layers.19.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.24.input_layernorm.weight", + "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.24.mlp.down_proj.q_weight", + "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.24.mlp.down_proj.q_scale", + "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "0b2f3521eb647a178bf4fe69ebbefca4" - }, - { - "dataPath": "params_shard_20.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "name": "model.layers.2.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.24.post_attention_layernorm.weight", + "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.24.self_attn.c_attn.bias", + "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.24.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "7cc1e4b5ba621f3e55b0dcc20acdbc09" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.24.self_attn.c_attn.q_scale", + "name": "model.layers.20.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "7050d538a01ea8409da478ecfe4b7308" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.24.self_attn.o_proj.q_weight", + "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "a1a18612a0019a522efe32bbf85b09d3" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.24.self_attn.o_proj.q_scale", + "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "449087c60ab28cead04f4d648e9c115b" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.25.input_layernorm.weight", + "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "349c72f8fe7053102705ccc87332b90d" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.25.mlp.down_proj.q_weight", + "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "b5995018e8c8653dff6649c8deb76b4b" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.25.mlp.down_proj.q_scale", + "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "176c5666e37994d7f23960de639e90cc" + "md5sum": "c9f851e6d38fbb3193e8401ceb8a1d43" }, { - "dataPath": "params_shard_21.bin", + "dataPath": "params_shard_38.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.25.post_attention_layernorm.weight", + "name": "model.layers.20.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.25.self_attn.c_attn.bias", + "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.25.self_attn.c_attn.q_weight", + "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.25.self_attn.c_attn.q_scale", + "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.25.self_attn.o_proj.q_weight", + "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.25.self_attn.o_proj.q_scale", + "name": "model.layers.21.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.26.input_layernorm.weight", + "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.26.mlp.down_proj.q_weight", + "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.26.mlp.down_proj.q_scale", + "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "ceb962974aa3ffd06a616a6c52270097" - }, - { - "dataPath": "params_shard_22.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "name": "model.layers.22.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.26.post_attention_layernorm.weight", + "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.26.self_attn.c_attn.bias", + "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.26.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "49f162c562d76a1aa323b681ae8bf59b" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.26.self_attn.c_attn.q_scale", + "name": "model.layers.23.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "3de22bdb23771bfc14d719fa125c6be6" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.26.self_attn.o_proj.q_weight", + "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "6edf9c36cfa4e3b3b5afaa14b62fa842" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.26.self_attn.o_proj.q_scale", + "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "52a49b8afd861c3081f11803f008dba7" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.27.input_layernorm.weight", + "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "07cad64074e88e27ed0ab89a91052581" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.27.mlp.down_proj.q_weight", + "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "f373b934ab548fb2b04a729c5f22dd0b" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.27.mlp.down_proj.q_scale", + "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "ab9d7f443673b7890cfbf49008594ab9" + "md5sum": "496023d69be445e33b091efe17662be2" }, { - "dataPath": "params_shard_23.bin", + "dataPath": "params_shard_45.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.27.post_attention_layernorm.weight", + "name": "model.layers.23.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.27.self_attn.c_attn.bias", + "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.27.self_attn.c_attn.q_weight", + "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.27.self_attn.c_attn.q_scale", + "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.27.self_attn.o_proj.q_weight", + "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.27.self_attn.o_proj.q_scale", + "name": "model.layers.24.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.3.input_layernorm.weight", + "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.3.mlp.down_proj.q_weight", + "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.3.mlp.down_proj.q_scale", + "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "61f19df02bf82992f18b0a6ec1370776" - }, - { - "dataPath": "params_shard_24.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "name": "model.layers.25.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.3.post_attention_layernorm.weight", + "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.3.self_attn.c_attn.bias", + "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.3.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "b162a58ff40747786153f9fbf3562eb4" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.3.self_attn.c_attn.q_scale", + "name": "model.layers.26.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "caa76d72478fe6666accec3e8334136d" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.3.self_attn.o_proj.q_weight", + "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "fbfea3bf12262f3f8f987854e1902f72" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.3.self_attn.o_proj.q_scale", + "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "5ecb3464368f9a2575552ed93f8f800e" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.4.input_layernorm.weight", + "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "702514b36ca983e41199b4370005eb76" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.4.mlp.down_proj.q_weight", + "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "093b34e27d9159a6433c31b0ed2b2c39" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.4.mlp.down_proj.q_scale", + "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "9bd46c03febaca7936dd5c670e87c9db" + "md5sum": "c3dea40fde4fba6e8b5ec5702df0519d" }, { - "dataPath": "params_shard_25.bin", + "dataPath": "params_shard_52.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.4.post_attention_layernorm.weight", + "name": "model.layers.26.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.4.self_attn.c_attn.bias", + "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.4.self_attn.c_attn.q_weight", + "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.4.self_attn.c_attn.q_scale", + "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.4.self_attn.o_proj.q_weight", + "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.4.self_attn.o_proj.q_scale", + "name": "model.layers.27.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.5.input_layernorm.weight", + "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.5.mlp.down_proj.q_weight", + "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.5.mlp.down_proj.q_scale", + "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "a967b6249d494149ba4d36888fb580be" - }, - { - "dataPath": "params_shard_26.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "name": "model.layers.3.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.5.post_attention_layernorm.weight", + "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.5.self_attn.c_attn.bias", + "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.5.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "3e354febc906d34d54536ea7885c5889" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.5.self_attn.c_attn.q_scale", + "name": "model.layers.4.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "b6824e2ce69ee5ac3b231da74405c28d" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.5.self_attn.o_proj.q_weight", + "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "577fbe9d5221ad319404760818265f2f" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.5.self_attn.o_proj.q_scale", + "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "dc97e694cd68d26939b3fa7a68b6437e" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.6.input_layernorm.weight", + "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "e029aaef82109dae100ed0d76abf03f4" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.6.mlp.down_proj.q_weight", + "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "0cebedd6e22c5f3c4ad9de9a4a301b0c" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.6.mlp.down_proj.q_scale", + "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "c456c605796669a1a035b5509af8b906" + "md5sum": "d021ceb96965eac2d6acd5040e31ff2b" }, { - "dataPath": "params_shard_27.bin", + "dataPath": "params_shard_59.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.6.post_attention_layernorm.weight", + "name": "model.layers.4.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.6.self_attn.c_attn.bias", + "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.6.self_attn.c_attn.q_weight", + "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.6.self_attn.c_attn.q_scale", + "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.6.self_attn.o_proj.q_weight", + "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.6.self_attn.o_proj.q_scale", + "name": "model.layers.5.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.7.input_layernorm.weight", + "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.7.mlp.down_proj.q_weight", + "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.7.mlp.down_proj.q_scale", + "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "4932c078feff0216f175050579e08a9a" - }, - { - "dataPath": "params_shard_28.bin", - "format": "raw-shard", - "nbytes": 26331136, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "name": "model.layers.6.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { - "name": "model.layers.7.post_attention_layernorm.weight", + "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { - "name": "model.layers.7.self_attn.c_attn.bias", + "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.7.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "ae36f42102ecf32968c4b9a5494a5632" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.7.self_attn.c_attn.q_scale", + "name": "model.layers.7.mlp.down_proj.weight", "shape": [ - 2048, - 48 + 1536, + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "4f8b37541d2669f35474558755ad3595" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.7.self_attn.o_proj.q_weight", + "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ - 1536, - 192 + 17920, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "301e207ed6aae4c43a940a2cb0041422" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.7.self_attn.o_proj.q_scale", + "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 1536, - 48 + 8960 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "94c308f7fab6e6806497419fb2bddd14" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.8.input_layernorm.weight", + "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ + 17920, 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 18586624 - }, + "nbytes": 55050240, + "byteOffset": 0 + } + ], + "md5sum": "125d1635c4fb0ef278b46c2b6f644c0a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ { - "name": "model.layers.8.mlp.down_proj.q_weight", + "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 1536, - 1120 + 8960 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 - }, + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "e77e14c76d670413b391d98877a76d86" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.8.mlp.down_proj.q_scale", + "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ - 1536, - 280 + 17920, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "8686b64ff10449b640afc32ed7439a65" + "md5sum": "7ade211e79fd496efc5cbfcad1c50060" }, { - "dataPath": "params_shard_29.bin", + "dataPath": "params_shard_66.bin", "format": "raw-shard", - "nbytes": 26331136, + "nbytes": 33060864, "records": [ { - "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ - 17920, - 192 + 2048, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, + "nbytes": 6291456, "byteOffset": 0 }, { - "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ - 17920, - 48 + 1536, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 4718592, + "byteOffset": 6291456 }, { - "name": "model.layers.8.post_attention_layernorm.weight", + "name": "model.layers.7.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 11010048 }, { - "name": "model.layers.8.self_attn.c_attn.bias", + "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 15485952 + "nbytes": 3072, + "byteOffset": 11013120 }, { - "name": "model.layers.8.self_attn.c_attn.q_weight", + "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ - 2048, - 192 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 + "nbytes": 4096, + "byteOffset": 11016192 }, { - "name": "model.layers.8.self_attn.c_attn.q_scale", + "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 + "nbytes": 6291456, + "byteOffset": 11020288 }, { - "name": "model.layers.8.self_attn.o_proj.q_weight", + "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 1536, - 192 + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 4718592, + "byteOffset": 17311744 }, { - "name": "model.layers.8.self_attn.o_proj.q_scale", + "name": "model.layers.8.input_layernorm.weight", "shape": [ - 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 3072, + "byteOffset": 22030336 }, { - "name": "model.layers.9.input_layernorm.weight", + "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 22033408 }, { - "name": "model.layers.9.mlp.down_proj.q_weight", + "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ - 1536, - 1120 + 2048 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 6881280, - "byteOffset": 18589696 + "nbytes": 4096, + "byteOffset": 22036480 }, { - "name": "model.layers.9.mlp.down_proj.q_scale", + "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ - 1536, - 280 + 2048, + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 860160, - "byteOffset": 25470976 - } - ], - "md5sum": "037698a9234e2d563a950ced66a97bf3" - }, - { - "dataPath": "params_shard_30.bin", - "format": "raw-shard", - "nbytes": 18589696, - "records": [ + "nbytes": 6291456, + "byteOffset": 22040576 + }, { - "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ - 17920, - 192 + 1536, + 1536 ], - "dtype": "uint32", + "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 13762560, - "byteOffset": 0 + "nbytes": 4718592, + "byteOffset": 28332032 }, { - "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "name": "model.layers.9.input_layernorm.weight", "shape": [ - 17920, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 1720320, - "byteOffset": 13762560 + "nbytes": 3072, + "byteOffset": 33050624 }, { "name": "model.layers.9.post_attention_layernorm.weight", @@ -3542,7 +2598,7 @@ "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 15482880 + "byteOffset": 33053696 }, { "name": "model.layers.9.self_attn.c_attn.bias", @@ -3552,51 +2608,37 @@ "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, - "byteOffset": 15485952 - }, - { - "name": "model.layers.9.self_attn.c_attn.q_weight", - "shape": [ - 2048, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 15490048 - }, + "byteOffset": 33056768 + } + ], + "md5sum": "5471237681d93c50e780f7a581d37dce" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 11013120, + "records": [ { - "name": "model.layers.9.self_attn.c_attn.q_scale", + "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 2048, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 196608, - "byteOffset": 17062912 - }, - { - "name": "model.layers.9.self_attn.o_proj.q_weight", - "shape": [ - 1536, - 192 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 1179648, - "byteOffset": 17259520 + "nbytes": 6291456, + "byteOffset": 0 }, { - "name": "model.layers.9.self_attn.o_proj.q_scale", + "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 1536, - 48 + 1536 ], "dtype": "float32", "format": "f32-to-bf16", - "nbytes": 147456, - "byteOffset": 18439168 + "nbytes": 4718592, + "byteOffset": 6291456 }, { "name": "model.norm.weight", @@ -3606,10 +2648,10 @@ "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 18586624 + "byteOffset": 11010048 } ], - "md5sum": "e5c2ee400f5502dbba790e85967f0990" + "md5sum": "f709c941037554e6d4ae56ecba08fc03" } ] } \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin index 845f98e6d8c4698e7dd52b50badaac72eb723643..0cc5e45e3895df87daf0d2fe4d5fbd3ad0aac3a0 100644 --- a/params_shard_0.bin +++ b/params_shard_0.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4d1fa25bab93bdee27531e61adba6dc2caea6073c31a2fd544f63dda2b338af -size 116686848 +oid sha256:93463c141a5f6a791f7ce5c04fdc337f57a6f5023e15484b622bc10b9abd3c0f +size 466747392 diff --git a/params_shard_1.bin b/params_shard_1.bin index e0b428e89cec647290949dc7a6e22590ccc1cf3a..368d02a3beff3ac8f876081c5c2fc3775fcf4fec 100644 --- a/params_shard_1.bin +++ b/params_shard_1.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29ab837f64b3ac1937cbbe3dfd24c9958bc8208f9d3bf96e27c64164ba5ed5e7 -size 116686848 +oid sha256:ceae2992cd5aa74dd18a9bed0313da6db56b4c6c47e804fd1181bb6afb1d6668 +size 466747392 diff --git a/params_shard_10.bin b/params_shard_10.bin index 90735fb0ed246f641772f8d34bf30a1c08b7189c..83b046d601c39411b03e87bb1319322c48bceb01 100644 --- a/params_shard_10.bin +++ b/params_shard_10.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b9804d09b1e919ed3b75a68ce2ee69ee3d710337b06af79a9bef033208ad4a3 -size 26331136 +oid sha256:b27df7366ce72887bd430dd0f94245df259d55a79f4d9875fa4afaed39486a58 +size 33060864 diff --git a/params_shard_11.bin b/params_shard_11.bin index d46174ba0d071f5145ed5868b047b4d0c65f61f6..b149d9543e00c74d4534691582f5174f1ece7b5a 100644 --- a/params_shard_11.bin +++ b/params_shard_11.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db2a5b10f92bf55dea115a922c3d0542a8f4d37a504a2decd7f96d9b67bf8dd5 -size 26331136 +oid sha256:a00abb003d6640509bcd59beaf2948f7f6e8069ba45789f07a6faf2c7b850467 +size 27525120 diff --git a/params_shard_12.bin b/params_shard_12.bin index 35bc1b9f8266fb8f0d1a41bfa2044c217eb8bb06..81e1329d33eaea859d266bb116c21aff5da6df90 100644 --- a/params_shard_12.bin +++ b/params_shard_12.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24b30d6f4efacbf535ef4c4178e181ce6f9f1b68e9e5b60568afd1f534dbad91 -size 26331136 +oid sha256:3d0becabe9c7e1e9d05a0e4210c3a71045cebc4f96c3538fd9462e2b18ef08ba +size 55050240 diff --git a/params_shard_13.bin b/params_shard_13.bin index dbabc55fc615a6a44c0f3aa2b405d0cdd1582755..cdb68c0b7af128729c49ddcd4677fd4228cacd67 100644 --- a/params_shard_13.bin +++ b/params_shard_13.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:30467c7f05df29c3117913518d89d634dea9a6d38b6c487a3b3e74d88fc3a583 -size 26331136 +oid sha256:d78a9beb195beae3010695fe2526eb38a55a7c7656ae68ecd23dd624bf9bc0e8 +size 27525120 diff --git a/params_shard_14.bin b/params_shard_14.bin index 805348793d87bd0a35acc46cf839be207a1e7921..93d3d48411ae1918a464a996c25a62e9638b706e 100644 --- a/params_shard_14.bin +++ b/params_shard_14.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c066570bc414c818ed3a7ae2feefc9c154f83bb3e5a14eb134e409b2b03c353 -size 26331136 +oid sha256:d8c405bfc1f3492c8f31c6a2dca6f9f46fad1a27f98d21b4534c9370cbfc3993 +size 55050240 diff --git a/params_shard_15.bin b/params_shard_15.bin index 3b2c7748faa76bb513613b1473f938a22e2983ed..0f10228e09d1358bdb3933981c713bde67887c28 100644 --- a/params_shard_15.bin +++ b/params_shard_15.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02f86898b41c5670edf82e3afa5a91760c1b33d88b30e8fbd664a6e87ce42c4b -size 26331136 +oid sha256:38f2e17ba2992156477f409e2632ec89fd866e2fb64dae4aaf48607e9566f23c +size 27525120 diff --git a/params_shard_16.bin b/params_shard_16.bin index e4ba5183b1c122d16140ec0e326c765a2c295d9e..0c4b02bb43476900056f278359452dd4753b37fe 100644 --- a/params_shard_16.bin +++ b/params_shard_16.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29c070a0392cf4a29855b381d126fa1dff21567162b0a9305fe7244a29060b21 -size 26331136 +oid sha256:6de5c9537428dbbd2c6983c260e18f1edc3fbc726073300f78d92659ba5c9efb +size 55050240 diff --git a/params_shard_17.bin b/params_shard_17.bin index de6ebe924e044238caa177c615fad109ee95093c..f73b6a73cf57307fb62a75dfc4261b87cf8ddf25 100644 --- a/params_shard_17.bin +++ b/params_shard_17.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3f37453bfee2894fd2225c75bcecf3b8ac63a1a732b90c0f7a387c824812476 -size 26331136 +oid sha256:73571dda6984c68a8b9ed6dbcff22469831e72554ac01dfdb92199a604789afe +size 33060864 diff --git a/params_shard_18.bin b/params_shard_18.bin index 927de73a039b15ed1e092ca231ab15c0ebc5b55c..a8489cc333eb556bec5d4d3abb1acc1cebbf4fc6 100644 --- a/params_shard_18.bin +++ b/params_shard_18.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f52432700ffb5c2506d6939133fc0e1366842bfc5b6398830498e0695ee7460e -size 26331136 +oid sha256:9cf9ecc5503d9195fae321409eb54416330567ea0843e14925f714b64e45bf25 +size 27525120 diff --git a/params_shard_19.bin b/params_shard_19.bin index d9315578001aab078052f1c2b9fa182046f1a8b6..1a06ebcf5de5c84b46c5fef3556b8138fe3d5946 100644 --- a/params_shard_19.bin +++ b/params_shard_19.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3692ddfabe072c1376ed61209bcb42d2cb4f430ee01911cac98ad541212c7ad -size 26331136 +oid sha256:c10298bb1fd8c4fe294f07e4232180a7cb0c01367521730752da994dd8ab65fe +size 55050240 diff --git a/params_shard_2.bin b/params_shard_2.bin index 872a5d124b72038751798b4f4fba185c098d2476..7d0ad356974cc4a438a31b8839d5e4fd6377fd72 100644 --- a/params_shard_2.bin +++ b/params_shard_2.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0168a8b8f6459dad35d5fc9956219d31414b7ec246b3a019bfb29d92968c496e -size 29174784 +oid sha256:efb510a1841bf478c670b06cc3514bd079cc28c29e5ba96bf09e194fd144daeb +size 55050240 diff --git a/params_shard_20.bin b/params_shard_20.bin index 1722912ad0a09797c9c0f96ad4a67f5c844c8af3..f94337801e48d3efda87913cb4b6a541f4b2f545 100644 --- a/params_shard_20.bin +++ b/params_shard_20.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d30e8dc087c3801f92bf962d514c99b6e54e07e3cd4e726b371df4090c03f861 -size 26331136 +oid sha256:5eb1b40c168c0aba441a5832903504c22f7a4d31657e9637936be0b4f7bba29c +size 27525120 diff --git a/params_shard_21.bin b/params_shard_21.bin index 4bbdcdfa32ae43c68ebbafb6979121d7469a414c..483bd4474719b30327c1ffcfeb48ba14444815e9 100644 --- a/params_shard_21.bin +++ b/params_shard_21.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:befae26ea25cc5919bb9646fea6496b78e7100b836db9c52439fd5910cea3703 -size 26331136 +oid sha256:dbdbb2a2e5e16f9823400e7f32499729630dd36ae2948a8a5d30e883f0f34c62 +size 55050240 diff --git a/params_shard_22.bin b/params_shard_22.bin index c76c50af4f02743fb11584ee2b0a9ccbc1ab7f38..bb7c93974f8dac5f8bbc868b20de9e5abcd5ce28 100644 --- a/params_shard_22.bin +++ b/params_shard_22.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65c5c86b043d62bac22d6d7f591e345568eaffe9a0484f2abdaefac15cae57df -size 26331136 +oid sha256:d7df70aaced8b988950f0579abff1bef08bc1f1eb7bc4fcd3d97168b62fd9781 +size 27525120 diff --git a/params_shard_23.bin b/params_shard_23.bin index 66c9ef6f18d4de07be60cc30ef02e12892aa8e6c..64f9ee994f431ae1419d17ce26048eafb98c1967 100644 --- a/params_shard_23.bin +++ b/params_shard_23.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00b663c5bb64030d233a6f0c449fa5343c61b036e432d1d6928544cd85b49b7f -size 26331136 +oid sha256:9d779c3787f144f666e3cbbef254d4dea718d6b2202aa4ef5e568da711c67745 +size 55050240 diff --git a/params_shard_24.bin b/params_shard_24.bin index e4cf52ea1a50c1c16dd8ec04c37a7b3dba6de25c..8c8414fb9b2d7f67f6aef4f62610be6fa1e70b2d 100644 --- a/params_shard_24.bin +++ b/params_shard_24.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:677d1df1179512e30f372dc91f90684ee0af2ad0e3a4f63094f2a73ba6221b05 -size 26331136 +oid sha256:2db195c2704ec8b3b54e8b584bb56de82de51356a661569a88220aed5ad7ddc7 +size 33060864 diff --git a/params_shard_25.bin b/params_shard_25.bin index fadcdef36bd5be1451dc074dd54622d578d2b171..c945da431f0892056e2552734d56cc5975268f5d 100644 --- a/params_shard_25.bin +++ b/params_shard_25.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83d0ff8801ee8dce27cf701d7d2362672e1bb5f740ef7b38b74018fa42e6c129 -size 26331136 +oid sha256:e85e04059e6d242c11df06945f1155583bd93e288442e9e2fb25f61a5e065674 +size 27525120 diff --git a/params_shard_26.bin b/params_shard_26.bin index 21c3113a755d3d01914952061ffa40064cc6a1e1..e683b2e76e0093aed32216637e299ab4e6aaea23 100644 --- a/params_shard_26.bin +++ b/params_shard_26.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:616c37ad221a4b3d79b8bdad4e204507469e858a7724ab1d5da293134b21eca4 -size 26331136 +oid sha256:83550d787bacd9ea9facca5c68e468653f50b2584a287f3522293d2ff716e10c +size 55050240 diff --git a/params_shard_27.bin b/params_shard_27.bin index 6ac415adb48216ee2c4835b62542d984ec45961e..982e998683405c18d659ea1877a8c15ea308a1cb 100644 --- a/params_shard_27.bin +++ b/params_shard_27.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fae875a6701fb126172d6b67d46269c97c3a58902143dd5c2f3b5f17f41a08dc -size 26331136 +oid sha256:db17915a3a01623876b0d06f82324e573c245cba95691678358edfd2026d1d64 +size 27525120 diff --git a/params_shard_28.bin b/params_shard_28.bin index 37e0e31f639790d450ca7b5892d4641e26bbc774..eae580cc388769b2a493363df04a6a44a87e73e6 100644 --- a/params_shard_28.bin +++ b/params_shard_28.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9f0b0eeee4dcbfe3b1bafbe1d006a8203ebc43c15b8a17a6694d7a588d203c0 -size 26331136 +oid sha256:f6841a522843a48d9017fcfe916373600eac643a794bbcbea581a2b706dbd092 +size 55050240 diff --git a/params_shard_29.bin b/params_shard_29.bin index eb64d76065786580f4c48f8cd296512b1e9bb987..5ac45f416bb79f11f7bd1e46e7518e031576ae5b 100644 --- a/params_shard_29.bin +++ b/params_shard_29.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70694802b48fa31110c5b12bfb64c3f8d93dedc0c835197c72dad01026b92d54 -size 26331136 +oid sha256:e6d5a480f8202dcf3dddb2a80011bd988de4e9ce69033af56c0e9f9212fd264d +size 27525120 diff --git a/params_shard_3.bin b/params_shard_3.bin index 614d5a1dec2c4941aa44961943b9c6012e767ed0..548a7eea214c0506ce8412a92095796856021243 100644 --- a/params_shard_3.bin +++ b/params_shard_3.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3340731366f5eef347332610f751ebb53a41d6c6058294f867daa6e7f09a3f14 -size 33212416 +oid sha256:1f2904a0e6da41c8d6ac49d6e464ea59336dec21358181e947f1ac756645c654 +size 27535360 diff --git a/params_shard_30.bin b/params_shard_30.bin index 7b65442313d129cffb9b824c02f5ad2d62c792fa..ce78e8ceca41f9c18c0a830a7e8b20d8d1bc0e86 100644 --- a/params_shard_30.bin +++ b/params_shard_30.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86074eb00eb1ba19a4b9843af5d0921f40a0ea0465a23683365ad97da549e0e5 -size 18589696 +oid sha256:079ee9b174aa49859715919dfdaf661d2b33db250a51afeebb9ad175cfbb52b2 +size 55050240 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fdfc9d3240740a32a4fdce1e1d32db53712fd08 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b0891ce6540d80ba4fbf9bf8a783afd71223b5b634945f4627ee717241ec3b +size 33060864 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc11d08a7bb6820de18979eebd22ea68483408cb --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14b5fe2d7cea0ecea699a8f95e49d6150191ff5df0804b9f6075e033e6c11b1d +size 27525120 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b86e83be866c89c86bed777a95a97281d4b6b5d --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca07e81deeed64d823f3859fee7c482c98571ae50e3263df9d4266e24491bd96 +size 55050240 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f24024abc012dc57c05640effe1128737660d7b --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58fcb21d8a86d8cbb11172d997090b6efbc2b7c6f913e0dd6888eaebb3bc407a +size 27525120 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..695f1c88f7ef31b3ccc75b612822b5e6e8f6ae34 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b17ac082fdde516b07f4170d4b15c70a0f4cdc3d92cd8a75509a33c5efd472f +size 55050240 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca244acbe4094aba7c5fe2bfbed72484d61ea08c --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fffa9d777245886976836b2a06a93f74e270d02cc2f4e78b2b08936e4d07118 +size 27525120 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..90f1f706a8207f7149badfd16993d2c3c26af419 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d87be399384b958b24b0380f7f8cdec3d277b2d984ac711ddf7df38a48b0eda7 +size 55050240 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..d485f4d2c80306f8f18ec898a3e73f5cfd0aca13 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c30cac382bd284437b43beab4ded2a3f5d22e3af15543d1228e2c14a762c070 +size 33060864 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..0da4af1cab9b3f07690637a31cce56a23063da97 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:230e470228433d904194e42d52f1a3821210da3eab03b0942b93695707e229cd +size 27525120 diff --git a/params_shard_4.bin b/params_shard_4.bin index e937a5277643e2d8aae08c9a4be0fad7a7893925..a9b1ec33f81f93c7aba63f38801c49b7c0ca5621 100644 --- a/params_shard_4.bin +++ b/params_shard_4.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:35c1be0353e94b955b7d6b2040984634eaf25bfd184c2d97fc2ed23e37e83048 -size 27191296 +oid sha256:743e4de14f5d7ecb9da572e521b4ef4b74b1291e17f7ee9ebcb89ac4504fa858 +size 27525120 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee8cc395944c24e5fa8e4d4c3f42212111c673cc --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64cbab1ec5ca73f468686930cf81d2a9c3c30264704008e84ab72124505bb53f +size 55050240 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7ec6ff4e82e5ba5479c37816f6bb399ec255aac --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e5591c08ac201fb9d1548641cfe960b2f059e03ea6e3a5387ceccbfedc0137 +size 27525120 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..b35aea71b023c24656437172fb58ee7c1a5a9448 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abda20a96f807108cb0fd14af20b7a121bed07c18c4bfb2397350ad610776f6 +size 55050240 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..508e37d9e84ed37353c6a81506ff1f214409392b --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0285503c4e5469591229d2e2655e7189463d4ff3a7069a23fb5c36104bf283db +size 27525120 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..0bc1c64529c430de62e90d23d2dd0cfcffab961e --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e35a635d36ac0dcba206315c53a6f8f4fe2809eff344f430a6333abacc52878 +size 55050240 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..24e618489e798692b1c39065760313ead90fe10f --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f0f85a97b9ac7b832b9192cc7b2e1a9cbab359ed777a2003d7f5df6920ad51c +size 33060864 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..a22ba0353d5c1b412c1a30565ed0ca090e36eb57 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be91dab69c1ca4aed27bed4a14ac781db3812a56e5d68dc2073cac6e9ecdeba3 +size 27525120 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e26272ce1a3655f736c14cdffa3d085b852ee5d --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca07504537b242691e974cf7bcd8d82644d1240ee6e2d106a7bd6b90cea6426 +size 55050240 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..51545b37e92fd6155a15c3b6da1f95f5fb958b91 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e57a3601686e003f82f9b863d961d8e3633f07b198666ff9a61f46e710b5fa +size 27525120 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b1d750ce4eb02ed5f5551965c653d4051f0855b --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651f500ced36f2c5f36177ef43c3bfc78dbcc9524d2d9f1fe8d3fc6a4bdb46bd +size 55050240 diff --git a/params_shard_5.bin b/params_shard_5.bin index c3d6f5c6376e8ea88dd8ce3254bdb5f3178f8ad8..dba8238c5192a0797659d8bfd443850b3f77e218 100644 --- a/params_shard_5.bin +++ b/params_shard_5.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03a0ce57941633fa5f6a2e1d8faae013278e1fd22caf5d029cf0686da836a3ec -size 26331136 +oid sha256:671bd60432e6e12f6d6563abaf9f6d4b733d813c2fa6bf59e23a99805a4e7dc3 +size 55050240 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b7e9244d60e3c2f58e1f23dff49747a633b3d62 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1bef588e4e4a0ea65c2511430933bd4271e4cdab0fa67ba87ed332a4d1af61 +size 27525120 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5fa0192cc23f628572705338df885736f11808a --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f1b21722c8db89745ea4627368d52dcd25db631d7d3f585e1a46522a537b11 +size 55050240 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..340ec2150bcd7badfcb6278dadf798ebcb77248d --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26996a554dc0450580da897304594075fa68b81e51ee8bcbe89c9f90293f143a +size 33060864 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ad7fdcd5acdf3f6a356915162113a2edd947612 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3103e027ea98a1dfd752557c4721104045fde09ad578463252784102a34b251 +size 27525120 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..0004ff7f4d591145e0b0e17e2c92c119116b7a87 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d1830571e2bacd547e34b1b8ec9c0cb620f6f8b95c39e8792a30a62f048351e +size 55050240 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b8135e56835b022d4f5537bb66d641f3443a829 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6137200b2bf05f58219dca9c41ed16620a8d060327a802780d95e849ba16a042 +size 27525120 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..19d09608d33ef1332cdfd4e303cf16ab311840ef --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6d47ea0b2f4f005419852fec80669e54f9674fd321ade98e55879d53103f23c +size 55050240 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..e1a87e0e18963c6280ec574d1bced0d4ab80e639 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f28d9837d94cba1ba932f8c2a253e878542c4e2c968f8456678949aad9f2485 +size 27525120 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b5396740cc8c01cf00e1f2a295ffb0e108d15e0 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01807dda712e64e3a66a6529f82837ae87e9598daa6e2f26cee8017d24eab377 +size 55050240 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..ceab141bcaa8da24820378418861120340c75b4e --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4f890402b89c477e9ffe0637cc085a5e360b230316208b676ffbf7cecf3f87 +size 33060864 diff --git a/params_shard_6.bin b/params_shard_6.bin index 0d55275a6280245e30deb86c683d5fa5f1383347..b21d906d832cb77d24c0414469348ba7acbf97e7 100644 --- a/params_shard_6.bin +++ b/params_shard_6.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dcea40b18bd7ef899c2aff644d66770baa7000e8fd5659ab28941d5e2bb2cc80 -size 26331136 +oid sha256:2a9d0812092c90672fda99cc409f4fb749247d0911c5573157fe7d739bd3a947 +size 27525120 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..41c726464e903ac30c2ed6edb73c22d6438c54d4 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b85bd14ae7f0207206e0e6c59f41b33337ab3594b3f65dd230239598aff57f +size 27525120 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..e326422113e6a5c6d83879042a58990c23e073e7 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4017410f5b12ba43f8fc57b8700f6fd4c239b35e78bc25b87b74631efc5875 +size 55050240 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7f4955b59cea96def56ee101b0981c9abd6e41c --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86fbf742a2bfcdd76b76f20f17c5810e7d2f690beaefc3c6e65d1dcad829ef06 +size 27525120 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac3a9b75a9cdd917ef72546134af74c972ef812a --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f3dcdd21f5fa1967b1d168a43f3e720f191358c57e2ba9e8251dd9683e9c6cc +size 55050240 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..66527c81255276921e390988fca8a0648d3ff2dc --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a93b77bb8a11d3024d0453ba2e6ceefa7b55f98c144e95dd1243ebbedce4b68 +size 27525120 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..cd0a97311842e0e0d7192009569fd4a42dc97204 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:289e366efba2178944a89f5e37a02fe1d1b7d1c2b2f5dc52beca78212b0b7964 +size 55050240 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8680c17e07ed02b34db75977d5f7c1a50192d62 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6a1478a191883671f1915c7759a5a9cd464546c27965995226ac34614afc5f +size 33060864 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d1e1b5598ad424a30f148eccac0297cd3a174db --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae610566624302abc08e5524784452db7dd933c530c87cd9852d7cf67b37b9e +size 11013120 diff --git a/params_shard_7.bin b/params_shard_7.bin index b4a9e154a9bbf2f2cce5280f2ce66594c50c6f5f..6880246cfa9be6e36655d769d20aed9daf75829d 100644 --- a/params_shard_7.bin +++ b/params_shard_7.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:720ad30c2c64af45206f7c52a25091a5b35b463d0d86a31d584dbf525eabbe27 -size 26331136 +oid sha256:7849cb6c21f2358f223a0db86f5230de247f369b8d92937cd672164eee227b87 +size 55050240 diff --git a/params_shard_8.bin b/params_shard_8.bin index 6b4ea59421e53a17539ecd916cf131e574c2c4bb..27068fe1d787a3c4b6a55fbd65e3cceb61154e52 100644 --- a/params_shard_8.bin +++ b/params_shard_8.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d71409e44c67ea5ff80283a9cdc44e72d2b14bcc1977991fe1a36e7d981e3f09 -size 26331136 +oid sha256:6a5136a4cb4a0f2268777231faae5fe58be0930e1709f48934fa9d7be92e8d0d +size 27525120 diff --git a/params_shard_9.bin b/params_shard_9.bin index 5c768e447312bbe905a33a7fd5be2a089360f530..50f877d9bab8efab9301c6077403d18b43f78814 100644 --- a/params_shard_9.bin +++ b/params_shard_9.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65d2fac827f0f40496b8ea02ff1e9d560d1fe01e69b4d60156889d9cd38a75fd -size 26331136 +oid sha256:085fdd3efa05a4764249c1b36f30df3bf8ac2255b381f113a87c2b88a9f59d5d +size 55050240