Upload folder using huggingface_hub

Files changed:
- config.json +244 -243
- generation_config.json +2 -8
- model-00001-of-00002.safetensors +2 -2
- model.safetensors.index.json +0 -0
- tokenizer_config.json +2 -1
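The folder in this commit was pushed with the huggingface_hub client. Below is a minimal sketch of pulling the same files back down; it assumes the repository id matches the _name_or_path value written in the updated config.json further down, which may differ from the actual repo.

from huggingface_hub import snapshot_download

# Minimal sketch: fetch the uploaded folder.
# Assumption: the repo_id equals the "_name_or_path" set in this commit's config.json.
local_dir = snapshot_download(
    repo_id="VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft"
)
print("downloaded to:", local_dir)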
config.json CHANGED

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -11,6 +11,7 @@
     128008,
     128009
   ],
+  "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -22,27 +23,12 @@
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": {
-    "factor": 8.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_type": "llama3"
-  },
-  "rope_theta": 500000.0,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.45.1",
-  "use_cache": true,
-  "vocab_size": 128256,
   "quantization_config": {
-    "quant_method": "vptq",
     "config_for_layers": {
       "model.layers.0.mlp.down_proj": {
         "bias": false,
         "enable_norm": true,
-        "enable_perm":
+        "enable_perm": false,
        "group_num": 1,
        "group_size": 14336,
        "in_features": 14336,

The previous values of "_name_or_path" and "enable_perm" are cut off in the captured diff view, so the removed lines above are left truncated. The remainder of the config.json diff repeats the same one-line change, setting "enable_perm" to false, for every entry in quantization_config.config_for_layers: the self_attn q_proj, k_proj, v_proj, and o_proj and the mlp gate_proj, up_proj, and down_proj of all 32 decoder layers (0 through 31). Each of those per-layer hunks is identical apart from the module name (group_size and in_features are 14336 for down_proj and 4096 for the other projections), so they are not reproduced here. The final hunk rewrites the closing lines of the file:

@@ -5415,6 +5401,21 @@
       8
     ]
   }
-  }
-
-}

The added side of this final hunk is truncated in the captured diff view.
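A quick way to check the result of this commit is to read the updated config.json and confirm the flag across all quantized modules. A minimal sketch, assuming a local copy of the file (the path is a placeholder):

import json

# Minimal sketch: confirm that every entry under quantization_config.config_for_layers
# in the updated config.json now carries "enable_perm": false.
# Assumption: config.json has been downloaded to the current directory.
with open("config.json") as f:
    cfg = json.load(f)

layer_cfgs = cfg["quantization_config"]["config_for_layers"]
still_enabled = [name for name, c in layer_cfgs.items() if c.get("enable_perm")]

print(f"{len(layer_cfgs)} quantized modules listed")
print("modules with enable_perm still enabled:", still_enabled or "none")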
"model.layers.12.mlp.down_proj": {
|
701 |
"bias": false,
|
702 |
"enable_norm": true,
|
703 |
+
"enable_perm": false,
|
704 |
"group_num": 1,
|
705 |
"group_size": 14336,
|
706 |
"in_features": 14336,
|
|
|
724 |
"model.layers.12.mlp.gate_proj": {
|
725 |
"bias": false,
|
726 |
"enable_norm": true,
|
727 |
+
"enable_perm": false,
|
728 |
"group_num": 1,
|
729 |
"group_size": 4096,
|
730 |
"in_features": 4096,
|
|
|
748 |
"model.layers.12.mlp.up_proj": {
|
749 |
"bias": false,
|
750 |
"enable_norm": true,
|
751 |
+
"enable_perm": false,
|
752 |
"group_num": 1,
|
753 |
"group_size": 4096,
|
754 |
"in_features": 4096,
|
|
|
772 |
"model.layers.12.self_attn.k_proj": {
|
773 |
"bias": false,
|
774 |
"enable_norm": true,
|
775 |
+
"enable_perm": false,
|
776 |
"group_num": 1,
|
777 |
"group_size": 4096,
|
778 |
"in_features": 4096,
|
|
|
796 |
"model.layers.12.self_attn.o_proj": {
|
797 |
"bias": false,
|
798 |
"enable_norm": true,
|
799 |
+
"enable_perm": false,
|
800 |
"group_num": 1,
|
801 |
"group_size": 4096,
|
802 |
"in_features": 4096,
|
|
|
820 |
"model.layers.12.self_attn.q_proj": {
|
821 |
"bias": false,
|
822 |
"enable_norm": true,
|
823 |
+
"enable_perm": false,
|
824 |
"group_num": 1,
|
825 |
"group_size": 4096,
|
826 |
"in_features": 4096,
|
|
|
844 |
"model.layers.12.self_attn.v_proj": {
|
845 |
"bias": false,
|
846 |
"enable_norm": true,
|
847 |
+
"enable_perm": false,
|
848 |
"group_num": 1,
|
849 |
"group_size": 4096,
|
850 |
"in_features": 4096,
|
|
|
868 |
"model.layers.13.mlp.down_proj": {
|
869 |
"bias": false,
|
870 |
"enable_norm": true,
|
871 |
+
"enable_perm": false,
|
872 |
"group_num": 1,
|
873 |
"group_size": 14336,
|
874 |
"in_features": 14336,
|
|
|
892 |
"model.layers.13.mlp.gate_proj": {
|
893 |
"bias": false,
|
894 |
"enable_norm": true,
|
895 |
+
"enable_perm": false,
|
896 |
"group_num": 1,
|
897 |
"group_size": 4096,
|
898 |
"in_features": 4096,
|
|
|
916 |
"model.layers.13.mlp.up_proj": {
|
917 |
"bias": false,
|
918 |
"enable_norm": true,
|
919 |
+
"enable_perm": false,
|
920 |
"group_num": 1,
|
921 |
"group_size": 4096,
|
922 |
"in_features": 4096,
|
|
|
940 |
"model.layers.13.self_attn.k_proj": {
|
941 |
"bias": false,
|
942 |
"enable_norm": true,
|
943 |
+
"enable_perm": false,
|
944 |
"group_num": 1,
|
945 |
"group_size": 4096,
|
946 |
"in_features": 4096,
|
|
|
964 |
"model.layers.13.self_attn.o_proj": {
|
965 |
"bias": false,
|
966 |
"enable_norm": true,
|
967 |
+
"enable_perm": false,
|
968 |
"group_num": 1,
|
969 |
"group_size": 4096,
|
970 |
"in_features": 4096,
|
|
|
988 |
"model.layers.13.self_attn.q_proj": {
|
989 |
"bias": false,
|
990 |
"enable_norm": true,
|
991 |
+
"enable_perm": false,
|
992 |
"group_num": 1,
|
993 |
"group_size": 4096,
|
994 |
"in_features": 4096,
|
|
|
1012 |
"model.layers.13.self_attn.v_proj": {
|
1013 |
"bias": false,
|
1014 |
"enable_norm": true,
|
1015 |
+
"enable_perm": false,
|
1016 |
"group_num": 1,
|
1017 |
"group_size": 4096,
|
1018 |
"in_features": 4096,
|
|
|
1036 |
"model.layers.14.mlp.down_proj": {
|
1037 |
"bias": false,
|
1038 |
"enable_norm": true,
|
1039 |
+
"enable_perm": false,
|
1040 |
"group_num": 1,
|
1041 |
"group_size": 14336,
|
1042 |
"in_features": 14336,
|
|
|
1060 |
"model.layers.14.mlp.gate_proj": {
|
1061 |
"bias": false,
|
1062 |
"enable_norm": true,
|
1063 |
+
"enable_perm": false,
|
1064 |
"group_num": 1,
|
1065 |
"group_size": 4096,
|
1066 |
"in_features": 4096,
|
|
|
1084 |
"model.layers.14.mlp.up_proj": {
|
1085 |
"bias": false,
|
1086 |
"enable_norm": true,
|
1087 |
+
"enable_perm": false,
|
1088 |
"group_num": 1,
|
1089 |
"group_size": 4096,
|
1090 |
"in_features": 4096,
|
|
|
1108 |
"model.layers.14.self_attn.k_proj": {
|
1109 |
"bias": false,
|
1110 |
"enable_norm": true,
|
1111 |
+
"enable_perm": false,
|
1112 |
"group_num": 1,
|
1113 |
"group_size": 4096,
|
1114 |
"in_features": 4096,
|
|
|
1132 |
"model.layers.14.self_attn.o_proj": {
|
1133 |
"bias": false,
|
1134 |
"enable_norm": true,
|
1135 |
+
"enable_perm": false,
|
1136 |
"group_num": 1,
|
1137 |
"group_size": 4096,
|
1138 |
"in_features": 4096,
|
|
|
1156 |
"model.layers.14.self_attn.q_proj": {
|
1157 |
"bias": false,
|
1158 |
"enable_norm": true,
|
1159 |
+
"enable_perm": false,
|
1160 |
"group_num": 1,
|
1161 |
"group_size": 4096,
|
1162 |
"in_features": 4096,
|
|
|
1180 |
"model.layers.14.self_attn.v_proj": {
|
1181 |
"bias": false,
|
1182 |
"enable_norm": true,
|
1183 |
+
"enable_perm": false,
|
1184 |
"group_num": 1,
|
1185 |
"group_size": 4096,
|
1186 |
"in_features": 4096,
|
|
|
1204 |
"model.layers.15.mlp.down_proj": {
|
1205 |
"bias": false,
|
1206 |
"enable_norm": true,
|
1207 |
+
"enable_perm": false,
|
1208 |
"group_num": 1,
|
1209 |
"group_size": 14336,
|
1210 |
"in_features": 14336,
|
|
|
1228 |
"model.layers.15.mlp.gate_proj": {
|
1229 |
"bias": false,
|
1230 |
"enable_norm": true,
|
1231 |
+
"enable_perm": false,
|
1232 |
"group_num": 1,
|
1233 |
"group_size": 4096,
|
1234 |
"in_features": 4096,
|
|
|
1252 |
"model.layers.15.mlp.up_proj": {
|
1253 |
"bias": false,
|
1254 |
"enable_norm": true,
|
1255 |
+
"enable_perm": false,
|
1256 |
"group_num": 1,
|
1257 |
"group_size": 4096,
|
1258 |
"in_features": 4096,
|
|
|
1276 |
"model.layers.15.self_attn.k_proj": {
|
1277 |
"bias": false,
|
1278 |
"enable_norm": true,
|
1279 |
+
"enable_perm": false,
|
1280 |
"group_num": 1,
|
1281 |
"group_size": 4096,
|
1282 |
"in_features": 4096,
|
|
|
1300 |
"model.layers.15.self_attn.o_proj": {
|
1301 |
"bias": false,
|
1302 |
"enable_norm": true,
|
1303 |
+
"enable_perm": false,
|
1304 |
"group_num": 1,
|
1305 |
"group_size": 4096,
|
1306 |
"in_features": 4096,
|
|
|
1324 |
"model.layers.15.self_attn.q_proj": {
|
1325 |
"bias": false,
|
1326 |
"enable_norm": true,
|
1327 |
+
"enable_perm": false,
|
1328 |
"group_num": 1,
|
1329 |
"group_size": 4096,
|
1330 |
"in_features": 4096,
|
|
|
1348 |
"model.layers.15.self_attn.v_proj": {
|
1349 |
"bias": false,
|
1350 |
"enable_norm": true,
|
1351 |
+
"enable_perm": false,
|
1352 |
"group_num": 1,
|
1353 |
"group_size": 4096,
|
1354 |
"in_features": 4096,
|
|
|
1372 |
"model.layers.16.mlp.down_proj": {
|
1373 |
"bias": false,
|
1374 |
"enable_norm": true,
|
1375 |
+
"enable_perm": false,
|
1376 |
"group_num": 1,
|
1377 |
"group_size": 14336,
|
1378 |
"in_features": 14336,
|
|
|
1396 |
"model.layers.16.mlp.gate_proj": {
|
1397 |
"bias": false,
|
1398 |
"enable_norm": true,
|
1399 |
+
"enable_perm": false,
|
1400 |
"group_num": 1,
|
1401 |
"group_size": 4096,
|
1402 |
"in_features": 4096,
|
|
|
1420 |
"model.layers.16.mlp.up_proj": {
|
1421 |
"bias": false,
|
1422 |
"enable_norm": true,
|
1423 |
+
"enable_perm": false,
|
1424 |
"group_num": 1,
|
1425 |
"group_size": 4096,
|
1426 |
"in_features": 4096,
|
|
|
1444 |
"model.layers.16.self_attn.k_proj": {
|
1445 |
"bias": false,
|
1446 |
"enable_norm": true,
|
1447 |
+
"enable_perm": false,
|
1448 |
"group_num": 1,
|
1449 |
"group_size": 4096,
|
1450 |
"in_features": 4096,
|
|
|
1468 |
"model.layers.16.self_attn.o_proj": {
|
1469 |
"bias": false,
|
1470 |
"enable_norm": true,
|
1471 |
+
"enable_perm": false,
|
1472 |
"group_num": 1,
|
1473 |
"group_size": 4096,
|
1474 |
"in_features": 4096,
|
|
|
1492 |
"model.layers.16.self_attn.q_proj": {
|
1493 |
"bias": false,
|
1494 |
"enable_norm": true,
|
1495 |
+
"enable_perm": false,
|
1496 |
"group_num": 1,
|
1497 |
"group_size": 4096,
|
1498 |
"in_features": 4096,
|
|
|
1516 |
"model.layers.16.self_attn.v_proj": {
|
1517 |
"bias": false,
|
1518 |
"enable_norm": true,
|
1519 |
+
"enable_perm": false,
|
1520 |
"group_num": 1,
|
1521 |
"group_size": 4096,
|
1522 |
"in_features": 4096,
|
|
|
1540 |
"model.layers.17.mlp.down_proj": {
|
1541 |
"bias": false,
|
1542 |
"enable_norm": true,
|
1543 |
+
"enable_perm": false,
|
1544 |
"group_num": 1,
|
1545 |
"group_size": 14336,
|
1546 |
"in_features": 14336,
|
|
|
1564 |
"model.layers.17.mlp.gate_proj": {
|
1565 |
"bias": false,
|
1566 |
"enable_norm": true,
|
1567 |
+
"enable_perm": false,
|
1568 |
"group_num": 1,
|
1569 |
"group_size": 4096,
|
1570 |
"in_features": 4096,
|
|
|
1588 |
"model.layers.17.mlp.up_proj": {
|
1589 |
"bias": false,
|
1590 |
"enable_norm": true,
|
1591 |
+
"enable_perm": false,
|
1592 |
"group_num": 1,
|
1593 |
"group_size": 4096,
|
1594 |
"in_features": 4096,
|
|
|
1612 |
"model.layers.17.self_attn.k_proj": {
|
1613 |
"bias": false,
|
1614 |
"enable_norm": true,
|
1615 |
+
"enable_perm": false,
|
1616 |
"group_num": 1,
|
1617 |
"group_size": 4096,
|
1618 |
"in_features": 4096,
|
|
|
1636 |
"model.layers.17.self_attn.o_proj": {
|
1637 |
"bias": false,
|
1638 |
"enable_norm": true,
|
1639 |
+
"enable_perm": false,
|
1640 |
"group_num": 1,
|
1641 |
"group_size": 4096,
|
1642 |
"in_features": 4096,
|
|
|
1660 |
"model.layers.17.self_attn.q_proj": {
|
1661 |
"bias": false,
|
1662 |
"enable_norm": true,
|
1663 |
+
"enable_perm": false,
|
1664 |
"group_num": 1,
|
1665 |
"group_size": 4096,
|
1666 |
"in_features": 4096,
|
|
|
1684 |
"model.layers.17.self_attn.v_proj": {
|
1685 |
"bias": false,
|
1686 |
"enable_norm": true,
|
1687 |
+
"enable_perm": false,
|
1688 |
"group_num": 1,
|
1689 |
"group_size": 4096,
|
1690 |
"in_features": 4096,
|
|
|
1708 |
"model.layers.18.mlp.down_proj": {
|
1709 |
"bias": false,
|
1710 |
"enable_norm": true,
|
1711 |
+
"enable_perm": false,
|
1712 |
"group_num": 1,
|
1713 |
"group_size": 14336,
|
1714 |
"in_features": 14336,
|
|
|
1732 |
"model.layers.18.mlp.gate_proj": {
|
1733 |
"bias": false,
|
1734 |
"enable_norm": true,
|
1735 |
+
"enable_perm": false,
|
1736 |
"group_num": 1,
|
1737 |
"group_size": 4096,
|
1738 |
"in_features": 4096,
|
|
|
1756 |
"model.layers.18.mlp.up_proj": {
|
1757 |
"bias": false,
|
1758 |
"enable_norm": true,
|
1759 |
+
"enable_perm": false,
|
1760 |
"group_num": 1,
|
1761 |
"group_size": 4096,
|
1762 |
"in_features": 4096,
|
|
|
1780 |
"model.layers.18.self_attn.k_proj": {
|
1781 |
"bias": false,
|
1782 |
"enable_norm": true,
|
1783 |
+
"enable_perm": false,
|
1784 |
"group_num": 1,
|
1785 |
"group_size": 4096,
|
1786 |
"in_features": 4096,
|
|
|
1804 |
"model.layers.18.self_attn.o_proj": {
|
1805 |
"bias": false,
|
1806 |
"enable_norm": true,
|
1807 |
+
"enable_perm": false,
|
1808 |
"group_num": 1,
|
1809 |
"group_size": 4096,
|
1810 |
"in_features": 4096,
|
|
|
1828 |
"model.layers.18.self_attn.q_proj": {
|
1829 |
"bias": false,
|
1830 |
"enable_norm": true,
|
1831 |
+
"enable_perm": false,
|
1832 |
"group_num": 1,
|
1833 |
"group_size": 4096,
|
1834 |
"in_features": 4096,
|
|
|
1852 |
"model.layers.18.self_attn.v_proj": {
|
1853 |
"bias": false,
|
1854 |
"enable_norm": true,
|
1855 |
+
"enable_perm": false,
|
1856 |
"group_num": 1,
|
1857 |
"group_size": 4096,
|
1858 |
"in_features": 4096,
|
|
|
1876 |
"model.layers.19.mlp.down_proj": {
|
1877 |
"bias": false,
|
1878 |
"enable_norm": true,
|
1879 |
+
"enable_perm": false,
|
1880 |
"group_num": 1,
|
1881 |
"group_size": 14336,
|
1882 |
"in_features": 14336,
|
|
|
1900 |
"model.layers.19.mlp.gate_proj": {
|
1901 |
"bias": false,
|
1902 |
"enable_norm": true,
|
1903 |
+
"enable_perm": false,
|
1904 |
"group_num": 1,
|
1905 |
"group_size": 4096,
|
1906 |
"in_features": 4096,
|
|
|
1924 |
"model.layers.19.mlp.up_proj": {
|
1925 |
"bias": false,
|
1926 |
"enable_norm": true,
|
1927 |
+
"enable_perm": false,
|
1928 |
"group_num": 1,
|
1929 |
"group_size": 4096,
|
1930 |
"in_features": 4096,
|
|
|
1948 |
"model.layers.19.self_attn.k_proj": {
|
1949 |
"bias": false,
|
1950 |
"enable_norm": true,
|
1951 |
+
"enable_perm": false,
|
1952 |
"group_num": 1,
|
1953 |
"group_size": 4096,
|
1954 |
"in_features": 4096,
|
|
|
1972 |
"model.layers.19.self_attn.o_proj": {
|
1973 |
"bias": false,
|
1974 |
"enable_norm": true,
|
1975 |
+
"enable_perm": false,
|
1976 |
"group_num": 1,
|
1977 |
"group_size": 4096,
|
1978 |
"in_features": 4096,
|
|
|
1996 |
"model.layers.19.self_attn.q_proj": {
|
1997 |
"bias": false,
|
1998 |
"enable_norm": true,
|
1999 |
+
"enable_perm": false,
|
2000 |
"group_num": 1,
|
2001 |
"group_size": 4096,
|
2002 |
"in_features": 4096,
|
|
|
2020 |
"model.layers.19.self_attn.v_proj": {
|
2021 |
"bias": false,
|
2022 |
"enable_norm": true,
|
2023 |
+
"enable_perm": false,
|
2024 |
"group_num": 1,
|
2025 |
"group_size": 4096,
|
2026 |
"in_features": 4096,
|
|
|
2044 |
"model.layers.2.mlp.down_proj": {
|
2045 |
"bias": false,
|
2046 |
"enable_norm": true,
|
2047 |
+
"enable_perm": false,
|
2048 |
"group_num": 1,
|
2049 |
"group_size": 14336,
|
2050 |
"in_features": 14336,
|
|
|
2068 |
"model.layers.2.mlp.gate_proj": {
|
2069 |
"bias": false,
|
2070 |
"enable_norm": true,
|
2071 |
+
"enable_perm": false,
|
2072 |
"group_num": 1,
|
2073 |
"group_size": 4096,
|
2074 |
"in_features": 4096,
|
|
|
2092 |
"model.layers.2.mlp.up_proj": {
|
2093 |
"bias": false,
|
2094 |
"enable_norm": true,
|
2095 |
+
"enable_perm": false,
|
2096 |
"group_num": 1,
|
2097 |
"group_size": 4096,
|
2098 |
"in_features": 4096,
|
|
|
2116 |
"model.layers.2.self_attn.k_proj": {
|
2117 |
"bias": false,
|
2118 |
"enable_norm": true,
|
2119 |
+
"enable_perm": false,
|
2120 |
"group_num": 1,
|
2121 |
"group_size": 4096,
|
2122 |
"in_features": 4096,
|
|
|
2140 |
"model.layers.2.self_attn.o_proj": {
|
2141 |
"bias": false,
|
2142 |
"enable_norm": true,
|
2143 |
+
"enable_perm": false,
|
2144 |
"group_num": 1,
|
2145 |
"group_size": 4096,
|
2146 |
"in_features": 4096,
|
|
|
2164 |
"model.layers.2.self_attn.q_proj": {
|
2165 |
"bias": false,
|
2166 |
"enable_norm": true,
|
2167 |
+
"enable_perm": false,
|
2168 |
"group_num": 1,
|
2169 |
"group_size": 4096,
|
2170 |
"in_features": 4096,
|
|
|
2188 |
"model.layers.2.self_attn.v_proj": {
|
2189 |
"bias": false,
|
2190 |
"enable_norm": true,
|
2191 |
+
"enable_perm": false,
|
2192 |
"group_num": 1,
|
2193 |
"group_size": 4096,
|
2194 |
"in_features": 4096,
|
|
|
2212 |
"model.layers.20.mlp.down_proj": {
|
2213 |
"bias": false,
|
2214 |
"enable_norm": true,
|
2215 |
+
"enable_perm": false,
|
2216 |
"group_num": 1,
|
2217 |
"group_size": 14336,
|
2218 |
"in_features": 14336,
|
|
|
2236 |
"model.layers.20.mlp.gate_proj": {
|
2237 |
"bias": false,
|
2238 |
"enable_norm": true,
|
2239 |
+
"enable_perm": false,
|
2240 |
"group_num": 1,
|
2241 |
"group_size": 4096,
|
2242 |
"in_features": 4096,
|
|
|
2260 |
"model.layers.20.mlp.up_proj": {
|
2261 |
"bias": false,
|
2262 |
"enable_norm": true,
|
2263 |
+
"enable_perm": false,
|
2264 |
"group_num": 1,
|
2265 |
"group_size": 4096,
|
2266 |
"in_features": 4096,
|
|
|
2284 |
"model.layers.20.self_attn.k_proj": {
|
2285 |
"bias": false,
|
2286 |
"enable_norm": true,
|
2287 |
+
"enable_perm": false,
|
2288 |
"group_num": 1,
|
2289 |
"group_size": 4096,
|
2290 |
"in_features": 4096,
|
|
|
2308 |
"model.layers.20.self_attn.o_proj": {
|
2309 |
"bias": false,
|
2310 |
"enable_norm": true,
|
2311 |
+
"enable_perm": false,
|
2312 |
"group_num": 1,
|
2313 |
"group_size": 4096,
|
2314 |
"in_features": 4096,
|
|
|
2332 |
"model.layers.20.self_attn.q_proj": {
|
2333 |
"bias": false,
|
2334 |
"enable_norm": true,
|
2335 |
+
"enable_perm": false,
|
2336 |
"group_num": 1,
|
2337 |
"group_size": 4096,
|
2338 |
"in_features": 4096,
|
|
|
2356 |
"model.layers.20.self_attn.v_proj": {
|
2357 |
"bias": false,
|
2358 |
"enable_norm": true,
|
2359 |
+
"enable_perm": false,
|
2360 |
"group_num": 1,
|
2361 |
"group_size": 4096,
|
2362 |
"in_features": 4096,
|
|
|
2380 |
"model.layers.21.mlp.down_proj": {
|
2381 |
"bias": false,
|
2382 |
"enable_norm": true,
|
2383 |
+
"enable_perm": false,
|
2384 |
"group_num": 1,
|
2385 |
"group_size": 14336,
|
2386 |
"in_features": 14336,
|
|
|
2404 |
"model.layers.21.mlp.gate_proj": {
|
2405 |
"bias": false,
|
2406 |
"enable_norm": true,
|
2407 |
+
"enable_perm": false,
|
2408 |
"group_num": 1,
|
2409 |
"group_size": 4096,
|
2410 |
"in_features": 4096,
|
|
|
2428 |
"model.layers.21.mlp.up_proj": {
|
2429 |
"bias": false,
|
2430 |
"enable_norm": true,
|
2431 |
+
"enable_perm": false,
|
2432 |
"group_num": 1,
|
2433 |
"group_size": 4096,
|
2434 |
"in_features": 4096,
|
|
|
2452 |
"model.layers.21.self_attn.k_proj": {
|
2453 |
"bias": false,
|
2454 |
"enable_norm": true,
|
2455 |
+
"enable_perm": false,
|
2456 |
"group_num": 1,
|
2457 |
"group_size": 4096,
|
2458 |
"in_features": 4096,
|
|
|
2476 |
"model.layers.21.self_attn.o_proj": {
|
2477 |
"bias": false,
|
2478 |
"enable_norm": true,
|
2479 |
+
"enable_perm": false,
|
2480 |
"group_num": 1,
|
2481 |
"group_size": 4096,
|
2482 |
"in_features": 4096,
|
|
|
2500 |
"model.layers.21.self_attn.q_proj": {
|
2501 |
"bias": false,
|
2502 |
"enable_norm": true,
|
2503 |
+
"enable_perm": false,
|
2504 |
"group_num": 1,
|
2505 |
"group_size": 4096,
|
2506 |
"in_features": 4096,
|
|
|
2524 |
"model.layers.21.self_attn.v_proj": {
|
2525 |
"bias": false,
|
2526 |
"enable_norm": true,
|
2527 |
+
"enable_perm": false,
|
2528 |
"group_num": 1,
|
2529 |
"group_size": 4096,
|
2530 |
"in_features": 4096,
|
|
|
2548 |
"model.layers.22.mlp.down_proj": {
|
2549 |
"bias": false,
|
2550 |
"enable_norm": true,
|
2551 |
+
"enable_perm": false,
|
2552 |
"group_num": 1,
|
2553 |
"group_size": 14336,
|
2554 |
"in_features": 14336,
|
|
|
2572 |
"model.layers.22.mlp.gate_proj": {
|
2573 |
"bias": false,
|
2574 |
"enable_norm": true,
|
2575 |
+
"enable_perm": false,
|
2576 |
"group_num": 1,
|
2577 |
"group_size": 4096,
|
2578 |
"in_features": 4096,
|
|
|
2596 |
"model.layers.22.mlp.up_proj": {
|
2597 |
"bias": false,
|
2598 |
"enable_norm": true,
|
2599 |
+
"enable_perm": false,
|
2600 |
"group_num": 1,
|
2601 |
"group_size": 4096,
|
2602 |
"in_features": 4096,
|
|
|
2620 |
"model.layers.22.self_attn.k_proj": {
|
2621 |
"bias": false,
|
2622 |
"enable_norm": true,
|
2623 |
+
"enable_perm": false,
|
2624 |
"group_num": 1,
|
2625 |
"group_size": 4096,
|
2626 |
"in_features": 4096,
|
|
|
2644 |
"model.layers.22.self_attn.o_proj": {
|
2645 |
"bias": false,
|
2646 |
"enable_norm": true,
|
2647 |
+
"enable_perm": false,
|
2648 |
"group_num": 1,
|
2649 |
"group_size": 4096,
|
2650 |
"in_features": 4096,
|
|
|
2668 |
"model.layers.22.self_attn.q_proj": {
|
2669 |
"bias": false,
|
2670 |
"enable_norm": true,
|
2671 |
+
"enable_perm": false,
|
2672 |
"group_num": 1,
|
2673 |
"group_size": 4096,
|
2674 |
"in_features": 4096,
|
|
|
2692 |
"model.layers.22.self_attn.v_proj": {
|
2693 |
"bias": false,
|
2694 |
"enable_norm": true,
|
2695 |
+
"enable_perm": false,
|
2696 |
"group_num": 1,
|
2697 |
"group_size": 4096,
|
2698 |
"in_features": 4096,
|
|
|
2716 |
"model.layers.23.mlp.down_proj": {
|
2717 |
"bias": false,
|
2718 |
"enable_norm": true,
|
2719 |
+
"enable_perm": false,
|
2720 |
"group_num": 1,
|
2721 |
"group_size": 14336,
|
2722 |
"in_features": 14336,
|
|
|
2740 |
"model.layers.23.mlp.gate_proj": {
|
2741 |
"bias": false,
|
2742 |
"enable_norm": true,
|
2743 |
+
"enable_perm": false,
|
2744 |
"group_num": 1,
|
2745 |
"group_size": 4096,
|
2746 |
"in_features": 4096,
|
|
|
2764 |
"model.layers.23.mlp.up_proj": {
|
2765 |
"bias": false,
|
2766 |
"enable_norm": true,
|
2767 |
+
"enable_perm": false,
|
2768 |
"group_num": 1,
|
2769 |
"group_size": 4096,
|
2770 |
"in_features": 4096,
|
|
|
2788 |
"model.layers.23.self_attn.k_proj": {
|
2789 |
"bias": false,
|
2790 |
"enable_norm": true,
|
2791 |
+
"enable_perm": false,
|
2792 |
"group_num": 1,
|
2793 |
"group_size": 4096,
|
2794 |
"in_features": 4096,
|
|
|
2812 |
"model.layers.23.self_attn.o_proj": {
|
2813 |
"bias": false,
|
2814 |
"enable_norm": true,
|
2815 |
+
"enable_perm": false,
|
2816 |
"group_num": 1,
|
2817 |
"group_size": 4096,
|
2818 |
"in_features": 4096,
|
|
|
2836 |
"model.layers.23.self_attn.q_proj": {
|
2837 |
"bias": false,
|
2838 |
"enable_norm": true,
|
2839 |
+
"enable_perm": false,
|
2840 |
"group_num": 1,
|
2841 |
"group_size": 4096,
|
2842 |
"in_features": 4096,
|
|
|
2860 |
"model.layers.23.self_attn.v_proj": {
|
2861 |
"bias": false,
|
2862 |
"enable_norm": true,
|
2863 |
+
"enable_perm": false,
|
2864 |
"group_num": 1,
|
2865 |
"group_size": 4096,
|
2866 |
"in_features": 4096,
|
|
|
2884 |
"model.layers.24.mlp.down_proj": {
|
2885 |
"bias": false,
|
2886 |
"enable_norm": true,
|
2887 |
+
"enable_perm": false,
|
2888 |
"group_num": 1,
|
2889 |
"group_size": 14336,
|
2890 |
"in_features": 14336,
|
|
|
2908 |
"model.layers.24.mlp.gate_proj": {
|
2909 |
"bias": false,
|
2910 |
"enable_norm": true,
|
2911 |
+
"enable_perm": false,
|
2912 |
"group_num": 1,
|
2913 |
"group_size": 4096,
|
2914 |
"in_features": 4096,
|
|
|
2932 |
"model.layers.24.mlp.up_proj": {
|
2933 |
"bias": false,
|
2934 |
"enable_norm": true,
|
2935 |
+
"enable_perm": false,
|
2936 |
"group_num": 1,
|
2937 |
"group_size": 4096,
|
2938 |
"in_features": 4096,
|
|
|
2956 |
"model.layers.24.self_attn.k_proj": {
|
2957 |
"bias": false,
|
2958 |
"enable_norm": true,
|
2959 |
+
"enable_perm": false,
|
2960 |
"group_num": 1,
|
2961 |
"group_size": 4096,
|
2962 |
"in_features": 4096,
|
|
|
2980 |
"model.layers.24.self_attn.o_proj": {
|
2981 |
"bias": false,
|
2982 |
"enable_norm": true,
|
2983 |
+
"enable_perm": false,
|
2984 |
"group_num": 1,
|
2985 |
"group_size": 4096,
|
2986 |
"in_features": 4096,
|
|
|
3004 |
"model.layers.24.self_attn.q_proj": {
|
3005 |
"bias": false,
|
3006 |
"enable_norm": true,
|
3007 |
+
"enable_perm": false,
|
3008 |
"group_num": 1,
|
3009 |
"group_size": 4096,
|
3010 |
"in_features": 4096,
|
|
|
3028 |
"model.layers.24.self_attn.v_proj": {
|
3029 |
"bias": false,
|
3030 |
"enable_norm": true,
|
3031 |
+
"enable_perm": false,
|
3032 |
"group_num": 1,
|
3033 |
"group_size": 4096,
|
3034 |
"in_features": 4096,
|
|
|
3052 |
"model.layers.25.mlp.down_proj": {
|
3053 |
"bias": false,
|
3054 |
"enable_norm": true,
|
3055 |
+
"enable_perm": false,
|
3056 |
"group_num": 1,
|
3057 |
"group_size": 14336,
|
3058 |
"in_features": 14336,
|
|
|
3076 |
"model.layers.25.mlp.gate_proj": {
|
3077 |
"bias": false,
|
3078 |
"enable_norm": true,
|
3079 |
+
"enable_perm": false,
|
3080 |
"group_num": 1,
|
3081 |
"group_size": 4096,
|
3082 |
"in_features": 4096,
|
|
|
3100 |
"model.layers.25.mlp.up_proj": {
|
3101 |
"bias": false,
|
3102 |
"enable_norm": true,
|
3103 |
+
"enable_perm": false,
|
3104 |
"group_num": 1,
|
3105 |
"group_size": 4096,
|
3106 |
"in_features": 4096,
|
|
|
3124 |
"model.layers.25.self_attn.k_proj": {
|
3125 |
"bias": false,
|
3126 |
"enable_norm": true,
|
3127 |
+
"enable_perm": false,
|
3128 |
"group_num": 1,
|
3129 |
"group_size": 4096,
|
3130 |
"in_features": 4096,
|
|
|
3148 |
"model.layers.25.self_attn.o_proj": {
|
3149 |
"bias": false,
|
3150 |
"enable_norm": true,
|
3151 |
+
"enable_perm": false,
|
3152 |
"group_num": 1,
|
3153 |
"group_size": 4096,
|
3154 |
"in_features": 4096,
|
|
|
3172 |
"model.layers.25.self_attn.q_proj": {
|
3173 |
"bias": false,
|
3174 |
"enable_norm": true,
|
3175 |
+
"enable_perm": false,
|
3176 |
"group_num": 1,
|
3177 |
"group_size": 4096,
|
3178 |
"in_features": 4096,
|
|
|
3196 |
"model.layers.25.self_attn.v_proj": {
|
3197 |
"bias": false,
|
3198 |
"enable_norm": true,
|
3199 |
+
"enable_perm": false,
|
3200 |
"group_num": 1,
|
3201 |
"group_size": 4096,
|
3202 |
"in_features": 4096,
|
|
|
3220 |
"model.layers.26.mlp.down_proj": {
|
3221 |
"bias": false,
|
3222 |
"enable_norm": true,
|
3223 |
+
"enable_perm": false,
|
3224 |
"group_num": 1,
|
3225 |
"group_size": 14336,
|
3226 |
"in_features": 14336,
|
|
|
3244 |
"model.layers.26.mlp.gate_proj": {
|
3245 |
"bias": false,
|
3246 |
"enable_norm": true,
|
3247 |
+
"enable_perm": false,
|
3248 |
"group_num": 1,
|
3249 |
"group_size": 4096,
|
3250 |
"in_features": 4096,
|
|
|
3268 |
"model.layers.26.mlp.up_proj": {
|
3269 |
"bias": false,
|
3270 |
"enable_norm": true,
|
3271 |
+
"enable_perm": false,
|
3272 |
"group_num": 1,
|
3273 |
"group_size": 4096,
|
3274 |
"in_features": 4096,
|
|
|
3292 |
"model.layers.26.self_attn.k_proj": {
|
3293 |
"bias": false,
|
3294 |
"enable_norm": true,
|
3295 |
+
"enable_perm": false,
|
3296 |
"group_num": 1,
|
3297 |
"group_size": 4096,
|
3298 |
"in_features": 4096,
|
|
|
3316 |
"model.layers.26.self_attn.o_proj": {
|
3317 |
"bias": false,
|
3318 |
"enable_norm": true,
|
3319 |
+
"enable_perm": false,
|
3320 |
"group_num": 1,
|
3321 |
"group_size": 4096,
|
3322 |
"in_features": 4096,
|
|
|
3340 |
"model.layers.26.self_attn.q_proj": {
|
3341 |
"bias": false,
|
3342 |
"enable_norm": true,
|
3343 |
+
"enable_perm": false,
|
3344 |
"group_num": 1,
|
3345 |
"group_size": 4096,
|
3346 |
"in_features": 4096,
|
|
|
3364 |
"model.layers.26.self_attn.v_proj": {
|
3365 |
"bias": false,
|
3366 |
"enable_norm": true,
|
3367 |
+
"enable_perm": false,
|
3368 |
"group_num": 1,
|
3369 |
"group_size": 4096,
|
3370 |
"in_features": 4096,
|
|
|
3388 |
"model.layers.27.mlp.down_proj": {
|
3389 |
"bias": false,
|
3390 |
"enable_norm": true,
|
3391 |
+
"enable_perm": false,
|
3392 |
"group_num": 1,
|
3393 |
"group_size": 14336,
|
3394 |
"in_features": 14336,
|
|
|
3412 |
"model.layers.27.mlp.gate_proj": {
|
3413 |
"bias": false,
|
3414 |
"enable_norm": true,
|
3415 |
+
"enable_perm": false,
|
3416 |
"group_num": 1,
|
3417 |
"group_size": 4096,
|
3418 |
"in_features": 4096,
|
|
|
3436 |
"model.layers.27.mlp.up_proj": {
|
3437 |
"bias": false,
|
3438 |
"enable_norm": true,
|
3439 |
+
"enable_perm": false,
|
3440 |
"group_num": 1,
|
3441 |
"group_size": 4096,
|
3442 |
"in_features": 4096,
|
|
|
3460 |
"model.layers.27.self_attn.k_proj": {
|
3461 |
"bias": false,
|
3462 |
"enable_norm": true,
|
3463 |
+
"enable_perm": false,
|
3464 |
"group_num": 1,
|
3465 |
"group_size": 4096,
|
3466 |
"in_features": 4096,
|
|
|
3484 |
"model.layers.27.self_attn.o_proj": {
|
3485 |
"bias": false,
|
3486 |
"enable_norm": true,
|
3487 |
+
"enable_perm": false,
|
3488 |
"group_num": 1,
|
3489 |
"group_size": 4096,
|
3490 |
"in_features": 4096,
|
|
|
3508 |
"model.layers.27.self_attn.q_proj": {
|
3509 |
"bias": false,
|
3510 |
"enable_norm": true,
|
3511 |
+
"enable_perm": false,
|
3512 |
"group_num": 1,
|
3513 |
"group_size": 4096,
|
3514 |
"in_features": 4096,
|
|
|
3532 |
"model.layers.27.self_attn.v_proj": {
|
3533 |
"bias": false,
|
3534 |
"enable_norm": true,
|
3535 |
+
"enable_perm": false,
|
3536 |
"group_num": 1,
|
3537 |
"group_size": 4096,
|
3538 |
"in_features": 4096,
|
|
|
3556 |
"model.layers.28.mlp.down_proj": {
|
3557 |
"bias": false,
|
3558 |
"enable_norm": true,
|
3559 |
+
"enable_perm": false,
|
3560 |
"group_num": 1,
|
3561 |
"group_size": 14336,
|
3562 |
"in_features": 14336,
|
|
|
3580 |
"model.layers.28.mlp.gate_proj": {
|
3581 |
"bias": false,
|
3582 |
"enable_norm": true,
|
3583 |
+
"enable_perm": false,
|
3584 |
"group_num": 1,
|
3585 |
"group_size": 4096,
|
3586 |
"in_features": 4096,
|
|
|
3604 |
"model.layers.28.mlp.up_proj": {
|
3605 |
"bias": false,
|
3606 |
"enable_norm": true,
|
3607 |
+
"enable_perm": false,
|
3608 |
"group_num": 1,
|
3609 |
"group_size": 4096,
|
3610 |
"in_features": 4096,
|
|
|
3628 |
"model.layers.28.self_attn.k_proj": {
|
3629 |
"bias": false,
|
3630 |
"enable_norm": true,
|
3631 |
+
"enable_perm": false,
|
3632 |
"group_num": 1,
|
3633 |
"group_size": 4096,
|
3634 |
"in_features": 4096,
|
|
|
3652 |
"model.layers.28.self_attn.o_proj": {
|
3653 |
"bias": false,
|
3654 |
"enable_norm": true,
|
3655 |
+
"enable_perm": false,
|
3656 |
"group_num": 1,
|
3657 |
"group_size": 4096,
|
3658 |
"in_features": 4096,
|
|
|
3676 |
"model.layers.28.self_attn.q_proj": {
|
3677 |
"bias": false,
|
3678 |
"enable_norm": true,
|
3679 |
+
"enable_perm": false,
|
3680 |
"group_num": 1,
|
3681 |
"group_size": 4096,
|
3682 |
"in_features": 4096,
|
|
|
3700 |
"model.layers.28.self_attn.v_proj": {
|
3701 |
"bias": false,
|
3702 |
"enable_norm": true,
|
3703 |
+
"enable_perm": false,
|
3704 |
"group_num": 1,
|
3705 |
"group_size": 4096,
|
3706 |
"in_features": 4096,
|
|
|
3724 |
"model.layers.29.mlp.down_proj": {
|
3725 |
"bias": false,
|
3726 |
"enable_norm": true,
|
3727 |
+
"enable_perm": false,
|
3728 |
"group_num": 1,
|
3729 |
"group_size": 14336,
|
3730 |
"in_features": 14336,
|
|
|
3748 |
"model.layers.29.mlp.gate_proj": {
|
3749 |
"bias": false,
|
3750 |
"enable_norm": true,
|
3751 |
+
"enable_perm": false,
|
3752 |
"group_num": 1,
|
3753 |
"group_size": 4096,
|
3754 |
"in_features": 4096,
|
|
|
3772 |
"model.layers.29.mlp.up_proj": {
|
3773 |
"bias": false,
|
3774 |
"enable_norm": true,
|
3775 |
+
"enable_perm": false,
|
3776 |
"group_num": 1,
|
3777 |
"group_size": 4096,
|
3778 |
"in_features": 4096,
|
|
|
3796 |
"model.layers.29.self_attn.k_proj": {
|
3797 |
"bias": false,
|
3798 |
"enable_norm": true,
|
3799 |
+
"enable_perm": false,
|
3800 |
"group_num": 1,
|
3801 |
"group_size": 4096,
|
3802 |
"in_features": 4096,
|
|
|
3820 |
"model.layers.29.self_attn.o_proj": {
|
3821 |
"bias": false,
|
3822 |
"enable_norm": true,
|
3823 |
+
"enable_perm": false,
|
3824 |
"group_num": 1,
|
3825 |
"group_size": 4096,
|
3826 |
"in_features": 4096,
|
|
|
3844 |
"model.layers.29.self_attn.q_proj": {
|
3845 |
"bias": false,
|
3846 |
"enable_norm": true,
|
3847 |
+
"enable_perm": false,
|
3848 |
"group_num": 1,
|
3849 |
"group_size": 4096,
|
3850 |
"in_features": 4096,
|
|
|
3868 |
"model.layers.29.self_attn.v_proj": {
|
3869 |
"bias": false,
|
3870 |
"enable_norm": true,
|
3871 |
+
"enable_perm": false,
|
3872 |
"group_num": 1,
|
3873 |
"group_size": 4096,
|
3874 |
"in_features": 4096,
|
|
|
3892 |
"model.layers.3.mlp.down_proj": {
|
3893 |
"bias": false,
|
3894 |
"enable_norm": true,
|
3895 |
+
"enable_perm": false,
|
3896 |
"group_num": 1,
|
3897 |
"group_size": 14336,
|
3898 |
"in_features": 14336,
|
|
|
3916 |
"model.layers.3.mlp.gate_proj": {
|
3917 |
"bias": false,
|
3918 |
"enable_norm": true,
|
3919 |
+
"enable_perm": false,
|
3920 |
"group_num": 1,
|
3921 |
"group_size": 4096,
|
3922 |
"in_features": 4096,
|
|
|
3940 |
"model.layers.3.mlp.up_proj": {
|
3941 |
"bias": false,
|
3942 |
"enable_norm": true,
|
3943 |
+
"enable_perm": false,
|
3944 |
"group_num": 1,
|
3945 |
"group_size": 4096,
|
3946 |
"in_features": 4096,
|
|
|
3964 |
"model.layers.3.self_attn.k_proj": {
|
3965 |
"bias": false,
|
3966 |
"enable_norm": true,
|
3967 |
+
"enable_perm": false,
|
3968 |
"group_num": 1,
|
3969 |
"group_size": 4096,
|
3970 |
"in_features": 4096,
|
|
|
3988 |
"model.layers.3.self_attn.o_proj": {
|
3989 |
"bias": false,
|
3990 |
"enable_norm": true,
|
3991 |
+
"enable_perm": false,
|
3992 |
"group_num": 1,
|
3993 |
"group_size": 4096,
|
3994 |
"in_features": 4096,
|
|
|
4012 |
"model.layers.3.self_attn.q_proj": {
|
4013 |
"bias": false,
|
4014 |
"enable_norm": true,
|
4015 |
+
"enable_perm": false,
|
4016 |
"group_num": 1,
|
4017 |
"group_size": 4096,
|
4018 |
"in_features": 4096,
|
|
|
4036 |
"model.layers.3.self_attn.v_proj": {
|
4037 |
"bias": false,
|
4038 |
"enable_norm": true,
|
4039 |
+
"enable_perm": false,
|
4040 |
"group_num": 1,
|
4041 |
"group_size": 4096,
|
4042 |
"in_features": 4096,
|
|
|
4060 |
"model.layers.30.mlp.down_proj": {
|
4061 |
"bias": false,
|
4062 |
"enable_norm": true,
|
4063 |
+
"enable_perm": false,
|
4064 |
"group_num": 1,
|
4065 |
"group_size": 14336,
|
4066 |
"in_features": 14336,
|
|
|
4084 |
"model.layers.30.mlp.gate_proj": {
|
4085 |
"bias": false,
|
4086 |
"enable_norm": true,
|
4087 |
+
"enable_perm": false,
|
4088 |
"group_num": 1,
|
4089 |
"group_size": 4096,
|
4090 |
"in_features": 4096,
|
|
|
4108 |
"model.layers.30.mlp.up_proj": {
|
4109 |
"bias": false,
|
4110 |
"enable_norm": true,
|
4111 |
+
"enable_perm": false,
|
4112 |
"group_num": 1,
|
4113 |
"group_size": 4096,
|
4114 |
"in_features": 4096,
|
|
|
4132 |
"model.layers.30.self_attn.k_proj": {
|
4133 |
"bias": false,
|
4134 |
"enable_norm": true,
|
4135 |
+
"enable_perm": false,
|
4136 |
"group_num": 1,
|
4137 |
"group_size": 4096,
|
4138 |
"in_features": 4096,
|
|
|
4156 |
"model.layers.30.self_attn.o_proj": {
|
4157 |
"bias": false,
|
4158 |
"enable_norm": true,
|
4159 |
+
"enable_perm": false,
|
4160 |
"group_num": 1,
|
4161 |
"group_size": 4096,
|
4162 |
"in_features": 4096,
|
|
|
4180 |
"model.layers.30.self_attn.q_proj": {
|
4181 |
"bias": false,
|
4182 |
"enable_norm": true,
|
4183 |
+
"enable_perm": false,
|
4184 |
"group_num": 1,
|
4185 |
"group_size": 4096,
|
4186 |
"in_features": 4096,
|
|
|
4204 |
"model.layers.30.self_attn.v_proj": {
|
4205 |
"bias": false,
|
4206 |
"enable_norm": true,
|
4207 |
+
"enable_perm": false,
|
4208 |
"group_num": 1,
|
4209 |
"group_size": 4096,
|
4210 |
"in_features": 4096,
|
|
|
4228 |
"model.layers.31.mlp.down_proj": {
|
4229 |
"bias": false,
|
4230 |
"enable_norm": true,
|
4231 |
+
"enable_perm": false,
|
4232 |
"group_num": 1,
|
4233 |
"group_size": 14336,
|
4234 |
"in_features": 14336,
|
|
|
4252 |
"model.layers.31.mlp.gate_proj": {
|
4253 |
"bias": false,
|
4254 |
"enable_norm": true,
|
4255 |
+
"enable_perm": false,
|
4256 |
"group_num": 1,
|
4257 |
"group_size": 4096,
|
4258 |
"in_features": 4096,
|
|
|
4276 |
"model.layers.31.mlp.up_proj": {
|
4277 |
"bias": false,
|
4278 |
"enable_norm": true,
|
4279 |
+
"enable_perm": false,
|
4280 |
"group_num": 1,
|
4281 |
"group_size": 4096,
|
4282 |
"in_features": 4096,
|
|
|
4300 |
"model.layers.31.self_attn.k_proj": {
|
4301 |
"bias": false,
|
4302 |
"enable_norm": true,
|
4303 |
+
"enable_perm": false,
|
4304 |
"group_num": 1,
|
4305 |
"group_size": 4096,
|
4306 |
"in_features": 4096,
|
|
|
4324 |
"model.layers.31.self_attn.o_proj": {
|
4325 |
"bias": false,
|
4326 |
"enable_norm": true,
|
4327 |
+
"enable_perm": false,
|
4328 |
"group_num": 1,
|
4329 |
"group_size": 4096,
|
4330 |
"in_features": 4096,
|
|
|
4348 |
"model.layers.31.self_attn.q_proj": {
|
4349 |
"bias": false,
|
4350 |
"enable_norm": true,
|
4351 |
+
"enable_perm": false,
|
4352 |
"group_num": 1,
|
4353 |
"group_size": 4096,
|
4354 |
"in_features": 4096,
|
|
|
4372 |
"model.layers.31.self_attn.v_proj": {
|
4373 |
"bias": false,
|
4374 |
"enable_norm": true,
|
4375 |
+
"enable_perm": false,
|
4376 |
"group_num": 1,
|
4377 |
"group_size": 4096,
|
4378 |
"in_features": 4096,
|
|
|
4396 |
"model.layers.4.mlp.down_proj": {
|
4397 |
"bias": false,
|
4398 |
"enable_norm": true,
|
4399 |
+
"enable_perm": false,
|
4400 |
"group_num": 1,
|
4401 |
"group_size": 14336,
|
4402 |
"in_features": 14336,
|
|
|
4420 |
"model.layers.4.mlp.gate_proj": {
|
4421 |
"bias": false,
|
4422 |
"enable_norm": true,
|
4423 |
+
"enable_perm": false,
|
4424 |
"group_num": 1,
|
4425 |
"group_size": 4096,
|
4426 |
"in_features": 4096,
|
|
|
4444 |
"model.layers.4.mlp.up_proj": {
|
4445 |
"bias": false,
|
4446 |
"enable_norm": true,
|
4447 |
+
"enable_perm": false,
|
4448 |
"group_num": 1,
|
4449 |
"group_size": 4096,
|
4450 |
"in_features": 4096,
|
|
|
4468 |
"model.layers.4.self_attn.k_proj": {
|
4469 |
"bias": false,
|
4470 |
"enable_norm": true,
|
4471 |
+
"enable_perm": false,
|
4472 |
"group_num": 1,
|
4473 |
"group_size": 4096,
|
4474 |
"in_features": 4096,
|
|
|
4492 |
"model.layers.4.self_attn.o_proj": {
|
4493 |
"bias": false,
|
4494 |
"enable_norm": true,
|
4495 |
+
"enable_perm": false,
|
4496 |
"group_num": 1,
|
4497 |
"group_size": 4096,
|
4498 |
"in_features": 4096,
|
|
|
4516 |
"model.layers.4.self_attn.q_proj": {
|
4517 |
"bias": false,
|
4518 |
"enable_norm": true,
|
4519 |
+
"enable_perm": false,
|
4520 |
"group_num": 1,
|
4521 |
"group_size": 4096,
|
4522 |
"in_features": 4096,
|
|
|
4540 |
"model.layers.4.self_attn.v_proj": {
|
4541 |
"bias": false,
|
4542 |
"enable_norm": true,
|
4543 |
+
"enable_perm": false,
|
4544 |
"group_num": 1,
|
4545 |
"group_size": 4096,
|
4546 |
"in_features": 4096,
|
|
|
4564 |
"model.layers.5.mlp.down_proj": {
|
4565 |
"bias": false,
|
4566 |
"enable_norm": true,
|
4567 |
+
"enable_perm": false,
|
4568 |
"group_num": 1,
|
4569 |
"group_size": 14336,
|
4570 |
"in_features": 14336,
|
|
|
4588 |
"model.layers.5.mlp.gate_proj": {
|
4589 |
"bias": false,
|
4590 |
"enable_norm": true,
|
4591 |
+
"enable_perm": false,
|
4592 |
"group_num": 1,
|
4593 |
"group_size": 4096,
|
4594 |
"in_features": 4096,
|
|
|
4612 |
"model.layers.5.mlp.up_proj": {
|
4613 |
"bias": false,
|
4614 |
"enable_norm": true,
|
4615 |
+
"enable_perm": false,
|
4616 |
"group_num": 1,
|
4617 |
"group_size": 4096,
|
4618 |
"in_features": 4096,
|
|
|
4636 |
"model.layers.5.self_attn.k_proj": {
|
4637 |
"bias": false,
|
4638 |
"enable_norm": true,
|
4639 |
+
"enable_perm": false,
|
4640 |
"group_num": 1,
|
4641 |
"group_size": 4096,
|
4642 |
"in_features": 4096,
|
|
|
4660 |
"model.layers.5.self_attn.o_proj": {
|
4661 |
"bias": false,
|
4662 |
"enable_norm": true,
|
4663 |
+
"enable_perm": false,
|
4664 |
"group_num": 1,
|
4665 |
"group_size": 4096,
|
4666 |
"in_features": 4096,
|
|
|
4684 |
"model.layers.5.self_attn.q_proj": {
|
4685 |
"bias": false,
|
4686 |
"enable_norm": true,
|
4687 |
+
"enable_perm": false,
|
4688 |
"group_num": 1,
|
4689 |
"group_size": 4096,
|
4690 |
"in_features": 4096,
|
|
|
4708 |
"model.layers.5.self_attn.v_proj": {
|
4709 |
"bias": false,
|
4710 |
"enable_norm": true,
|
4711 |
+
"enable_perm": false,
|
4712 |
"group_num": 1,
|
4713 |
"group_size": 4096,
|
4714 |
"in_features": 4096,
|
|
|
4732 |
"model.layers.6.mlp.down_proj": {
|
4733 |
"bias": false,
|
4734 |
"enable_norm": true,
|
4735 |
+
"enable_perm": false,
|
4736 |
"group_num": 1,
|
4737 |
"group_size": 14336,
|
4738 |
"in_features": 14336,
|
|
|
4756 |
"model.layers.6.mlp.gate_proj": {
|
4757 |
"bias": false,
|
4758 |
"enable_norm": true,
|
4759 |
+
"enable_perm": false,
|
4760 |
"group_num": 1,
|
4761 |
"group_size": 4096,
|
4762 |
"in_features": 4096,
|
|
|
4780 |
"model.layers.6.mlp.up_proj": {
|
4781 |
"bias": false,
|
4782 |
"enable_norm": true,
|
4783 |
+
"enable_perm": false,
|
4784 |
"group_num": 1,
|
4785 |
"group_size": 4096,
|
4786 |
"in_features": 4096,
|
|
|
4804 |
"model.layers.6.self_attn.k_proj": {
|
4805 |
"bias": false,
|
4806 |
"enable_norm": true,
|
4807 |
+
"enable_perm": false,
|
4808 |
"group_num": 1,
|
4809 |
"group_size": 4096,
|
4810 |
"in_features": 4096,
|
|
|
4828 |
"model.layers.6.self_attn.o_proj": {
|
4829 |
"bias": false,
|
4830 |
"enable_norm": true,
|
4831 |
+
"enable_perm": false,
|
4832 |
"group_num": 1,
|
4833 |
"group_size": 4096,
|
4834 |
"in_features": 4096,
|
|
|
4852 |
"model.layers.6.self_attn.q_proj": {
|
4853 |
"bias": false,
|
4854 |
"enable_norm": true,
|
4855 |
+
"enable_perm": false,
|
4856 |
"group_num": 1,
|
4857 |
"group_size": 4096,
|
4858 |
"in_features": 4096,
|
|
|
4876 |
"model.layers.6.self_attn.v_proj": {
|
4877 |
"bias": false,
|
4878 |
"enable_norm": true,
|
4879 |
+
"enable_perm": false,
|
4880 |
"group_num": 1,
|
4881 |
"group_size": 4096,
|
4882 |
"in_features": 4096,
|
|
|
4900 |
"model.layers.7.mlp.down_proj": {
|
4901 |
"bias": false,
|
4902 |
"enable_norm": true,
|
4903 |
+
"enable_perm": false,
|
4904 |
"group_num": 1,
|
4905 |
"group_size": 14336,
|
4906 |
"in_features": 14336,
|
|
|
4924 |
"model.layers.7.mlp.gate_proj": {
|
4925 |
"bias": false,
|
4926 |
"enable_norm": true,
|
4927 |
+
"enable_perm": false,
|
4928 |
"group_num": 1,
|
4929 |
"group_size": 4096,
|
4930 |
"in_features": 4096,
|
|
|
4948 |
"model.layers.7.mlp.up_proj": {
|
4949 |
"bias": false,
|
4950 |
"enable_norm": true,
|
4951 |
+
"enable_perm": false,
|
4952 |
"group_num": 1,
|
4953 |
"group_size": 4096,
|
4954 |
"in_features": 4096,
|
|
|
4972 |
"model.layers.7.self_attn.k_proj": {
|
4973 |
"bias": false,
|
4974 |
"enable_norm": true,
|
4975 |
+
"enable_perm": false,
|
4976 |
"group_num": 1,
|
4977 |
"group_size": 4096,
|
4978 |
"in_features": 4096,
|
|
|
4996 |
"model.layers.7.self_attn.o_proj": {
|
4997 |
"bias": false,
|
4998 |
"enable_norm": true,
|
4999 |
+
"enable_perm": false,
|
5000 |
"group_num": 1,
|
5001 |
"group_size": 4096,
|
5002 |
"in_features": 4096,
|
|
|
5020 |
"model.layers.7.self_attn.q_proj": {
|
5021 |
"bias": false,
|
5022 |
"enable_norm": true,
|
5023 |
+
"enable_perm": false,
|
5024 |
"group_num": 1,
|
5025 |
"group_size": 4096,
|
5026 |
"in_features": 4096,
|
|
|
5044 |
"model.layers.7.self_attn.v_proj": {
|
5045 |
"bias": false,
|
5046 |
"enable_norm": true,
|
5047 |
+
"enable_perm": false,
|
5048 |
"group_num": 1,
|
5049 |
"group_size": 4096,
|
5050 |
"in_features": 4096,
|
|
|
5068 |
"model.layers.8.mlp.down_proj": {
|
5069 |
"bias": false,
|
5070 |
"enable_norm": true,
|
5071 |
+
"enable_perm": false,
|
5072 |
"group_num": 1,
|
5073 |
"group_size": 14336,
|
5074 |
"in_features": 14336,
|
|
|
5092 |
"model.layers.8.mlp.gate_proj": {
|
5093 |
"bias": false,
|
5094 |
"enable_norm": true,
|
5095 |
+
"enable_perm": false,
|
5096 |
"group_num": 1,
|
5097 |
"group_size": 4096,
|
5098 |
"in_features": 4096,
|
|
|
5116 |
"model.layers.8.mlp.up_proj": {
|
5117 |
"bias": false,
|
5118 |
"enable_norm": true,
|
5119 |
+
"enable_perm": false,
|
5120 |
"group_num": 1,
|
5121 |
"group_size": 4096,
|
5122 |
"in_features": 4096,
|
|
|
5140 |
"model.layers.8.self_attn.k_proj": {
|
5141 |
"bias": false,
|
5142 |
"enable_norm": true,
|
5143 |
+
"enable_perm": false,
|
5144 |
"group_num": 1,
|
5145 |
"group_size": 4096,
|
5146 |
"in_features": 4096,
|
|
|
5164 |
"model.layers.8.self_attn.o_proj": {
|
5165 |
"bias": false,
|
5166 |
"enable_norm": true,
|
5167 |
+
"enable_perm": false,
|
5168 |
"group_num": 1,
|
5169 |
"group_size": 4096,
|
5170 |
"in_features": 4096,
|
|
|
5188 |
"model.layers.8.self_attn.q_proj": {
|
5189 |
"bias": false,
|
5190 |
"enable_norm": true,
|
5191 |
+
"enable_perm": false,
|
5192 |
"group_num": 1,
|
5193 |
"group_size": 4096,
|
5194 |
"in_features": 4096,
|
|
|
5212 |
"model.layers.8.self_attn.v_proj": {
|
5213 |
"bias": false,
|
5214 |
"enable_norm": true,
|
5215 |
+
"enable_perm": false,
|
5216 |
"group_num": 1,
|
5217 |
"group_size": 4096,
|
5218 |
"in_features": 4096,
|
|
|
5236 |
"model.layers.9.mlp.down_proj": {
|
5237 |
"bias": false,
|
5238 |
"enable_norm": true,
|
5239 |
+
"enable_perm": false,
|
5240 |
"group_num": 1,
|
5241 |
"group_size": 14336,
|
5242 |
"in_features": 14336,
|
|
|
5260 |
"model.layers.9.mlp.gate_proj": {
|
5261 |
"bias": false,
|
5262 |
"enable_norm": true,
|
5263 |
+
"enable_perm": false,
|
5264 |
"group_num": 1,
|
5265 |
"group_size": 4096,
|
5266 |
"in_features": 4096,
|
|
|
5284 |
"model.layers.9.mlp.up_proj": {
|
5285 |
"bias": false,
|
5286 |
"enable_norm": true,
|
5287 |
+
"enable_perm": false,
|
5288 |
"group_num": 1,
|
5289 |
"group_size": 4096,
|
5290 |
"in_features": 4096,
|
|
|
5308 |
"model.layers.9.self_attn.k_proj": {
|
5309 |
"bias": false,
|
5310 |
"enable_norm": true,
|
5311 |
+
"enable_perm": false,
|
5312 |
"group_num": 1,
|
5313 |
"group_size": 4096,
|
5314 |
"in_features": 4096,
|
|
|
5332 |
"model.layers.9.self_attn.o_proj": {
|
5333 |
"bias": false,
|
5334 |
"enable_norm": true,
|
5335 |
+
"enable_perm": false,
|
5336 |
"group_num": 1,
|
5337 |
"group_size": 4096,
|
5338 |
"in_features": 4096,
|
|
|
5356 |
"model.layers.9.self_attn.q_proj": {
|
5357 |
"bias": false,
|
5358 |
"enable_norm": true,
|
5359 |
+
"enable_perm": false,
|
5360 |
"group_num": 1,
|
5361 |
"group_size": 4096,
|
5362 |
"in_features": 4096,
|
|
|
5380 |
"model.layers.9.self_attn.v_proj": {
|
5381 |
"bias": false,
|
5382 |
"enable_norm": true,
|
5383 |
+
"enable_perm": false,
|
5384 |
"group_num": 1,
|
5385 |
"group_size": 4096,
|
5386 |
"in_features": 4096,
|
|
|
5401 |
8
|
5402 |
]
|
5403 |
}
|
5404 |
+
},
|
5405 |
+
"quant_method": "vptq"
|
5406 |
+
},
|
5407 |
+
"rms_norm_eps": 1e-05,
|
5408 |
+
"rope_scaling": {
|
5409 |
+
"factor": 8.0,
|
5410 |
+
"high_freq_factor": 4.0,
|
5411 |
+
"low_freq_factor": 1.0,
|
5412 |
+
"original_max_position_embeddings": 8192,
|
5413 |
+
"rope_type": "llama3"
|
5414 |
+
},
|
5415 |
+
"rope_theta": 500000.0,
|
5416 |
+
"tie_word_embeddings": false,
|
5417 |
+
"torch_dtype": "bfloat16",
|
5418 |
+
"transformers_version": "4.49.0",
|
5419 |
+
"use_cache": true,
|
5420 |
+
"vocab_size": 128256
|
5421 |
+
}
|
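The updated config.json above declares VPTQ quantization for all 32 decoder layers (q/k/v/o and gate/up/down projections) with enable_perm turned off, and adds head_dim 128 alongside the llama3-style rope_scaling block. A minimal sanity check over a locally downloaded copy of the file, using only the standard library, might look like the following sketch (the local path is an assumption):

# Hedged sketch: verify the fields shown in the diff above.
import json

with open("config.json") as f:  # path is an assumption (wherever the snapshot lives)
    cfg = json.load(f)

qcfg = cfg["quantization_config"]
assert qcfg["quant_method"] == "vptq"

layers = qcfg["config_for_layers"]
# 32 decoder layers x 7 quantized projections (q/k/v/o, gate/up/down) = 224 entries
assert len(layers) == 32 * 7

# This commit turns the permutation step off for every quantized projection.
assert all(entry["enable_perm"] is False for entry in layers.values())

print(cfg["head_dim"], cfg["rope_scaling"]["rope_type"], cfg["transformers_version"])
# expected per the diff: 128 llama3 4.49.0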
generation_config.json
CHANGED
@@ -1,16 +1,10 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
"bos_token_id": 128000,
|
4 |
-
"do_sample": true,
|
5 |
"eos_token_id": [
|
6 |
128001,
|
7 |
128008,
|
8 |
128009
|
9 |
],
|
10 |
-
"
|
11 |
-
"num_assistant_tokens_schedule": "heuristic",
|
12 |
-
"return_legacy_cache": true,
|
13 |
-
"temperature": 0.6,
|
14 |
-
"top_p": 0.9,
|
15 |
-
"transformers_version": "4.45.1"
|
16 |
}
|
|
|
1 |
{
|
2 |
+
"_from_model_config": true,
|
3 |
"bos_token_id": 128000,
|
|
|
4 |
"eos_token_id": [
|
5 |
128001,
|
6 |
128008,
|
7 |
128009
|
8 |
],
|
9 |
+
"transformers_version": "4.49.0"
|
|
10 |
}
|
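The trimmed generation_config.json above keeps only the token ids and the transformers version, so the sampling settings the old file carried (do_sample, temperature 0.6, top_p 0.9) are no longer applied automatically. A hedged sketch of restoring that behaviour at call time, assuming the checkpoint is available and the VPTQ runtime required by its quantization_config is installed, might be:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Repo id taken from _name_or_path in config.json; the actual Hub location may differ.
repo = "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, device_map="auto")

inputs = tokenizer("Explain vector quantization in one sentence.", return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=True,   # previously implied by the old generation_config.json
    temperature=0.6,  # value removed from the config in this commit
    top_p=0.9,        # value removed from the config in this commit
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))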
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf76e03e16f27cef164fcc14201d31aa6a0d855d06cef6061bc0ebeb4f47de17
|
3 |
+
size 4359343640
|
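The Git LFS pointer above pins the first safetensors shard to a specific sha256 digest and byte size. One way to verify a downloaded shard against that pointer, with the local path as an assumption, is:

# Hedged sketch: check a local shard against the LFS pointer values from the diff.
import hashlib, os

path = "model-00001-of-00002.safetensors"  # assumed local download location
expected_sha256 = "cf76e03e16f27cef164fcc14201d31aa6a0d855d06cef6061bc0ebeb4f47de17"
expected_size = 4359343640

assert os.path.getsize(path) == expected_size

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == expected_sha256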
model.safetensors.index.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -2053,11 +2053,12 @@
|
|
2053 |
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|eot_id|>",
|
|
|
2056 |
"legacy": false,
|
2057 |
"model_input_names": [
|
2058 |
"input_ids",
|
2059 |
"attention_mask"
|
2060 |
],
|
2061 |
"model_max_length": 131072,
|
2062 |
-
"tokenizer_class": "
|
2063 |
}
|
|
|
2053 |
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|eot_id|>",
|
2056 |
+
"extra_special_tokens": {},
|
2057 |
"legacy": false,
|
2058 |
"model_input_names": [
|
2059 |
"input_ids",
|
2060 |
"attention_mask"
|
2061 |
],
|
2062 |
"model_max_length": 131072,
|
2063 |
+
"tokenizer_class": "PreTrainedTokenizer"
|
2064 |
}
|
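tokenizer_config.json keeps the Llama 3.1 chat template and the <|eot_id|> eos_token while switching tokenizer_class to PreTrainedTokenizer and adding an empty extra_special_tokens map. A short sketch of exercising that template (the repo id is taken from _name_or_path in config.json and may not be the actual Hub location of this commit) could be:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft")

messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "What does VPTQ stand for?"},
]
# Renders the system/user turns and appends the assistant header, per the template above.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
print(tok.eos_token)  # "<|eot_id|>" per tokenizer_config.json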