{
"architectures": [
"Zamba2ForCausalLM"
],
"adapter_rank": 128,
"add_bias_linear": false,
"attention_dropout": 0.0,
"attention_head_dim": 160,
"attention_hidden_size": 5120,
"bos_token_id": 1,
"chunk_size": 256,
"eos_token_id": 2,
"ffn_hidden_size": 10240,
"hidden_act": "gelu",
"hidden_size": 2560,
"hybrid_layer_ids": [
6,
12,
18,
24,
30,
36,
42,
47,
51
],
"initializer_range": 0.02,
"intermediate_size": 10240,
"kv_channels": 80,
"layers_block_type": [
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba"
],
"mamba_d_conv": 4,
"mamba_d_state": 64,
"mamba_expand": 2,
"mamba_headdim": 64,
"mamba_ngroups": 1,
"max_position_embeddings": 4096,
"model_type": "zamba2",
"n_mamba_heads": 80,
"num_attention_heads": 32,
"num_hidden_layers": 54,
"num_key_value_heads": 32,
"num_logits_to_keep": 1,
"num_mem_blocks": 2,
"num_query_groups": 32,
"pad_token_id": 0,
"rms_norm_eps": 1e-05,
"rope_theta": 10000,
"time_step_floor": 0.0001,
"time_step_limit": null,
"time_step_max": 0.1,
"time_step_min": 0.001,
"transformers_version": "4.49.0.dev0",
"use_cache": true,
"use_conv_bias": true,
"use_long_context": false,
"use_mem_rope": false,
"use_shared_attention_adapter": false,
"use_shared_mlp_adapter": true,
"vocab_size": 32000
}
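
A minimal sketch of sanity-checking this config, using only the standard library (the local filename "config.json" is an assumption; with a transformers build that includes Zamba2 support, per the "transformers_version": "4.49.0.dev0" field above, the same file can also be loaded through AutoConfig.from_pretrained):

import json

# Load the raw config (assumes it is saved locally as "config.json").
with open("config.json") as f:
    cfg = json.load(f)

# 54 layers total: 45 pure "mamba" blocks plus 9 "hybrid" blocks.
assert cfg["num_hidden_layers"] == len(cfg["layers_block_type"]) == 54

# "hybrid_layer_ids" lists the 0-indexed positions of the "hybrid"
# entries in "layers_block_type".
hybrid_ids = [i for i, t in enumerate(cfg["layers_block_type"]) if t == "hybrid"]
assert hybrid_ids == cfg["hybrid_layer_ids"]  # [6, 12, 18, 24, 30, 36, 42, 47, 51]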