Transformers
PyTorch
mamba2-130m / config.json
Author: Wauplin (Hugging Face Staff)
Push model using huggingface_hub.
68d6937 verified
raw · history · blame · 552 Bytes
{
"A_init_range": [
1,
16
],
"D_has_hdim": false,
"bias": false,
"chunk_size": 256,
"conv_bias": true,
"conv_init": null,
"d_conv": 4,
"d_model": 768,
"d_ssm": null,
"d_state": 128,
"device": null,
"dt_init_floor": 0.0001,
"dt_limit": [
0.0,
1e999
],
"dt_max": 0.1,
"dt_min": 0.001,
"dtype": null,
"expand": 2,
"headdim": 64,
"layer_idx": null,
"ngroups": 1,
"norm_before_gate": false,
"process_group": null,
"rmsnorm": true,
"sequence_parallel": true,
"use_mem_eff_path": true
}