{
  "A_init_range": [
    1,
    16
  ],
  "D_has_hdim": false,
  "bias": false,
  "chunk_size": 256,
  "conv_bias": true,
  "conv_init": null,
  "d_conv": 4,
  "d_model": 768,
  "d_ssm": null,
  "d_state": 128,
  "device": null,
  "dt_init_floor": 0.0001,
  "dt_limit": [
    0.0,
    Infinity
  ],
  "dt_max": 0.1,
  "dt_min": 0.001,
  "dtype": null,
  "expand": 2,
  "headdim": 64,
  "layer_idx": null,
  "ngroups": 1,
  "norm_before_gate": false,
  "process_group": null,
  "rmsnorm": true,
  "sequence_parallel": true,
  "use_mem_eff_path": true
}