# Recipe with a single stage, `test_stage`, whose `obcq_modifiers` group
# declares two modifiers: LogarithmicEqualizationModifier and
# QuantizationModifier.
#
# NOTE(review): this block structure was reconstructed from a version of the
# file that had been flattened onto one line. Confirm that
# `post_oneshot_calibration` and `scheme_overrides` belong under
# QuantizationModifier, as laid out here.
test_stage:
  obcq_modifiers:
    LogarithmicEqualizationModifier:
      # Each entry is a pair: [list of layer-name patterns, one layer-name
      # pattern]. Presumably the "re:" prefix marks a regular expression and
      # the second element is the layer feeding the first group -- TODO
      # confirm against the modifier's documentation.
      mappings:
        - [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"]
        - [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
    QuantizationModifier:
      ignore:
        # These operations don't make sense to quantize
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - MatMulOutput_QK
        - MatMulOutput_PV
        # Skip quantizing the layers with the most sensitive activations
        - model.layers.21.mlp.down_proj
        - model.layers.7.mlp.down_proj
        - model.layers.2.mlp.down_proj
        - model.layers.8.self_attn.q_proj
        - model.layers.8.self_attn.k_proj
      post_oneshot_calibration: false
      # Per-module-type quantization settings; every override here uses 8 bits.
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          # Explicit null: no input-activation scheme is set for Embedding.
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false