Optional keyword argument attention_mask in Phi3Attention.forward in modeling_phi3.py
#13
by bjodah
Hi!
I'm very new to LLMs so please bear with me in case I'm overlooking something obvious.
I'm attempting to quantize this model to AWQ format using autoawq. In the process of doing so I got: TypeError: Phi3Attention.forward() missing 1 required positional argument: 'attention_mask'
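For reference, the quantization is invoked roughly like this. This is only a sketch: the model path, quant_config values, and calibration texts are placeholders rather than my actual setup, but the quantize() keyword arguments match the failing cell in the traceback below.

```python
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# Placeholder model path, quant config, and calibration data; the quantize()
# keyword arguments are the ones from the failing notebook cell.
model_path = "microsoft/Phi-3-mini-4k-instruct"
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}
calib_data = ["Some calibration text ..."] * 512  # placeholder calibration samples

model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

model.quantize(
    tokenizer,
    quant_config=quant_config,
    calib_data=calib_data,
    n_parallel_calib_samples=32,
    max_calib_samples=512,
    max_calib_seq_len=2048,
)
```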
Traceback
AWQ: 0%| | 0/32 [00:02<?, ?it/s]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[11], line 2
1 # Quantize
----> 2 model.quantize(
3 tokenizer,
4 quant_config=quant_config,
5 calib_data=calib_data,
6 n_parallel_calib_samples=32,
7 max_calib_samples=512,
8 max_calib_seq_len=2048
9 )
File ~/venv/lib/python3.11/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/venv/lib/python3.11/site-packages/awq/models/base.py:241, in BaseAWQForCausalLM.quantize(self, tokenizer, quant_config, calib_data, split, text_column, duo_scaling, export_compatible, apply_clip, n_parallel_calib_samples, max_calib_samples, max_calib_seq_len, max_chunk_memory, quantizer_cls, **kwargs)
218 self.quant_config.modules_to_not_convert = self.modules_to_not_convert
220 self.quantizer = quantizer_cls(
221 self,
222 self.model,
(...)
239 **kwargs,
240 )
--> 241 self.quantizer.quantize()
243 self.is_quantized = True
File ~/venv/lib/python3.11/site-packages/awq/quantize/quantizer.py:179, in AwqQuantizer.quantize(self)
175 # [STEP 2]: Compute and apply scale list
176 module_config: List[Dict] = self.awq_model.get_layers_for_scaling(
177 self.modules[i], input_feat, self.module_kwargs
178 )
--> 179 scales_list = [
180 self._search_best_scale(self.modules[i], **layer)
181 for layer in module_config
182 ]
183 apply_scale(self.modules[i], scales_list, input_feat_dict=input_feat)
184 scales_list = append_str_prefix(
185 scales_list, get_op_name(self.model, self.modules[i]) + "."
186 )
File ~/venv/lib/python3.11/site-packages/awq/quantize/quantizer.py:180, in <listcomp>(.0)
175 # [STEP 2]: Compute and apply scale list
176 module_config: List[Dict] = self.awq_model.get_layers_for_scaling(
177 self.modules[i], input_feat, self.module_kwargs
178 )
179 scales_list = [
--> 180 self._search_best_scale(self.modules[i], **layer)
181 for layer in module_config
182 ]
183 apply_scale(self.modules[i], scales_list, input_feat_dict=input_feat)
184 scales_list = append_str_prefix(
185 scales_list, get_op_name(self.model, self.modules[i]) + "."
186 )
File ~/venv/lib/python3.11/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/venv/lib/python3.11/site-packages/awq/quantize/quantizer.py:340, in AwqQuantizer._search_best_scale(self, module, prev_op, layers, inp, module2inspect, kwargs)
338 with torch.no_grad():
339 module_kwargs = self._sanitize_kwargs(kwargs, module2inspect)
--> 340 fp16_output = self._module_forward(inp, module2inspect, module_kwargs)
341 fp16_output = fp16_output.clip(torch.finfo(fp16_output.dtype).min, torch.finfo(fp16_output.dtype).max)
343 # [STEP 4]: Compute loss
File ~/venv/lib/python3.11/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/venv/lib/python3.11/site-packages/awq/quantize/quantizer.py:269, in AwqQuantizer._module_forward(self, x, module, module_kwargs)
267 partitioned_inputs = torch.split(x, self.n_parallel_calib_samples)
268 for x_partial in partitioned_inputs:
--> 269 partial_output = module(x_partial, **module_kwargs)
271 if isinstance(partial_output, tuple):
272 partial_output = partial_output[0]
File ~/venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1751, in Module._wrapped_call_impl(self, *args, **kwargs)
1749 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1750 else:
-> 1751 return self._call_impl(*args, **kwargs)
File ~/venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1762, in Module._call_impl(self, *args, **kwargs)
1757 # If we don't have any hooks, we want to skip the rest of the logic in
1758 # this function, and just call forward.
1759 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1760 or _global_backward_pre_hooks or _global_backward_hooks
1761 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1762 return forward_call(*args, **kwargs)
1764 result = None
1765 called_always_called_hooks = set()
TypeError: Phi3Attention.forward() missing 1 required positional argument: 'attention_mask'
If I simply add a default argument (=None) to attention_mask in Phi3Attention.forward (in modeling_phi3.py), then this error goes away. Since the type annotation is Optional[torch.Tensor], and I can see handling of attention_mask being None throughout the code, I thought this fix was appropriate. But perhaps I'm missing something?
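To make the failure mode concrete, here is a minimal standalone sketch (a toy module, not the real Phi3Attention) of why a missing default on attention_mask trips up autoawq, which ends up calling the layer as module(x_partial, **module_kwargs) without passing attention_mask explicitly:

```python
from typing import Optional
import torch

# Toy stand-in for an attention module whose attention_mask has no default.
class Attn(torch.nn.Module):
    def forward(self, hidden_states: torch.Tensor,
                attention_mask: Optional[torch.Tensor]):  # no "= None" default
        return hidden_states

# Same module, but with the default added (the change I'm proposing).
class AttnFixed(torch.nn.Module):
    def forward(self, hidden_states: torch.Tensor,
                attention_mask: Optional[torch.Tensor] = None):
        return hidden_states

x = torch.zeros(1, 4, 8)

try:
    Attn()(x)  # called without attention_mask, like module(x_partial, **module_kwargs)
except TypeError as exc:
    print(exc)  # missing 1 required positional argument: 'attention_mask'

print(AttnFixed()(x).shape)  # succeeds once attention_mask defaults to None
```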
All the best,
Björn