Optional keyword argument attention_mask in Phi3Attention.forward in modeling_phi3.py
#13
by bjodah
Hi!
I'm very new to LLMs so please bear with me in case I'm overlooking something obvious.
I'm attempting to quantize this model to AWQ format using autoawq. In the process of doing so I got: TypeError: Phi3Attention.forward() missing 1 required positional argument: 'attention_mask'
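For reference, the quantization is invoked roughly like this. This is only a sketch: the model path, quant_config values, and calibration texts are placeholders rather than my actual setup, but the quantize() keyword arguments match the failing cell in the traceback below.

```python
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# Placeholder model path, quant config, and calibration data; the quantize()
# keyword arguments are the ones from the failing notebook cell.
model_path = "microsoft/Phi-3-mini-4k-instruct"
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}
calib_data = ["Some calibration text ..."] * 512  # placeholder calibration samples

model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

model.quantize(
    tokenizer,
    quant_config=quant_config,
    calib_data=calib_data,
    n_parallel_calib_samples=32,
    max_calib_samples=512,
    max_calib_seq_len=2048,
)
```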
Traceback
AWQ: 0%| | 0/32 [00:02<?, ?it/s]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[11], line 2
1 # Quantize
----> 2 model.quantize(
3 tokenizer,
4 quant_config=quant_config,
5 calib_data=calib_data,
6 n_parallel_calib_samples=32,
7 max_calib_samples=512,
8 max_calib_seq_len=2048
9 )
File ~/venv/lib/python3.11/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/venv/lib/python3.11/site-packages/awq/models/base.py:241, in BaseAWQForCausalLM.quantize(self, tokenizer, quant_config, calib_data, split, text_column, duo_scaling, export_compatible, apply_clip, n_parallel_calib_samples, max_calib_samples, max_calib_seq_len, max_chunk_memory, quantizer_cls, **kwargs)
218 self.quant_config.modules_to_not_convert = self.modules_to_not_convert
220 self.quantizer = quantizer_cls(
221 self,
222 self.model,
(...)
239 **kwargs,
240 )
--> 241 self.quantizer.quantize()
243 self.is_quantized = True
File ~/venv/lib/python3.11/site-packages/awq/quantize/quantizer.py:179, in AwqQuantizer.quantize(self)
175 # [STEP 2]: Compute and apply scale list
176 module_config: List[Dict] = self.awq_model.get_layers_for_scaling(
177 self.modules[i], input_feat, self.module_kwargs
178 )
--> 179 scales_list = [
180 self._search_best_scale(self.modules[i], **layer)
181 for layer in module_config
182 ]
183 apply_scale(self.modules[i], scales_list, input_feat_dict=input_feat)
184 scales_list = append_str_prefix(
185 scales_list, get_op_name(self.model, self.modules[i]) + "."
186 )
File ~/venv/lib/python3.11/site-packages/awq/quantize/quantizer.py:180, in <listcomp>(.0)
175 # [STEP 2]: Compute and apply scale list
176 module_config: List[Dict] = self.awq_model.get_layers_for_scaling(
177 self.modules[i], input_feat, self.module_kwargs
178 )
179 scales_list = [
--> 180 self._search_best_scale(self.modules[i], **layer)
181 for layer in module_config
182 ]
183 apply_scale(self.modules[i], scales_list, input_feat_dict=input_feat)
184 scales_list = append_str_prefix(
185 scales_list, get_op_name(self.model, self.modules[i]) + "."
186 )
File ~/venv/lib/python3.11/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/venv/lib/python3.11/site-packages/awq/quantize/quantizer.py:340, in AwqQuantizer._search_best_scale(self, module, prev_op, layers, inp, module2inspect, kwargs)
338 with torch.no_grad():
339 module_kwargs = self._sanitize_kwargs(kwargs, module2inspect)
--> 340 fp16_output = self._module_forward(inp, module2inspect, module_kwargs)
341 fp16_output = fp16_output.clip(torch.finfo(fp16_output.dtype).min, torch.finfo(fp16_output.dtype).max)
343 # [STEP 4]: Compute loss
File ~/venv/lib/python3.11/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/venv/lib/python3.11/site-packages/awq/quantize/quantizer.py:269, in AwqQuantizer._module_forward(self, x, module, module_kwargs)
267 partitioned_inputs = torch.split(x, self.n_parallel_calib_samples)
268 for x_partial in partitioned_inputs:
--> 269 partial_output = module(x_partial, **module_kwargs)
271 if isinstance(partial_output, tuple):
272 partial_output = partial_output[0]
File ~/venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1751, in Module._wrapped_call_impl(self, *args, **kwargs)
1749 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1750 else:
-> 1751 return self._call_impl(*args, **kwargs)
File ~/venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1762, in Module._call_impl(self, *args, **kwargs)
1757 # If we don't have any hooks, we want to skip the rest of the logic in
1758 # this function, and just call forward.
1759 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1760 or _global_backward_pre_hooks or _global_backward_hooks
1761 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1762 return forward_call(*args, **kwargs)
1764 result = None
1765 called_always_called_hooks = set()
TypeError: Phi3Attention.forward() missing 1 required positional argument: 'attention_mask'
If I simply add a default argument (=None) to attention_mask in Phi3Attention.forward (in modeling_phi3.py), then this error goes away. Since the type annotation is Optional[torch.Tensor], and I can see handling of attention_mask being None throughout the code, I thought this fix was appropriate. But perhaps I'm missing something?
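To make the failure mode concrete, here is a minimal standalone sketch (a toy module, not the real Phi3Attention) of why a missing default on attention_mask trips up autoawq, which ends up calling the layer as module(x_partial, **module_kwargs) without passing attention_mask explicitly:

```python
from typing import Optional
import torch

# Toy stand-in for an attention module whose attention_mask has no default.
class Attn(torch.nn.Module):
    def forward(self, hidden_states: torch.Tensor,
                attention_mask: Optional[torch.Tensor]):  # no "= None" default
        return hidden_states

# Same module, but with the default added (the change I'm proposing).
class AttnFixed(torch.nn.Module):
    def forward(self, hidden_states: torch.Tensor,
                attention_mask: Optional[torch.Tensor] = None):
        return hidden_states

x = torch.zeros(1, 4, 8)

try:
    Attn()(x)  # called without attention_mask, like module(x_partial, **module_kwargs)
except TypeError as exc:
    print(exc)  # missing 1 required positional argument: 'attention_mask'

print(AttnFixed()(x).shape)  # succeeds once attention_mask defaults to None
```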
All the best,
Björn