Commit: Upload model
Changed file: modeling_t5mimo.py (+12 added, −6 removed)
@@ -947,14 +947,20 @@ class T5Stack(T5PreTrainedModel):

Before (old lines 947–960) — NOTE: the content of the six removed lines was lost
in the page extraction; only their positions survive:

    if encoder_attention_mask is None:
        encoder_attention_mask = torch.ones(encoder_hidden_shape, device=inputs_embeds.device, dtype=torch.long)

        [4 removed lines — content not recoverable from this extraction]
    else:
        [1 removed line — content not recoverable from this extraction]

        [1 removed line — content not recoverable from this extraction]
    else:
        encoder_extended_attention_mask = None

After (new lines 947–966) — relative indentation inferred from the diff's
context lines and Python semantics; the two trailing `else:` branches belong to
successively enclosing conditionals outside this hunk (TODO: confirm against the
full file):

    if encoder_attention_mask is None:
        encoder_attention_mask = torch.ones(encoder_hidden_shape, device=inputs_embeds.device, dtype=torch.long)

        if self.config.is_mimo:                                                      # added
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)   # added
            encoder_extended_attention_mask = encoder_extended_attention_mask.unsqueeze(0)         # added
            encoder_extended_attention_mask = encoder_extended_attention_mask.repeat(1, input_shape[1], 1, 1, 1)  # added
        else:                                                                        # added
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)   # added
    else:
        if self.config.is_mimo:                                                      # added
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)   # added
            encoder_extended_attention_mask = encoder_extended_attention_mask.permute(0, 2, 1, 3)  # added
            encoder_extended_attention_mask = encoder_extended_attention_mask.unsqueeze(3)         # added
        else:                                                                        # added
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)   # added

    else:
        encoder_extended_attention_mask = None

Summary of the change: when `config.is_mimo` is set, the inverted encoder
attention mask is additionally reshaped (unsqueeze/repeat when the mask was
auto-created as all-ones; permute/unsqueeze when a caller-supplied mask is
used) — presumably to add a MIMO series dimension; otherwise the standard
`invert_attention_mask` result is used unchanged. The added-line count (12)
and removed-line count (6) match the page's "+12 -6" summary.