Update 8bitapp.py
Browse files- 8bitapp.py +15 -8
8bitapp.py
CHANGED
@@ -13,7 +13,6 @@ from torch.cuda.amp import autocast
|
|
13 |
import warnings
|
14 |
import random
|
15 |
from bitsandbytes.nn import Linear8bitLt
|
16 |
-
from transformers import AutoModel
|
17 |
|
18 |
# Suppress warnings for cleaner output
|
19 |
warnings.filterwarnings("ignore")
|
@@ -52,24 +51,32 @@ try:
|
|
52 |
# Load MusicGen model in FP16
|
53 |
musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
|
54 |
|
55 |
-
# Apply 8-bit quantization to
|
56 |
def quantize_to_8bit(model):
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
58 |
if isinstance(module, torch.nn.Linear):
|
59 |
# Replace with 8-bit linear layer
|
60 |
-
parent =
|
61 |
-
|
|
|
62 |
parent = getattr(parent, part)
|
63 |
-
setattr(parent,
|
64 |
module.in_features,
|
65 |
module.out_features,
|
66 |
bias=module.bias is not None,
|
67 |
has_fp16_weights=False,
|
68 |
threshold=6.0
|
69 |
))
|
|
|
|
|
70 |
return model
|
71 |
|
72 |
-
# Quantize the model
|
73 |
musicgen_model = quantize_to_8bit(musicgen_model)
|
74 |
musicgen_model.to(device)
|
75 |
|
@@ -94,7 +101,7 @@ def print_resource_usage(stage: str):
|
|
94 |
print("---------------")
|
95 |
|
96 |
# Check available GPU memory
|
97 |
-
def check_vram_availability(required_gb=
|
98 |
"""Check if sufficient VRAM is available for audio generation."""
|
99 |
total_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)
|
100 |
allocated_vram = torch.cuda.memory_allocated() / (1024**3)
|
|
|
13 |
import warnings
|
14 |
import random
|
15 |
from bitsandbytes.nn import Linear8bitLt
|
|
|
16 |
|
17 |
# Suppress warnings for cleaner output
|
18 |
warnings.filterwarnings("ignore")
|
|
|
51 |
# Load MusicGen model in FP16
|
52 |
musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
|
53 |
|
54 |
+
# Apply 8-bit quantization to the language model (lm) component
def quantize_to_8bit(model):
    """Replace every ``torch.nn.Linear`` inside ``model.lm`` with a
    bitsandbytes ``Linear8bitLt`` layer, preserving pretrained parameters.

    Args:
        model: A MusicGen model exposing its transformer under the ``lm``
            attribute.

    Returns:
        The same model object, mutated in place. Actual int8 quantization of
        the copied weights happens on the subsequent ``.to(device)`` call.

    Raises:
        AttributeError: If the model has no ``lm`` attribute.
    """
    # Target the lm (language model) attribute, which contains the transformer
    if not hasattr(model, 'lm'):
        raise AttributeError("MusicGen model does not have 'lm' attribute for quantization.")
    lm = model.lm

    # Snapshot the layers to replace BEFORE mutating: swapping modules while
    # iterating named_modules() can skip or revisit entries.
    targets = [
        (name, module)
        for name, module in lm.named_modules()
        if isinstance(module, torch.nn.Linear)
    ]

    quantized_layers = 0
    for name, module in targets:
        # Walk down to the immediate parent of the layer being replaced.
        parent = lm
        name_parts = name.split('.')
        for part in name_parts[:-1]:
            parent = getattr(parent, part)

        # Build the 8-bit replacement with the same shape/bias configuration.
        replacement = Linear8bitLt(
            module.in_features,
            module.out_features,
            bias=module.bias is not None,
            has_fp16_weights=False,
            threshold=6.0
        )
        # BUG FIX: the original code installed freshly-initialized 8-bit
        # layers without transferring the pretrained weights/bias, which
        # silently left the model with random parameters. Load them before
        # swapping the layer in; Linear8bitLt quantizes these values when
        # the model is later moved to the GPU.
        replacement.load_state_dict(module.state_dict())

        setattr(parent, name_parts[-1], replacement)
        quantized_layers += 1

    print(f"Quantized {quantized_layers} linear layers to 8-bit.")
    return model
|
78 |
|
79 |
+
# Quantize the model
|
80 |
musicgen_model = quantize_to_8bit(musicgen_model)
|
81 |
musicgen_model.to(device)
|
82 |
|
|
|
101 |
print("---------------")
|
102 |
|
103 |
# Check available GPU memory
|
104 |
+
def check_vram_availability(required_gb=4.0):
|
105 |
"""Check if sufficient VRAM is available for audio generation."""
|
106 |
total_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)
|
107 |
allocated_vram = torch.cuda.memory_allocated() / (1024**3)
|