ghostai1 committed on
Commit
5dc6a06
·
verified ·
1 Parent(s): ee0211d

Update barks.py

Browse files
Files changed (1) hide show
  1. barks.py +43 -35
barks.py CHANGED
@@ -1,4 +1,4 @@
1
-
2
  import os
3
  import torch
4
  import torchaudio
@@ -15,12 +15,13 @@ import warnings
15
  import random
16
  from transformers import AutoProcessor, BarkModel
17
  from accelerate import Accelerator
 
18
 
19
  # Suppress warnings for cleaner output
20
  warnings.filterwarnings("ignore")
21
 
22
  # Set PYTORCH_CUDA_ALLOC_CONF to manage memory fragmentation
23
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
24
 
25
  # Check critical dependencies
26
  if np.__version__ != "1.23.5":
@@ -35,14 +36,18 @@ if device != "cuda":
35
  sys.exit(1)
36
  print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
37
 
38
- # Initialize accelerator for offloading
39
- accelerator = Accelerator(mixed_precision="fp16")
40
 
41
  # Pre-run memory cleanup
42
- torch.cuda.empty_cache()
43
- gc.collect()
44
- torch.cuda.ipc_collect()
45
- torch.cuda.synchronize()
 
 
 
 
46
 
47
  # 2) LOAD MODELS
48
  try:
@@ -52,9 +57,9 @@ try:
52
  print(f"ERROR: Local model path {local_model_path} does not exist.")
53
  print("Please download the MusicGen medium model weights and place them in the correct directory.")
54
  sys.exit(1)
55
- musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
56
  musicgen_model.set_generation_params(
57
- duration=10, # Default chunk duration
58
  two_step_cfg=False # Disable two-step CFG for stability
59
  )
60
  except Exception as e:
@@ -63,13 +68,12 @@ except Exception as e:
63
  sys.exit(1)
64
 
65
  try:
66
- print("Loading Bark small model into system RAM...")
67
  bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
68
- bark_model = BarkModel.from_pretrained("suno/bark-small")
69
- bark_model = bark_model.to("cpu") # Offload to CPU initially
70
  except Exception as e:
71
  print(f"ERROR: Failed to load Bark model: {e}")
72
- print("Ensure Bark model weights are available and dependencies are installed.")
73
  sys.exit(1)
74
 
75
  # 3) RESOURCE MONITORING FUNCTION
@@ -78,15 +82,18 @@ def print_resource_usage(stage: str):
78
  print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / (1024**3):.2f} GB")
79
  print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024**3):.2f} GB")
80
  print(f"CPU Memory Used: {psutil.virtual_memory().percent}%")
 
81
  print("---------------")
82
 
83
  # Check available GPU memory
84
- def check_vram_availability(required_gb=4.5): # Adjusted for MusicGen + Bark
85
  total_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)
86
  allocated_vram = torch.cuda.memory_allocated() / (1024**3)
87
  available_vram = total_vram - allocated_vram
88
  if available_vram < required_gb:
89
- print(f"WARNING: Low VRAM available ({available_vram:.2f} GB). Reduce total_duration or chunk_duration.")
 
 
90
  return available_vram >= required_gb
91
 
92
  # 4) GENRE PROMPT FUNCTIONS
@@ -267,7 +274,7 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
267
  try:
268
  print("Generating vocals with Bark...")
269
  # Move Bark model to GPU
270
- bark_model = bark_model.to(accelerator.device)
271
 
272
  # Process vocal prompt
273
  inputs = bark_processor(vocal_prompt, return_tensors="pt").to(accelerator.device)
@@ -291,7 +298,7 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
291
 
292
  # Move Bark model back to CPU
293
  bark_model = bark_model.to("cpu")
294
- torch.cuda.empty_cache()
295
 
296
  return vocal_segment, "✅ Vocals generated successfully."
297
  except Exception as e:
@@ -306,7 +313,7 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
306
  try:
307
  start_time = time.time()
308
  total_duration = total_duration # Validated by radio button (30, 60, 90, 120)
309
- chunk_duration = min(max(chunk_duration, 5), 15)
310
  num_chunks = max(1, total_duration // chunk_duration)
311
  chunk_duration = total_duration / num_chunks
312
  overlap_duration = min(1.0, crossfade_duration / 1000.0)
@@ -314,14 +321,17 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
314
  sample_rate = musicgen_model.sample_rate
315
  audio_segments = []
316
 
317
- if not check_vram_availability(required_gb=4.5):
318
- return None, "⚠️ Insufficient VRAM for generation. Reduce total_duration or chunk_duration."
319
 
320
  print("Generating instrumental audio...")
321
  seed = 42
322
  torch.manual_seed(seed)
323
  np.random.seed(seed)
324
 
 
 
 
325
  for i in range(num_chunks):
326
  chunk_prompt = instrumental_prompt
327
  print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
@@ -360,13 +370,13 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
360
  os.remove(temp_wav_path)
361
  audio_segments.append(segment)
362
 
363
- torch.cuda.empty_cache()
364
- gc.collect()
365
- torch.cuda.ipc_collect()
366
- torch.cuda.synchronize()
367
- time.sleep(0.5)
368
  print_resource_usage(f"After Chunk {i+1} Generation")
369
 
 
 
 
 
370
  print("Combining instrumental chunks...")
371
  final_segment = audio_segments[0]
372
  for i in range(1, len(audio_segments)):
@@ -405,14 +415,11 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
405
  except Exception as e:
406
  return None, f"❌ Generation failed: {e}"
407
  finally:
408
- torch.cuda.empty_cache()
409
- gc.collect()
410
- torch.cuda.ipc_collect()
411
- torch.cuda.synchronize()
412
 
413
  # Function to clear inputs
414
  def clear_inputs():
415
- return "", "", 3.0, 250, 0.9, 1.0, 30, 10, 1000, 120, "none", "none", "none", "none", "none"
416
 
417
  # 8) CUSTOM CSS
418
  css = """
@@ -560,7 +567,7 @@ with gr.Blocks(css=css) as demo:
560
  maximum=1.0,
561
  value=0.9,
562
  step=0.05,
563
- info="Keeps tokens with cumulative probability above p."
564
  )
565
  temperature = gr.Slider(
566
  label="Temperature 🔥",
@@ -579,10 +586,10 @@ with gr.Blocks(css=css) as demo:
579
  chunk_duration = gr.Slider(
580
  label="Chunk Duration ⏱️ (seconds)",
581
  minimum=5,
582
- maximum=15,
583
- value=10,
584
  step=1,
585
- info="Duration of each chunk to render (5 to 15 seconds)."
586
  )
587
  crossfade_duration = gr.Slider(
588
  label="Crossfade Duration 🎶 (ms)",
@@ -686,3 +693,4 @@ try:
686
  fastapi_app.openapi_url = None
687
  except Exception:
688
  pass
 
 
1
+ ```python
2
  import os
3
  import torch
4
  import torchaudio
 
15
  import random
16
  from transformers import AutoProcessor, BarkModel
17
  from accelerate import Accelerator
18
+ import bitsandbytes as bnb
19
 
20
  # Suppress warnings for cleaner output
21
  warnings.filterwarnings("ignore")
22
 
23
  # Set PYTORCH_CUDA_ALLOC_CONF to manage memory fragmentation
24
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
25
 
26
  # Check critical dependencies
27
  if np.__version__ != "1.23.5":
 
36
  sys.exit(1)
37
  print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
38
 
39
+ # Initialize accelerator with enhanced CPU offloading
40
+ accelerator = Accelerator(mixed_precision="fp16", cpu_offload=True)
41
 
42
  # Pre-run memory cleanup
43
+ def aggressive_memory_cleanup():
44
+ torch.cuda.empty_cache()
45
+ gc.collect()
46
+ torch.cuda.ipc_collect()
47
+ torch.cuda.synchronize()
48
+ print("Performed aggressive memory cleanup.")
49
+
50
+ aggressive_memory_cleanup()
51
 
52
  # 2) LOAD MODELS
53
  try:
 
57
  print(f"ERROR: Local model path {local_model_path} does not exist.")
58
  print("Please download the MusicGen medium model weights and place them in the correct directory.")
59
  sys.exit(1)
60
+ musicgen_model = MusicGen.get_pretrained(local_model_path, device="cpu") # Load to CPU initially
61
  musicgen_model.set_generation_params(
62
+ duration=5, # Lower default chunk duration
63
  two_step_cfg=False # Disable two-step CFG for stability
64
  )
65
  except Exception as e:
 
68
  sys.exit(1)
69
 
70
  try:
71
+ print("Loading Bark small model into system RAM with 4-bit quantization...")
72
  bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
73
+ bark_model = BarkModel.from_pretrained("suno/bark-small", load_in_4bit=True, device_map="cpu") # Quantize and offload
 
74
  except Exception as e:
75
  print(f"ERROR: Failed to load Bark model: {e}")
76
+ print("Ensure Bark model weights and bitsandbytes are installed.")
77
  sys.exit(1)
78
 
79
  # 3) RESOURCE MONITORING FUNCTION
 
82
  print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / (1024**3):.2f} GB")
83
  print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024**3):.2f} GB")
84
  print(f"CPU Memory Used: {psutil.virtual_memory().percent}%")
85
+ print(f"System RAM Available: {psutil.virtual_memory().available / (1024**3):.2f} GB")
86
  print("---------------")
87
 
88
  # Check available GPU memory
89
+ def check_vram_availability(required_gb=3.0): # Lowered threshold
90
  total_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)
91
  allocated_vram = torch.cuda.memory_allocated() / (1024**3)
92
  available_vram = total_vram - allocated_vram
93
  if available_vram < required_gb:
94
+ print(f"WARNING: Low VRAM available ({available_vram:.2f} GB < {required_gb:.2f} GB required).")
95
+ print("Reduce total_duration, chunk_duration, or enable more CPU offloading.")
96
+ print(f"Total VRAM: {total_vram:.2f} GB, Available: {available_vram:.2f} GB")
97
  return available_vram >= required_gb
98
 
99
  # 4) GENRE PROMPT FUNCTIONS
 
274
  try:
275
  print("Generating vocals with Bark...")
276
  # Move Bark model to GPU
277
+ bark_model = accelerator.prepare(bark_model)
278
 
279
  # Process vocal prompt
280
  inputs = bark_processor(vocal_prompt, return_tensors="pt").to(accelerator.device)
 
298
 
299
  # Move Bark model back to CPU
300
  bark_model = bark_model.to("cpu")
301
+ aggressive_memory_cleanup()
302
 
303
  return vocal_segment, "✅ Vocals generated successfully."
304
  except Exception as e:
 
313
  try:
314
  start_time = time.time()
315
  total_duration = total_duration # Validated by radio button (30, 60, 90, 120)
316
+ chunk_duration = min(max(chunk_duration, 5), 10) # Lower max to 10s
317
  num_chunks = max(1, total_duration // chunk_duration)
318
  chunk_duration = total_duration / num_chunks
319
  overlap_duration = min(1.0, crossfade_duration / 1000.0)
 
321
  sample_rate = musicgen_model.sample_rate
322
  audio_segments = []
323
 
324
+ if not check_vram_availability(required_gb=3.0):
325
+ return None, "⚠️ Insufficient VRAM for generation. Try reducing total_duration or chunk_duration further."
326
 
327
  print("Generating instrumental audio...")
328
  seed = 42
329
  torch.manual_seed(seed)
330
  np.random.seed(seed)
331
 
332
+ # Move MusicGen to GPU
333
+ musicgen_model = accelerator.prepare(musicgen_model)
334
+
335
  for i in range(num_chunks):
336
  chunk_prompt = instrumental_prompt
337
  print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
 
370
  os.remove(temp_wav_path)
371
  audio_segments.append(segment)
372
 
373
+ aggressive_memory_cleanup()
 
 
 
 
374
  print_resource_usage(f"After Chunk {i+1} Generation")
375
 
376
+ # Move MusicGen back to CPU
377
+ musicgen_model = musicgen_model.to("cpu")
378
+ aggressive_memory_cleanup()
379
+
380
  print("Combining instrumental chunks...")
381
  final_segment = audio_segments[0]
382
  for i in range(1, len(audio_segments)):
 
415
  except Exception as e:
416
  return None, f"❌ Generation failed: {e}"
417
  finally:
418
+ aggressive_memory_cleanup()
 
 
 
419
 
420
  # Function to clear inputs
421
  def clear_inputs():
422
+ return "", "", 3.0, 250, 0.9, 1.0, 30, 5, 1000, 120, "none", "none", "none", "none", "none"
423
 
424
  # 8) CUSTOM CSS
425
  css = """
 
567
  maximum=1.0,
568
  value=0.9,
569
  step=0.05,
570
+ pair_with="Keeps tokens with cumulative probability above p."
571
  )
572
  temperature = gr.Slider(
573
  label="Temperature 🔥",
 
586
  chunk_duration = gr.Slider(
587
  label="Chunk Duration ⏱️ (seconds)",
588
  minimum=5,
589
+ maximum=10,
590
+ value=5, # Lower default
591
  step=1,
592
+ info="Duration of each chunk to render (5 to 10 seconds)."
593
  )
594
  crossfade_duration = gr.Slider(
595
  label="Crossfade Duration 🎶 (ms)",
 
693
  fastapi_app.openapi_url = None
694
  except Exception:
695
  pass
696
+ ```