Update barks.py
barks.py CHANGED
@@ -1,4 +1,4 @@
-
+```python
 import os
 import torch
 import torchaudio

@@ -15,12 +15,13 @@ import warnings
 import random
 from transformers import AutoProcessor, BarkModel
 from accelerate import Accelerator
+import bitsandbytes as bnb

 # Suppress warnings for cleaner output
 warnings.filterwarnings("ignore")

 # Set PYTORCH_CUDA_ALLOC_CONF to manage memory fragmentation
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"

 # Check critical dependencies
 if np.__version__ != "1.23.5":

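A note on the allocator setting above: `max_split_size_mb:64` stops the CUDA caching allocator from splitting blocks larger than 64 MB, trading some throughput for less fragmentation on small-VRAM cards. The variable is only read when CUDA initializes, so it must be set before the first allocation. A minimal sketch (the value 64 is this commit's choice, not a universal default):

```python
import os
# Must be set before torch initializes CUDA, or it is silently ignored.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"

import torch

if torch.cuda.is_available():
    _ = torch.zeros(1, device="cuda")  # first allocation picks up the setting
```
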
@@ -35,14 +36,18 @@ if device != "cuda":
     sys.exit(1)
 print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")

-# Initialize accelerator
-accelerator = Accelerator(mixed_precision="fp16")
+# Initialize accelerator with enhanced CPU offloading
+accelerator = Accelerator(mixed_precision="fp16", cpu_offload=True)

 # Pre-run memory cleanup
-torch.cuda.empty_cache()
-gc.collect()
-torch.cuda.ipc_collect()
-torch.cuda.synchronize()
+def aggressive_memory_cleanup():
+    torch.cuda.empty_cache()
+    gc.collect()
+    torch.cuda.ipc_collect()
+    torch.cuda.synchronize()
+    print("Performed aggressive memory cleanup.")
+
+aggressive_memory_cleanup()

 # 2) LOAD MODELS
 try:

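One caution on `Accelerator(mixed_precision="fp16", cpu_offload=True)`: the stable accelerate releases I am aware of do not accept a `cpu_offload` keyword in the `Accelerator` constructor, so this line may raise a `TypeError`. Offloading is exposed as a standalone big-model utility instead; a hedged sketch of that route (the helper name `offload_to_cpu` is mine, and any `nn.Module` works as the target):

```python
import torch
from accelerate import Accelerator, cpu_offload

accelerator = Accelerator(mixed_precision="fp16")  # no cpu_offload kwarg

def offload_to_cpu(model):
    # Keeps weights in system RAM and streams them to the GPU per forward pass.
    return cpu_offload(model, execution_device=accelerator.device)
```
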
@@ -52,9 +57,9 @@ try:
         print(f"ERROR: Local model path {local_model_path} does not exist.")
         print("Please download the MusicGen medium model weights and place them in the correct directory.")
         sys.exit(1)
-    musicgen_model = MusicGen.get_pretrained(local_model_path, device=
+    musicgen_model = MusicGen.get_pretrained(local_model_path, device="cpu")  # Load to CPU initially
     musicgen_model.set_generation_params(
-        duration=
+        duration=5,  # Lower default chunk duration
         two_step_cfg=False  # Disable two-step CFG for stability
     )
 except Exception as e:

@@ -63,13 +68,12 @@ except Exception as e:
     sys.exit(1)

 try:
-    print("Loading Bark small model into system RAM...")
+    print("Loading Bark small model into system RAM with 4-bit quantization...")
     bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
-    bark_model = BarkModel.from_pretrained("suno/bark-small")
-    bark_model = bark_model.to("cpu")  # Offload to CPU initially
+    bark_model = BarkModel.from_pretrained("suno/bark-small", load_in_4bit=True, device_map="cpu")  # Quantize and offload
 except Exception as e:
     print(f"ERROR: Failed to load Bark model: {e}")
-    print("Ensure Bark model weights
+    print("Ensure Bark model weights and bitsandbytes are installed.")
     sys.exit(1)

 # 3) RESOURCE MONITORING FUNCTION

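The 4-bit load above pairs `load_in_4bit=True` with `device_map="cpu"`, but bitsandbytes' 4-bit kernels are CUDA-only, so transformers will normally refuse to place quantized weights on the CPU. A sketch of the combination that is expected to work, assuming a recent transformers with `BitsAndBytesConfig` (the compute dtype is my assumption, not the commit's):

```python
import torch
from transformers import AutoProcessor, BarkModel, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumption: fp16 compute for speed
)
bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
bark_model = BarkModel.from_pretrained(
    "suno/bark-small",
    quantization_config=quant_config,
    device_map="auto",  # quantized weights must land on the GPU
)
```
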
@@ -78,15 +82,18 @@ def print_resource_usage(stage: str):
     print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / (1024**3):.2f} GB")
     print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024**3):.2f} GB")
     print(f"CPU Memory Used: {psutil.virtual_memory().percent}%")
+    print(f"System RAM Available: {psutil.virtual_memory().available / (1024**3):.2f} GB")
     print("---------------")

 # Check available GPU memory
-def check_vram_availability(required_gb=
+def check_vram_availability(required_gb=3.0):  # Lowered threshold
     total_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)
     allocated_vram = torch.cuda.memory_allocated() / (1024**3)
     available_vram = total_vram - allocated_vram
     if available_vram < required_gb:
-        print(f"WARNING: Low VRAM available ({available_vram:.2f} GB
+        print(f"WARNING: Low VRAM available ({available_vram:.2f} GB < {required_gb:.2f} GB required).")
+        print("Reduce total_duration, chunk_duration, or enable more CPU offloading.")
+        print(f"Total VRAM: {total_vram:.2f} GB, Available: {available_vram:.2f} GB")
     return available_vram >= required_gb

 # 4) GENRE PROMPT FUNCTIONS

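Note that `total_memory - memory_allocated()` overstates what is really free: it ignores the caching allocator's reserved-but-unused pool and any other process on the GPU. If precision matters, `torch.cuda.mem_get_info()` reports the driver's own numbers; a sketch of that variant:

```python
import torch

def free_vram_gb(device=0):
    # Driver-reported free bytes; accounts for every process on the GPU.
    free_bytes, _total_bytes = torch.cuda.mem_get_info(device)
    return free_bytes / (1024**3)

if torch.cuda.is_available() and free_vram_gb() < 3.0:
    print("WARNING: the driver reports less than 3 GB of free VRAM.")
```
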
@@ -267,7 +274,7 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
     try:
         print("Generating vocals with Bark...")
         # Move Bark model to GPU
-        bark_model =
+        bark_model = accelerator.prepare(bark_model)

         # Process vocal prompt
         inputs = bark_processor(vocal_prompt, return_tensors="pt").to(accelerator.device)

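`accelerator.prepare(bark_model)` does move the model to the accelerator's device, but `prepare` is intended as one-time setup (wrapping for mixed precision and distributed use), not a per-call shuttle; calling it on every generation re-wraps the model. A plain `.to()` round-trip, sketched below with a helper name of my own, is the more usual pattern for the move-use-release cycle this function wants:

```python
import torch

def run_on_gpu_then_release(model, fn, device="cuda"):
    """Move a model to the GPU for one call, then hand the VRAM back."""
    model.to(device)
    try:
        return fn(model)  # e.g. lambda m: m.generate(**inputs)
    finally:
        model.to("cpu")
        torch.cuda.empty_cache()
```
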
@@ -291,7 +298,7 @@ def generate_vocals(vocal_prompt: str, total_duration: int):

         # Move Bark model back to CPU
         bark_model = bark_model.to("cpu")
-        torch.cuda.empty_cache()
+        aggressive_memory_cleanup()

         return vocal_segment, "✅ Vocals generated successfully."
     except Exception as e:

@@ -306,7 +313,7 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
     try:
         start_time = time.time()
         total_duration = total_duration  # Validated by radio button (30, 60, 90, 120)
-        chunk_duration = min(max(chunk_duration, 5),
+        chunk_duration = min(max(chunk_duration, 5), 10)  # Lower max to 10s
         num_chunks = max(1, total_duration // chunk_duration)
         chunk_duration = total_duration / num_chunks
         overlap_duration = min(1.0, crossfade_duration / 1000.0)

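The three lines around `num_chunks` implement a clamp-then-retile scheme: the requested chunk length is clamped to the new 5-10 s window, an integer chunk count is derived, and the chunk length is then stretched so the chunks tile the total exactly. A worked sketch:

```python
def plan_chunks(total_duration, requested_chunk):
    chunk = min(max(requested_chunk, 5), 10)      # clamp to the 5-10 s window
    num_chunks = max(1, total_duration // chunk)  # whole chunks that fit
    chunk = total_duration / num_chunks           # stretch to tile exactly
    return num_chunks, chunk

print(plan_chunks(30, 7))  # -> (4, 7.5): four 7.5 s chunks cover 30 s
```
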
@@ -314,14 +321,17 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
         sample_rate = musicgen_model.sample_rate
         audio_segments = []

-        if not check_vram_availability(required_gb=
-            return None, "⚠️ Insufficient VRAM for generation.
+        if not check_vram_availability(required_gb=3.0):
+            return None, "⚠️ Insufficient VRAM for generation. Try reducing total_duration or chunk_duration further."

         print("Generating instrumental audio...")
         seed = 42
         torch.manual_seed(seed)
         np.random.seed(seed)

+        # Move MusicGen to GPU
+        musicgen_model = accelerator.prepare(musicgen_model)
+
         for i in range(num_chunks):
             chunk_prompt = instrumental_prompt
             print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")

@@ -360,13 +370,13 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
             os.remove(temp_wav_path)
             audio_segments.append(segment)

-            torch.cuda.empty_cache()
-            gc.collect()
-            torch.cuda.ipc_collect()
-            torch.cuda.synchronize()
-            time.sleep(0.5)
+            aggressive_memory_cleanup()
             print_resource_usage(f"After Chunk {i+1} Generation")

+        # Move MusicGen back to CPU
+        musicgen_model = musicgen_model.to("cpu")
+        aggressive_memory_cleanup()
+
         print("Combining instrumental chunks...")
         final_segment = audio_segments[0]
         for i in range(1, len(audio_segments)):

@@ -405,14 +415,11 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
     except Exception as e:
         return None, f"❌ Generation failed: {e}"
     finally:
-        torch.cuda.empty_cache()
-        gc.collect()
-        torch.cuda.ipc_collect()
-        torch.cuda.synchronize()
+        aggressive_memory_cleanup()

 # Function to clear inputs
 def clear_inputs():
-    return "", "", 3.0, 250, 0.9, 1.0, 30,
+    return "", "", 3.0, 250, 0.9, 1.0, 30, 5, 1000, 120, "none", "none", "none", "none", "none"

 # 8) CUSTOM CSS
 css = """

@@ -560,7 +567,7 @@ with gr.Blocks(css=css) as demo:
                     maximum=1.0,
                     value=0.9,
                     step=0.05,
-
+                    pair_with="Keeps tokens with cumulative probability above p."
                 )
                 temperature = gr.Slider(
                     label="Temperature 🔥",

@@ -579,10 +586,10 @@ with gr.Blocks(css=css) as demo:
                 chunk_duration = gr.Slider(
                     label="Chunk Duration ⏱️ (seconds)",
                     minimum=5,
-                    maximum=
-                    value=
+                    maximum=10,
+                    value=5,  # Lower default
                     step=1,
-                    info="Duration of each chunk to render (5 to
+                    info="Duration of each chunk to render (5 to 10 seconds)."
                 )
                 crossfade_duration = gr.Slider(
                     label="Crossfade Duration 🎶 (ms)",

@@ -686,3 +693,4 @@ try:
     fastapi_app.openapi_url = None
 except Exception:
     pass
+```
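
Two apparent paste artifacts survive in the new file: the committed barks.py now opens with a literal "```python" line (replacing old line 1) and closes with a "```" line (new line 696), both Markdown fence markers that will make the module a SyntaxError on import; and `pair_with=` on the top-p slider is not a Gradio Slider parameter, where the neighboring sliders use `info=` for the same kind of help text.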