|
|
|
import gc
import inspect
import os
import random
import sys
import tempfile
import time
import warnings

import bitsandbytes as bnb
import gradio as gr
import numpy as np
import psutil
import torch
import torchaudio
from accelerate import Accelerator
from audiocraft.models import MusicGen
from pydub import AudioSegment
from torch.cuda.amp import autocast
from transformers import AutoProcessor, BarkModel
from transformers import BitsAndBytesConfig
|
|
|
|
|
# Silence every Python warning for the lifetime of the process.
warnings.filterwarnings("ignore")

# Allocator setting consumed by PyTorch's CUDA caching allocator.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"

# Version advisories: a mismatch only prints a warning, execution continues.
if not np.__version__ == "1.23.5":
    print(f"WARNING: NumPy version {np.__version__} is being used. Tested with numpy==1.23.5.")

if not any(torch.__version__.startswith(prefix) for prefix in ("2.1.0", "2.3.1")):
    print(f"WARNING: PyTorch version {torch.__version__} may not be compatible. Expected torch==2.1.0 or 2.3.1.")

# The script refuses to run without a CUDA device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
    print("ERROR: CUDA is required for GPU rendering. CPU rendering is disabled.")
    sys.exit(1)

print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")

# fp16 mixed-precision Accelerate handle (not referenced elsewhere in the visible source).
accelerator = Accelerator(mixed_precision="fp16")
|
|
|
|
|
def memory_cleanup():
    """Free unreferenced Python objects, then release cached CUDA memory.

    The garbage collector runs first so that tensors kept alive only by
    reference cycles are destroyed before ``torch.cuda.empty_cache()`` is
    called; otherwise the blocks they occupied would stay in PyTorch's cache
    until the next cleanup.
    """
    gc.collect()
    torch.cuda.empty_cache()
    print("Performed memory cleanup.")
|
|
|
# Start from a clean allocator state before the models are loaded.
memory_cleanup()

# --- MusicGen (instrumental generation) ---
print("Loading MusicGen medium model into GPU VRAM...")
local_model_path = "./models/musicgen-medium"
if not os.path.exists(local_model_path):
    print(f"ERROR: Local model path {local_model_path} does not exist.")
    print("Please download the MusicGen medium model weights and place them in the correct directory.")
    sys.exit(1)

try:
    musicgen_model = MusicGen.get_pretrained(local_model_path, device="cuda")
    # Initial defaults; generate_music() overrides them for every request.
    musicgen_model.set_generation_params(duration=5, two_step_cfg=False)
except Exception as load_error:
    print(f"ERROR: Failed to load MusicGen model: {load_error}")
    print("Ensure model weights are correctly placed and dependencies are installed.")
    sys.exit(1)

# --- Bark (vocal generation), loaded with 4-bit NF4 quantization ---
print("Loading Bark small model into GPU VRAM with 4-bit quantization...")
try:
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
    bark_model = BarkModel.from_pretrained(
        "suno/bark-small",
        quantization_config=quantization_config,
        device_map="cuda",
    )
except Exception as load_error:
    print(f"ERROR: Failed to load Bark model: {load_error}")
    print("Ensure Bark model weights and bitsandbytes are installed.")
    sys.exit(1)
|
|
|
|
|
def print_resource_usage(stage: str):
    """Print a GPU / system-memory snapshot labelled with ``stage``.

    Args:
        stage: Free-form label printed in the report header.
    """
    bytes_per_gib = 1024**3

    print(f"--- {stage} ---")

    allocated_gib = torch.cuda.memory_allocated() / bytes_per_gib
    print(f"GPU Memory Allocated: {allocated_gib:.2f} GB")

    reserved_gib = torch.cuda.memory_reserved() / bytes_per_gib
    print(f"GPU Memory Reserved: {reserved_gib:.2f} GB")

    # The label says "CPU Memory" but the figure is psutil's system RAM usage percentage.
    ram_percent = psutil.virtual_memory().percent
    print(f"CPU Memory Used: {ram_percent}%")

    ram_available_gib = psutil.virtual_memory().available / bytes_per_gib
    print(f"System RAM Available: {ram_available_gib:.2f} GB")

    print("---------------")
|
|
|
|
|
def check_vram_availability(required_gb=3.0):
    """Report whether at least ``required_gb`` GiB of VRAM can be used on GPU 0.

    Usable memory is the free memory reported by the CUDA driver (which
    accounts for other processes sharing the GPU) plus the part of PyTorch's
    own allocator cache that is reserved but not backing a live tensor, since
    this process can reuse that cache without asking the driver.

    Args:
        required_gb: Minimum amount of usable VRAM, in GiB.

    Returns:
        True when enough VRAM is usable, False otherwise (including when no
        CUDA device is available). A warning is printed when the check fails.
    """
    if not torch.cuda.is_available():
        print("WARNING: CUDA is not available; VRAM requirement cannot be met.")
        return False

    bytes_per_gib = 1024**3
    free_bytes, total_bytes = torch.cuda.mem_get_info(0)
    reusable_cache = max(torch.cuda.memory_reserved(0) - torch.cuda.memory_allocated(0), 0)

    total_vram = total_bytes / bytes_per_gib
    available_vram = (free_bytes + reusable_cache) / bytes_per_gib
    has_enough = available_vram >= required_gb

    if not has_enough:
        print(f"WARNING: Low VRAM available ({available_vram:.2f} GB < {required_gb:.2f} GB required).")
        print("Reduce total_duration, chunk_duration, or skip vocals.")
        print(f"Total VRAM: {total_vram:.2f} GB, Available: {available_vram:.2f} GB")
    return has_enough
|
|
|
|
|
def _build_instrumental_prompt(
    genre,
    vibe,
    bpm,
    drum_beat,
    synthesizer,
    rhythmic_steps,
    bass_style,
    guitar_style,
    *,
    fast_rhythm,
    slow_rhythm,
    guitar_noun,
    default_guitar="",
    default_bass="",
    default_drum="",
    default_synth="",
):
    """Assemble the instrumental text prompt shared by every genre preset.

    The UI uses the literal string ``"none"`` to mean "no explicit choice";
    in that case the preset's default phrase (possibly empty) is used instead.

    Args:
        genre: Genre name placed after "Instrumental".
        vibe: Artist/mood description placed after the instrument list.
        bpm: Tempo; values above 120 select ``fast_rhythm``.
        drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
            User selections, or ``"none"``.
        fast_rhythm: Rhythm phrase used when no step style is chosen and bpm > 120.
        slow_rhythm: Rhythm phrase used when no step style is chosen and bpm <= 120.
        guitar_noun: Noun appended to a chosen guitar style (e.g. "guitar riffs").
        default_guitar, default_bass, default_drum, default_synth:
            Phrases used when the matching selection is ``"none"``.

    Returns:
        The complete prompt sentence.
    """
    if rhythmic_steps != "none":
        # No leading space here: the template already supplies ", " before it.
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = fast_rhythm if bpm > 120 else slow_rhythm

    # Order matches the prompt layout: bass, guitar, drums, synth.
    parts = (
        bass_style if bass_style != "none" else default_bass,
        f"{guitar_style} {guitar_noun}" if guitar_style != "none" else default_guitar,
        f"{drum_beat} drums" if drum_beat != "none" else default_drum,
        f"{synthesizer} accents" if synthesizer != "none" else default_synth,
    )
    instruments = "".join(f", {part}" for part in parts if part)
    return f"Instrumental {genre}{instruments}, {vibe}, {rhythm} at {bpm} BPM."


def set_red_hot_chili_peppers_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Red Hot Chili Peppers-style funk rock prompt."""
    return _build_instrumental_prompt(
        "funk rock",
        "Red Hot Chili Peppers-inspired vibe with dynamic energy and funky breakdowns",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="strong rhythmic steps",
        slow_rhythm="groovy rhythmic flow",
        guitar_noun="guitar riffs",
        default_guitar="syncopated guitar riffs",
        default_bass="groovy basslines",
    )


def set_nirvana_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Nirvana-style grunge prompt."""
    return _build_instrumental_prompt(
        "grunge",
        "Nirvana-inspired angst-filled sound with quiet-loud dynamics",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="intense rhythmic steps",
        slow_rhythm="grungy rhythmic pulse",
        guitar_noun="guitar riffs",
        default_guitar="raw distorted guitar riffs",
        default_bass="melodic basslines",
    )


def set_pearl_jam_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Pearl Jam-style grunge prompt."""
    return _build_instrumental_prompt(
        "grunge",
        "Pearl Jam-inspired emotional intensity with soaring choruses",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="soulful rhythmic steps",
        slow_rhythm="driving rhythmic flow",
        guitar_noun="guitar leads",
        default_guitar="soulful guitar leads",
        default_bass="deep bass",
    )


def set_soundgarden_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Soundgarden-style grunge prompt."""
    return _build_instrumental_prompt(
        "grunge",
        "Soundgarden-inspired dark, psychedelic edge",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="heavy rhythmic steps",
        slow_rhythm="sludgy rhythmic groove",
        guitar_noun="guitar riffs",
        default_guitar="heavy sludgy guitar riffs",
    )


def set_foo_fighters_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Foo Fighters-style alternative rock prompt.

    The default guitar adjective and the mood are picked at random, so
    repeated calls with the same arguments may return different prompts.
    """
    style = random.choice(["anthemic", "gritty", "melodic", "fast-paced", "driving"])
    mood = random.choice(["energetic", "introspective", "rebellious", "uplifting"])
    return _build_instrumental_prompt(
        "alternative rock",
        f"Foo Fighters-inspired {mood} vibe with powerful choruses",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="powerful rhythmic steps",
        slow_rhythm="catchy rhythmic groove",
        guitar_noun="guitar riffs",
        default_guitar=f"{style} guitar riffs",
    )


def set_smashing_pumpkins_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Smashing Pumpkins-style alternative rock prompt."""
    return _build_instrumental_prompt(
        "alternative rock",
        "Smashing Pumpkins-inspired blend of melancholy and aggression",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="dynamic rhythmic steps",
        slow_rhythm="dreamy rhythmic flow",
        guitar_noun="guitar textures",
        default_guitar="dreamy guitar textures",
    )


def set_radiohead_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Radiohead-style experimental rock prompt."""
    return _build_instrumental_prompt(
        "experimental rock",
        "Radiohead-inspired blend of introspective and innovative soundscapes",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="complex rhythmic steps",
        slow_rhythm="intricate rhythmic pulse",
        guitar_noun="guitar layers",
        default_guitar="intricate guitar layers",
        default_synth="atmospheric synths",
    )


def set_classic_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Led Zeppelin-style classic rock prompt."""
    return _build_instrumental_prompt(
        "classic rock",
        "Led Zeppelin-inspired raw energy with dynamic solos",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="bluesy rhythmic steps",
        slow_rhythm="steady rhythmic groove",
        guitar_noun="electric guitars",
        default_guitar="bluesy electric guitars",
        default_bass="groovy bass",
    )


def set_alternative_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Pixies-style alternative rock prompt."""
    return _build_instrumental_prompt(
        "alternative rock",
        "Pixies-inspired quirky, energetic vibe",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="quirky rhythmic steps",
        slow_rhythm="energetic rhythmic flow",
        guitar_noun="guitar riffs",
        default_guitar="distorted guitar riffs",
        default_bass="melodic basslines",
    )


def set_post_punk_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Joy Division-style post-punk prompt."""
    return _build_instrumental_prompt(
        "post-punk",
        "Joy Division-inspired moody, atmospheric sound",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="sharp rhythmic steps",
        slow_rhythm="moody rhythmic pulse",
        guitar_noun="guitars",
        default_guitar="jangly guitars",
        default_bass="driving basslines",
    )


def set_indie_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Arctic Monkeys-style indie rock prompt."""
    return _build_instrumental_prompt(
        "indie rock",
        "Arctic Monkeys-inspired blend of catchy riffs",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="catchy rhythmic steps",
        slow_rhythm="jangly rhythmic flow",
        guitar_noun="guitars",
        default_guitar="jangly guitars",
    )


def set_funk_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Rage Against the Machine-style funk rock prompt."""
    return _build_instrumental_prompt(
        "funk rock",
        "Rage Against the Machine-inspired mix of groove and aggression",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="aggressive rhythmic steps",
        slow_rhythm="funky rhythmic groove",
        guitar_noun="guitar chords",
        default_guitar="funky guitar chords",
        default_bass="slap bass",
    )


def set_detroit_techno_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Juan Atkins-style Detroit techno prompt (no guitar by default)."""
    return _build_instrumental_prompt(
        "Detroit techno",
        "Juan Atkins-inspired rhythmic groove",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="pulsing rhythmic steps",
        slow_rhythm="deep rhythmic groove",
        guitar_noun="guitars",
        default_bass="driving basslines",
        default_drum="crisp hi-hats",
        default_synth="deep pulsing synths",
    )


def set_deep_house_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Return the Larry Heard-style deep house prompt (no guitar by default)."""
    return _build_instrumental_prompt(
        "deep house",
        "Larry Heard-inspired laid-back groove",
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
        fast_rhythm="soulful rhythmic steps",
        slow_rhythm="laid-back rhythmic flow",
        guitar_noun="guitars",
        default_bass="deep basslines",
        default_synth="warm analog synth chords",
    )
|
|
|
|
|
def set_upbeat_funk_rock_vocal_prompt():
    """Return the preset upbeat funk rock lyrics (one verse and one chorus)."""
    verse = (
        "[Verse 1, upbeat funk rock, male voice]",
        "Cruisin' down the highway, feel the summer breeze,",
        "Funky rhythm in my bones, movin' with ease,",
        "City lights are callin', gonna dance all night,",
        "Livin' for the groove, everything feels right!",
    )
    chorus = (
        "[Chorus, energetic]",
        "Oh-oh-oh, let the funk take control,",
        "Shake your body, let it free your soul,",
        "Oh-oh-oh, we're burnin' up the stage,",
        "Funk it up, we're livin' for the rage!",
    )
    # Sections are separated by a single blank line.
    return "\n\n".join("\n".join(section) for section in (verse, chorus))
|
|
|
def set_grunge_ballad_vocal_prompt():
    """Return the preset grunge ballad lyrics.

    The section-header dashes and several apostrophes had been corrupted by a
    bad character-set round trip. They are written here as plain ASCII ("-"
    and "'"), matching the ASCII apostrophes already used elsewhere in these
    lyrics, so the text handed to the vocal model contains no stray symbols.
    """
    return """[Verse 1 - Soft, Grunge Ballad, male voice]
Shadows fall across my heart, I'm lost in the rain,
Whispers of a broken dream, carry all my pain.
Underneath the weight of time, I'm fading away,
Searching for a spark to light another day.

[Chorus - Intense, male voice]
Scream it out, let the silence break,
Feel the fire, for my soul's sake.
Hold me now, through the endless night,
In the dark, I'm reaching for the light!

[Verse 2 - Building Intensity, male voice]
Cracks appear in my reflection, truth I can't deny,
Memories like ghosts surround, no matter how I try.
Each step forward feels like I'm walking through the past,
Chasing echoes of a peace that never seems to last.

[Chorus - Intensified, male voice]
Scream it out, let the silence break,
Feel the fire, for my soul's sake.
Hold me now, through the endless night,
In the dark, I'm reaching for the light!

[Bridge - Emotional Climax, male voice]
I've been down this road before,
Locked behind a closing door.
But even in the blackest shade,
A flicker of hope refuses to fade.

[Verse 3 - Reflective, male voice]
Rain-soaked streets and neon signs,
Mark the path of these troubled times.
Yet amidst the storm and strife,
I find fragments of a former life.

[Chorus - Final, Powerful, male voice]
Scream it out, let the silence break,
Feel the fire, for my soul's sake.
Hold me now, through the endless night,
In the dark, I'm reaching for the light!

[Outro - Soft, Resolute, male voice]
Though shadows linger and nights are long,
Within my soul, I find a song.
A melody of hope, burning bright,
Guiding me onward, into the light."""
|
|
|
def set_indie_pop_vocal_prompt():
    """Return the preset indie pop lyrics (one verse and one chorus).

    Apostrophes that had been corrupted by a bad character-set round trip are
    written as plain ASCII "'" so the vocal model receives clean text.
    """
    return """[Verse 1, indie pop, female voice]
Walking through the neon streets, where the city sings,
Chasing every fleeting star, on these fragile wings,
Heartbeat like a drum machine, pulsing through the air,
Every moment's electric, love is everywhere.

[Chorus, upbeat]
Oh, we're dancing in the glow, under moonlit skies,
Spinning through the colors, with sparks in our eyes,
Oh, the night is ours to keep, let the music play,
Living for this feeling, we'll never fade away!"""
|
|
|
|
|
def apply_eq(segment):
    """Return ``segment`` after an 8000 Hz low-pass and then an 80 Hz high-pass filter."""
    return segment.low_pass_filter(8000).high_pass_filter(80)
|
|
|
def apply_fade(segment, fade_in_duration=1000, fade_out_duration=1000):
    """Return ``segment`` with a fade-in followed by a fade-out applied.

    Args:
        segment: Audio object exposing ``fade_in`` and ``fade_out``.
        fade_in_duration: Value passed to ``fade_in``.
        fade_out_duration: Value passed to ``fade_out``.
    """
    return segment.fade_in(fade_in_duration).fade_out(fade_out_duration)
|
|
|
def generate_vocals(vocal_prompt: str, total_duration: int, speaker_preset: str):
    """Synthesize vocals with Bark and fit them to ``total_duration`` seconds.

    Args:
        vocal_prompt: Lyrics / style cues to voice.
        total_duration: Target length in seconds; the result is trimmed or
            padded with trailing silence to exactly this length.
        speaker_preset: Bark voice preset name, or ``"default"`` for none.

    Returns:
        ``(segment, status)`` where ``segment`` is a pydub ``AudioSegment`` on
        success and ``None`` on failure, and ``status`` is a user-facing
        message. Errors are reported through ``status``, never raised.
    """
    if not vocal_prompt or not vocal_prompt.strip():
        return None, "⚠️ Please enter a valid vocal prompt!"

    temp_vocal_path = None
    try:
        print("Generating vocals with Bark...")

        # Token-level helpers live on the processor's tokenizer, not on the
        # processor object itself.
        tokenizer = bark_processor.tokenizer
        token_ids = tokenizer.encode(vocal_prompt, add_special_tokens=False)
        if len(token_ids) > 512:
            print("WARNING: Vocal prompt exceeds 512 tokens; truncating to avoid errors.")
            vocal_prompt = tokenizer.decode(token_ids[:512], skip_special_tokens=True)

        # The speaker is selected through the processor's voice_preset
        # argument; prefixing the text with "[preset]" only alters the words.
        voice_preset = speaker_preset if speaker_preset and speaker_preset != "default" else None
        inputs = bark_processor(
            vocal_prompt,
            voice_preset=voice_preset,
            return_tensors="pt",
            return_attention_mask=True,
        )

        generate_kwargs = {
            "input_ids": inputs["input_ids"].to("cuda"),
            "attention_mask": inputs["attention_mask"].to("cuda"),
        }
        history_prompt = inputs.get("history_prompt")
        if history_prompt is not None:
            # Move the preset tensors explicitly so they sit on the same
            # device as the model regardless of library version.
            generate_kwargs["history_prompt"] = {
                key: value.to("cuda") if torch.is_tensor(value) else value
                for key, value in history_prompt.items()
            }

        with torch.no_grad(), autocast():
            vocal_array = bark_model.generate(**generate_kwargs, do_sample=True, pad_token_id=0)

        # (1, samples) float32 on the CPU, as expected by torchaudio.save.
        vocal_tensor = vocal_array.detach().cpu().to(dtype=torch.float32).squeeze().unsqueeze(0)
        sample_rate = getattr(bark_model.generation_config, "sample_rate", 24000)

        # A unique temp file avoids clashes between overlapping requests.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            temp_vocal_path = handle.name
        torchaudio.save(temp_vocal_path, vocal_tensor, sample_rate)
        vocal_segment = AudioSegment.from_wav(temp_vocal_path)

        # Fit to the requested length: trim, then pad with silence if short.
        target_ms = int(total_duration * 1000)
        vocal_segment = vocal_segment[:target_ms]
        shortfall_ms = target_ms - len(vocal_segment)
        if shortfall_ms > 0:
            vocal_segment = vocal_segment + AudioSegment.silent(duration=shortfall_ms)

        return vocal_segment, "✅ Vocals generated successfully."
    except Exception as e:
        return None, f"❌ Vocal generation failed: {e}"
    finally:
        # Runs on success and failure alike so nothing is left behind.
        if temp_vocal_path is not None and os.path.exists(temp_vocal_path):
            os.remove(temp_vocal_path)
        memory_cleanup()
|
|
|
|
|
def _to_stereo(audio_chunk):
    """Return ``audio_chunk`` as a ``(2, samples)`` tensor.

    1-D input is treated as mono; input with more than two dimensions is
    flattened to ``(channels, samples)``. Anything that is not already
    two-channel is reduced to its first channel and duplicated.

    Raises:
        ValueError: If a two-channel tensor cannot be produced.
    """
    if audio_chunk.dim() == 1:
        audio_chunk = audio_chunk.unsqueeze(0)
    elif audio_chunk.dim() > 2:
        audio_chunk = audio_chunk.reshape(-1, audio_chunk.shape[-1])

    if audio_chunk.shape[0] != 2:
        audio_chunk = audio_chunk[:1].repeat(2, 1)

    if audio_chunk.shape[0] != 2:
        raise ValueError(f"Expected stereo audio with shape (2, samples), got shape {audio_chunk.shape}")
    return audio_chunk


def _tensor_to_segment(audio, sample_rate):
    """Convert a ``(channels, samples)`` tensor to an AudioSegment via a temp WAV file."""
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name
    try:
        torchaudio.save(wav_path, audio, sample_rate, bits_per_sample=24)
        return AudioSegment.from_wav(wav_path)
    finally:
        # Remove the temp file even when saving or decoding fails.
        if os.path.exists(wav_path):
            os.remove(wav_path)


def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, chunk_duration: int, crossfade_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, speaker_preset: str):
    """Generate a track with MusicGen (optionally with Bark vocals) and export it as MP3.

    The instrumental is rendered in equal chunks that are cross-faded
    together, trimmed to ``total_duration``, optionally overlaid with vocals,
    then equalised, normalised, faded and written to ``output_cleaned.mp3``.

    Args:
        instrumental_prompt: Text prompt for MusicGen.
        vocal_prompt: Lyrics for Bark; blank skips vocals.
        cfg_scale, top_k, top_p, temperature: MusicGen sampling parameters.
        total_duration: Track length in seconds.
        chunk_duration: Requested chunk length in seconds (clamped to 5-10).
        crossfade_duration: Crossfade between chunks in milliseconds.
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
            Accepted so the signature matches the UI input list; not read here.
        speaker_preset: Bark voice preset forwarded to ``generate_vocals``.

    Returns:
        ``(mp3_path, status)`` on success or ``(None, status)`` on failure.
        Errors are reported through ``status``, never raised.
    """
    if not instrumental_prompt or not instrumental_prompt.strip():
        return None, "⚠️ Please enter a valid instrumental prompt!"

    wants_vocals = bool(vocal_prompt and vocal_prompt.strip())

    try:
        start_time = time.time()

        # UI sliders may deliver floats; these values feed range(), slicing
        # and top-k sampling, which all need integers.
        total_duration = int(total_duration)
        crossfade_duration = int(crossfade_duration)
        top_k = int(top_k)

        chunk_duration = min(max(int(chunk_duration), 5), 10)
        num_chunks = max(1, total_duration // chunk_duration)
        chunk_duration = total_duration / num_chunks

        # Every chunk is rendered long by exactly the crossfade length, so the
        # joined track is never shorter than total_duration.
        overlap_duration = crossfade_duration / 1000.0
        generation_duration = chunk_duration + overlap_duration
        sample_rate = musicgen_model.sample_rate
        audio_segments = []

        if not check_vram_availability(required_gb=3.0):
            return None, "⚠️ Insufficient VRAM for generation. Try reducing total_duration or chunk_duration further."

        print("Generating instrumental audio...")
        seed = 42
        torch.manual_seed(seed)
        np.random.seed(seed)

        # The parameters are identical for every chunk, so set them once.
        musicgen_model.set_generation_params(
            duration=generation_duration,
            use_sampling=True,
            top_k=top_k,
            top_p=top_p,
            temperature=temperature,
            cfg_coef=cfg_scale,
        )

        for index in range(num_chunks):
            print(f"Generating chunk {index + 1}/{num_chunks} on GPU (prompt: {instrumental_prompt})...")
            print_resource_usage(f"Before Chunk {index + 1} Generation")

            with torch.no_grad(), autocast():
                audio_chunk = musicgen_model.generate([instrumental_prompt], progress=True)[0]

            audio_chunk = _to_stereo(audio_chunk.cpu().to(dtype=torch.float32))
            audio_segments.append(_tensor_to_segment(audio_chunk, sample_rate))

            memory_cleanup()
            print_resource_usage(f"After Chunk {index + 1} Generation")

        print("Combining instrumental chunks...")
        final_segment = audio_segments[0]
        for next_segment in audio_segments[1:]:
            # In pydub, `segment + 1` applies a +1 dB gain to the segment.
            # NOTE(review): presumably compensates for the crossfade dip - confirm.
            final_segment = final_segment.append(next_segment + 1, crossfade=crossfade_duration)

        final_segment = final_segment[:total_duration * 1000]

        if wants_vocals:
            vocal_segment, vocal_status = generate_vocals(vocal_prompt, total_duration, speaker_preset)
            if vocal_segment is None:
                return None, vocal_status
            print("Mixing vocals with instrumental...")
            final_segment = final_segment.overlay(vocal_segment, gain_during_overlay=-6)

        print("Post-processing final track...")
        final_segment = apply_eq(final_segment)
        # pydub's headroom is a positive dB distance below full scale. A
        # negative value asks for a peak above full scale and clips the track.
        final_segment = final_segment.normalize(headroom=9.0)
        final_segment = apply_fade(final_segment)

        mp3_path = "output_cleaned.mp3"
        final_segment.export(
            mp3_path,
            format="mp3",
            bitrate="128k",
            tags={"title": "GhostAI Song", "artist": "GhostAI"},
        )
        print(f"Saved final audio to {mp3_path}")

        print_resource_usage("After Final Generation")
        print(f"Total Generation Time: {time.time() - start_time:.2f} seconds")

        if wants_vocals:
            return mp3_path, "✅ Done! Generated song with vocals."
        return mp3_path, "✅ Done! Generated instrumental audio."
    except Exception as e:
        return None, f"❌ Generation failed: {e}"
    finally:
        memory_cleanup()
|
|
|
|
|
def clear_inputs():
    """Return the default value of every input control, in UI output order.

    Order: instrumental prompt, vocal prompt, CFG scale, top-k, top-p,
    temperature, total duration, chunk duration, crossfade duration, BPM,
    drum beat, synthesizer, rhythmic steps, bass style, guitar style,
    speaker preset.
    """
    prompts = ("", "")
    sampling = (3.0, 250, 0.9, 1.0)
    timing = (30, 5, 1000)
    musical = (120, "none", "none", "none", "none", "none")
    return (*prompts, *sampling, *timing, *musical, "default")
|
|
|
|
|
# Stylesheet passed to gr.Blocks(css=...): dark gradient theme, neon accent
# colours, "glitch" keyframe animations for the header, and the Orbitron web font.
css = """
body {
    background: linear-gradient(135deg, #0A0A0A 0%, #1C2526 100%);
    color: #E0E0E0;
    font-family: 'Orbitron', sans-serif;
}
.header-container {
    text-align: center;
    padding: 10px 20px;
    background: rgba(0, 0, 0, 0.9);
    border-bottom: 1px solid #00FF9F;
}
#ghost-logo {
    font-size: 40px;
    animation: glitch-ghost 1.5s infinite;
}
h1 {
    color: #A100FF;
    font-size: 24px;
    animation: glitch-text 2s infinite;
}
p {
    color: #E0E0E0;
    font-size: 12px;
}
.input-container, .settings-container, .output-container {
    max-width: 1200px;
    margin: 20px auto;
    padding: 20px;
    background: rgba(28, 37, 38, 0.8);
    border-radius: 10px;
}
.textbox {
    background: #1A1A1A;
    border: 1px solid #A100FF;
    color: #E0E0E0;
}
.genre-buttons, .vocal-buttons {
    display: flex;
    justify-content: center;
    flex-wrap: wrap;
    gap: 15px;
}
.genre-btn, .vocal-btn, button {
    background: linear-gradient(45deg, #A100FF, #00FF9F);
    border: none;
    color: #0A0A0A;
    padding: 10px 20px;
    border-radius: 5px;
}
.gradio-container {
    padding: 20px;
}
.group-container {
    margin-bottom: 20px;
    padding: 15px;
    border: 1px solid #00FF9F;
    border-radius: 8px;
}
@keyframes glitch-ghost {
    0% { transform: translate(0, 0); opacity: 1; }
    20% { transform: translate(-5px, 2px); opacity: 0.8; }
    100% { transform: translate(0, 0); opacity: 1; }
}
@keyframes glitch-text {
    0% { transform: translate(0, 0); }
    20% { transform: translate(-2px, 1px); }
    100% { transform: translate(0, 0); }
}
@font-face {
    font-family: 'Orbitron';
    src: url('https://fonts.gstatic.com/s/orbitron/v29/yMJRMIlzdpvBhQQL_Qq7dy0.woff2') format('woff2');
}
"""
|
|
|
|
|
# Gradio UI: prompt inputs, generation settings, output widgets and event wiring.
# NOTE(review): the container nesting below is reconstructed from the order of
# the layout calls; confirm it against the intended page layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("""
<div class="header-container">
<div id="ghost-logo">π»</div>
<h1>GhostAI Music Generator πΉ</h1>
<p>Summon the Sound of the Unknown</p>
</div>
""")

    # ---- Prompt inputs ----
    with gr.Column(elem_classes="input-container"):
        gr.Markdown("### πΈ Prompt Settings")
        instrumental_prompt = gr.Textbox(
            label="Instrumental Prompt βοΈ",
            placeholder="Click a genre button or type your own instrumental prompt",
            lines=4,
            elem_classes="textbox",
        )
        with gr.Row(elem_classes="genre-buttons"):
            rhcp_btn = gr.Button("Red Hot Chili Peppers πΆοΈ", elem_classes="genre-btn")
            nirvana_btn = gr.Button("Nirvana Grunge πΈ", elem_classes="genre-btn")
            pearl_jam_btn = gr.Button("Pearl Jam Grunge π¦ͺ", elem_classes="genre-btn")
            soundgarden_btn = gr.Button("Soundgarden Grunge π", elem_classes="genre-btn")
            foo_fighters_btn = gr.Button("Foo Fighters π€", elem_classes="genre-btn")
            smashing_pumpkins_btn = gr.Button("Smashing Pumpkins π", elem_classes="genre-btn")
            radiohead_btn = gr.Button("Radiohead π§ ", elem_classes="genre-btn")
            classic_rock_btn = gr.Button("Classic Rock πΈ", elem_classes="genre-btn")
            alternative_rock_btn = gr.Button("Alternative Rock π΅", elem_classes="genre-btn")
            post_punk_btn = gr.Button("Post-Punk π€", elem_classes="genre-btn")
            indie_rock_btn = gr.Button("Indie Rock π€", elem_classes="genre-btn")
            funk_rock_btn = gr.Button("Funk Rock πΊ", elem_classes="genre-btn")
            detroit_techno_btn = gr.Button("Detroit Techno ποΈ", elem_classes="genre-btn")
            deep_house_btn = gr.Button("Deep House π ", elem_classes="genre-btn")

        vocal_prompt = gr.Textbox(
            label="Vocal Prompt π€",
            placeholder="Click a vocal button or type your own vocal prompt (e.g., '[Verse, upbeat pop, male voice] Lyrics...')",
            lines=4,
            elem_classes="textbox",
        )
        with gr.Row(elem_classes="vocal-buttons"):
            funk_vocal_btn = gr.Button("Upbeat Funk Rock π΅", elem_classes="vocal-btn")
            grunge_vocal_btn = gr.Button("Grunge Ballad πΈ", elem_classes="vocal-btn")
            indie_vocal_btn = gr.Button("Indie Pop π€", elem_classes="vocal-btn")

        speaker_preset = gr.Dropdown(
            label="Bark Speaker Preset ποΈ",
            choices=["default", "v2/en_speaker_6", "v2/en_speaker_9"],
            value="default",
            info="Select a speaker preset to influence vocal style (e.g., male or female voice). Note: Bark is primarily a speech model; singing styles are limited and best controlled via prompt cues (e.g., [soft], [intense]).",
        )

    # ---- Generation settings ----
    with gr.Column(elem_classes="settings-container"):
        gr.Markdown("### βοΈ API Settings")
        with gr.Group(elem_classes="group-container"):
            cfg_scale = gr.Slider(
                label="CFG Scale π―",
                minimum=1.0,
                maximum=10.0,
                value=3.0,
                step=0.1,
                info="Controls how closely the music follows the prompt.",
            )
            top_k = gr.Slider(
                label="Top-K Sampling π’",
                minimum=10,
                maximum=500,
                value=250,
                step=10,
                info="Limits sampling to the top k most likely tokens.",
            )
            top_p = gr.Slider(
                label="Top-P Sampling π°",
                minimum=0.0,
                maximum=1.0,
                value=0.9,
                step=0.05,
                info="Keeps tokens with cumulative probability above p.",
            )
            temperature = gr.Slider(
                label="Temperature π₯",
                minimum=0.1,
                maximum=2.0,
                value=1.0,
                step=0.1,
                info="Controls randomness; higher values increase diversity.",
            )
            total_duration = gr.Radio(
                label="Song Length β³ (seconds)",
                choices=[30, 60, 90, 120],
                value=30,
                info="Select the total duration of the track.",
            )
            chunk_duration = gr.Slider(
                label="Chunk Duration β±οΈ (seconds)",
                minimum=5,
                maximum=10,
                value=5,
                step=1,
                info="Duration of each chunk to render (5 to 10 seconds).",
            )
            crossfade_duration = gr.Slider(
                label="Crossfade Duration πΆ (ms)",
                minimum=100,
                maximum=2000,
                value=1000,
                step=100,
                info="Crossfade duration between chunks.",
            )

        gr.Markdown("### π΅ Musical Controls")
        with gr.Group(elem_classes="group-container"):
            bpm = gr.Slider(
                label="Tempo π΅ (BPM)",
                minimum=60,
                maximum=180,
                value=120,
                step=1,
                info="Beats per minute to set the track's tempo.",
            )
            drum_beat = gr.Dropdown(
                label="Drum Beat π₯",
                choices=["none", "standard rock", "funk groove", "techno kick", "jazz swing"],
                value="none",
                info="Select a drum beat style to influence the rhythm.",
            )
            synthesizer = gr.Dropdown(
                label="Synthesizer πΉ",
                choices=["none", "analog synth", "digital pad", "arpeggiated synth"],
                value="none",
                info="Select a synthesizer style for electronic accents.",
            )
            rhythmic_steps = gr.Dropdown(
                label="Rhythmic Steps π£",
                choices=["none", "syncopated steps", "steady steps", "complex steps"],
                value="none",
                info="Select a rhythmic step style to enhance the beat.",
            )
            bass_style = gr.Dropdown(
                label="Bass Style πΈ",
                choices=["none", "slap bass", "deep bass", "melodic bass"],
                value="none",
                info="Select a bass style to shape the low end.",
            )
            guitar_style = gr.Dropdown(
                label="Guitar Style πΈ",
                choices=["none", "distorted", "clean", "jangle"],
                value="none",
                info="Select a guitar style to define the riffs.",
            )

        with gr.Row(elem_classes="action-buttons"):
            gen_btn = gr.Button("Generate Music π")
            clr_btn = gr.Button("Clear Inputs π§Ή")

    # ---- Output ----
    with gr.Column(elem_classes="output-container"):
        gr.Markdown("### π§ Output")
        out_audio = gr.Audio(label="Generated Song π΅", type="filepath")
        status = gr.Textbox(label="Status π’", interactive=False)

    # ---- Event wiring ----
    # Every genre button rebuilds the instrumental prompt from the current
    # musical controls; handlers are registered in the buttons' display order.
    musical_inputs = [bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style]
    genre_handlers = [
        (rhcp_btn, set_red_hot_chili_peppers_prompt),
        (nirvana_btn, set_nirvana_grunge_prompt),
        (pearl_jam_btn, set_pearl_jam_grunge_prompt),
        (soundgarden_btn, set_soundgarden_grunge_prompt),
        (foo_fighters_btn, set_foo_fighters_prompt),
        (smashing_pumpkins_btn, set_smashing_pumpkins_prompt),
        (radiohead_btn, set_radiohead_prompt),
        (classic_rock_btn, set_classic_rock_prompt),
        (alternative_rock_btn, set_alternative_rock_prompt),
        (post_punk_btn, set_post_punk_prompt),
        (indie_rock_btn, set_indie_rock_prompt),
        (funk_rock_btn, set_funk_rock_prompt),
        (detroit_techno_btn, set_detroit_techno_prompt),
        (deep_house_btn, set_deep_house_prompt),
    ]
    for genre_button, genre_handler in genre_handlers:
        genre_button.click(
            genre_handler,
            inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style],
            outputs=instrumental_prompt,
        )

    # Vocal preset buttons take no inputs and fill the vocal prompt box.
    vocal_handlers = [
        (funk_vocal_btn, set_upbeat_funk_rock_vocal_prompt),
        (grunge_vocal_btn, set_grunge_ballad_vocal_prompt),
        (indie_vocal_btn, set_indie_pop_vocal_prompt),
    ]
    for vocal_button, vocal_handler in vocal_handlers:
        vocal_button.click(vocal_handler, inputs=[], outputs=vocal_prompt)

    # The order here must match generate_music's parameters and the tuple
    # returned by clear_inputs.
    all_controls = [instrumental_prompt, vocal_prompt, cfg_scale, top_k, top_p, temperature, total_duration, chunk_duration, crossfade_duration] + musical_inputs + [speaker_preset]

    gen_btn.click(
        generate_music,
        inputs=list(all_controls),
        outputs=[out_audio, status],
    )
    clr_btn.click(
        clear_inputs,
        inputs=None,
        outputs=list(all_controls),
    )
|
|
|
|
|
# Start the web server.
#
# The FastAPI documentation endpoints are disabled through `app_kwargs`, which
# Gradio forwards to the FastAPI constructor. Changing `docs_url` and friends
# on the app object after launch() has no effect: in a script launch() blocks
# until shutdown, and the routes are registered when the app is constructed.
# Support is feature-detected so an older Gradio without `app_kwargs` still starts.
#
# NOTE(review): server_name="0.0.0.0" together with share=True exposes this
# app beyond localhost - confirm that is intended.
launch_options = {
    "server_name": "0.0.0.0",
    "server_port": 9999,
    "share": True,
    "inbrowser": False,
    "show_error": True,
}
if "app_kwargs" in inspect.signature(demo.launch).parameters:
    launch_options["app_kwargs"] = {
        "docs_url": None,
        "redoc_url": None,
        "openapi_url": None,
    }
else:
    print("WARNING: This Gradio version does not accept app_kwargs; FastAPI docs endpoints were not disabled.")

app = demo.launch(**launch_options)
|
|