Update app.py
app.py
CHANGED
@@ -47,59 +47,75 @@ def print_resource_usage(stage: str):
     print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024**3):.2f} GB")
     print("---------------")

-# 4) GENRE PROMPT FUNCTIONS (
-def
-    return "

-def
-    return "

-def
-    return "

-def
-    return "

-def
-    return "
+# 4) GENRE PROMPT FUNCTIONS (Redesigned for better track generation)
+def set_classic_rock_prompt():
+    return "Classic rock with bluesy electric guitars, steady drums, groovy bass, Hammond organ fills, and a Led Zeppelin-inspired raw energy, maintaining a cohesive structure with dynamic solos and powerful choruses."

+def set_alternative_rock_prompt():
+    return "Alternative rock with distorted guitar riffs, punchy drums, melodic basslines, atmospheric synths, and a Nirvana-inspired grunge vibe, featuring introspective verses and explosive choruses."

+def set_detroit_techno_prompt():
+    return "Detroit techno with deep pulsing synths, driving basslines, crisp hi-hats, atmospheric pads, and a rhythmic groove inspired by Juan Atkins, maintaining a hypnotic and energetic flow."

+def set_deep_house_prompt():
+    return "Deep house with warm analog synth chords, soulful vocal chops, deep basslines, crisp hi-hats, and a laid-back groove inspired by Larry Heard, creating a consistent hypnotic vibe with smooth transitions."

+def set_smooth_jazz_prompt():
+    return "Smooth jazz with warm saxophone leads, expressive Rhodes piano chords, soft bossa nova drums, upright bass, and a George Benson-inspired improvisational feel, maintaining a cohesive and relaxing vibe."

+def set_bebop_jazz_prompt():
+    return "Bebop jazz with fast-paced saxophone solos, intricate piano runs, walking basslines, complex drum patterns, and a Charlie Parker-inspired improvisational style, featuring dynamic shifts and virtuosic performances."

+def set_baroque_classical_prompt():
+    return "Baroque classical with harpsichord, delicate violin, cello, flute, and a Vivaldi-inspired melodic structure, featuring intricate counterpoint and elegant ornamentation, maintaining a consistent baroque elegance."

+def set_romantic_classical_prompt():
+    return "Romantic classical with lush strings, expressive piano, dramatic brass, subtle woodwinds, and a Chopin-inspired melodic flow, building emotional intensity with sweeping crescendos and delicate pianissimos."

+def set_boom_bap_hiphop_prompt():
+    return "Boom bap hip-hop with gritty sampled drums, deep basslines, jazzy piano loops, vinyl scratches, and a J Dilla-inspired rhythmic groove, maintaining a consistent head-nodding vibe."

+def set_trap_hiphop_prompt():
+    return "Trap hip-hop with hard-hitting 808 bass, snappy snares, rapid hi-hats, eerie synth melodies, and a modern Atlanta-inspired sound, featuring catchy hooks and energetic drops."

 def set_pop_rock_prompt():
-    return "Pop rock with catchy electric guitar riffs, uplifting
+    return "Pop rock with catchy electric guitar riffs, uplifting synths, steady drums, melodic basslines, and a Coldplay-inspired anthemic feel, featuring bright intros and powerful choruses."

 def set_fusion_jazz_prompt():
-    return "Fusion jazz with electric piano, funky basslines, intricate drum patterns, soaring trumpet, and a Herbie Hancock-inspired groove,
+    return "Fusion jazz with electric piano, funky basslines, intricate drum patterns, soaring trumpet, and a Herbie Hancock-inspired groove, blending jazz improvisation with rock and funk elements."

-def
-    return "

-def
-    return "
+def set_edm_prompt():
+    return "EDM with high-energy synth leads, pounding basslines, four-on-the-floor kicks, euphoric breakdowns, and a festival-ready drop, inspired by artists like Avicii and Calvin Harris."

+def set_indie_folk_prompt():
+    return "Indie folk with acoustic guitars, heartfelt vocals, gentle percussion, warm bass, and a Bon Iver-inspired intimate atmosphere, featuring layered harmonies and emotional crescendos."

-# 5) AUDIO PROCESSING FUNCTIONS
+# 5) AUDIO PROCESSING FUNCTIONS (Unchanged)
 def apply_chorus(segment):
-
-    delayed = segment - 6 # Reduced gain to -6 dB for a subtler effect
+    delayed = segment - 6
     delayed = delayed.set_frame_rate(segment.frame_rate)
     return segment.overlay(delayed, position=20)

 def apply_eq(segment):
-
-    segment = segment.
-    segment = segment.high_pass_filter(80) # Lowered cutoff to 80Hz for deeper bass
+    segment = segment.low_pass_filter(8000)
+    segment = segment.high_pass_filter(80)
     return segment

 def apply_limiter(segment, max_db=-3.0):
-    # Apply limiter with a higher threshold to preserve dynamics
     if segment.dBFS > max_db:
         segment = segment - (segment.dBFS - max_db)
     return segment

 def apply_final_gain(segment, target_db=-12.0):
-    # Add final gain adjustment for consistent loudness
     gain_adjustment = target_db - segment.dBFS
     return segment + gain_adjustment

-
+def apply_fade(segment, fade_in_duration=2000, fade_out_duration=2000):
+    segment = segment.fade_in(fade_in_duration)
+    segment = segment.fade_out(fade_out_duration)
+    return segment
+
+# 6) GENERATION & I/O FUNCTIONS (Unchanged)
 def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, crossfade_duration: int):
     global musicgen_model
     if not instrumental_prompt.strip():
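A quick reference sketch, not part of the commit, showing how the dBFS-based helpers in the hunk above behave; it assumes only that pydub is installed, and the 440 Hz test tone and target levels are illustrative.

```python
# Sketch only: exercises the same dBFS math used by apply_final_gain / apply_limiter.
from pydub.generators import Sine

tone = Sine(440).to_audio_segment(duration=1000)  # 1 s test tone

# apply_final_gain: shift the segment so its average level sits at target_db
target_db = -12.0
normalized = tone + (target_db - tone.dBFS)
print(round(normalized.dBFS, 1))  # ~ -12.0

# apply_limiter: attenuate only if the segment is louder than max_db
max_db = -3.0
limited = normalized - (normalized.dBFS - max_db) if normalized.dBFS > max_db else normalized
print(limited.dBFS <= max_db)  # True
```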
@@ -107,27 +123,22 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
     try:
         start_time = time.time()

-        # Ensure total duration is within reasonable bounds (up to 90 seconds for longer tracks)
         total_duration = min(max(total_duration, 10), 90)
         chunk_duration = 15
-        # Use 2 chunks for durations up to 30 seconds, 3 chunks for longer durations
         num_chunks = 2 if total_duration <= 30 else 3
         chunk_duration = total_duration / num_chunks

-        # Generate slightly longer chunks for overlap
         overlap_duration = min(1.0, crossfade_duration / 1000.0)
         generation_duration = chunk_duration + overlap_duration

         audio_chunks = []
         sample_rate = musicgen_model.sample_rate

-
-        torch.manual_seed(42) # Fixed seed for reproducibility
+        torch.manual_seed(42)
         np.random.seed(42)

-        # Generate audio in chunks with a consistent prompt
         for i in range(num_chunks):
-            chunk_prompt = instrumental_prompt
+            chunk_prompt = instrumental_prompt
             print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
             musicgen_model.set_generation_params(
                 duration=generation_duration,
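To make the chunking arithmetic above concrete, here is the same calculation worked through for a hypothetical 45-second request with a 500 ms crossfade (values chosen purely for illustration):

```python
total_duration = min(max(45, 10), 90)                     # clamp -> 45 s
num_chunks = 2 if total_duration <= 30 else 3             # 45 > 30 -> 3 chunks
chunk_duration = total_duration / num_chunks              # 15.0 s per chunk
overlap_duration = min(1.0, 500 / 1000.0)                 # 0.5 s extra per chunk
generation_duration = chunk_duration + overlap_duration   # 15.5 s requested from MusicGen
print(num_chunks, chunk_duration, generation_duration)    # 3 15.0 15.5
```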
@@ -171,18 +182,15 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
             time.sleep(0.5)
             print_resource_usage(f"After Chunk {i+1} Generation")

-        # Combine chunks with crossfade
         print("Combining audio chunks...")
         final_segment = AudioSegment.from_mp3(audio_chunks[0])
         for i in range(1, len(audio_chunks)):
             next_segment = AudioSegment.from_mp3(audio_chunks[i])
-            next_segment = next_segment + 1
+            next_segment = next_segment + 1
             final_segment = final_segment.append(next_segment, crossfade=crossfade_duration)

-        # Trim to exact total duration
         final_segment = final_segment[:total_duration * 1000]

-        # Post-process with improved dynamics
         print("Post-processing final track...")
         final_segment = apply_eq(final_segment)
         final_segment = apply_chorus(final_segment)
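A small sketch, not from app.py, of what `append(crossfade=...)` does to the combined length; silent pydub segments stand in for the generated MP3 chunks and the durations are placeholders:

```python
from pydub import AudioSegment

crossfade_duration = 500  # ms
chunks = [AudioSegment.silent(duration=15_000) for _ in range(3)]  # three 15 s chunks

combined = chunks[0]
for nxt in chunks[1:]:
    # each crossfade overlaps the joined segments, so the result is shorter than the sum
    combined = combined.append(nxt, crossfade=crossfade_duration)

print(len(combined))          # 45_000 - 2 * 500 = 44_000 ms
final = combined[:30 * 1000]  # trim to an exact duration, as in the hunk above
print(len(final))             # 30_000 ms
```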
@@ -361,15 +369,20 @@ with gr.Blocks(css=css) as demo:
                 elem_classes="textbox"
             )
             with gr.Row(elem_classes="genre-buttons"):
-
-
-
-
-
+                classic_rock_btn = gr.Button("Classic Rock", elem_classes="genre-btn")
+                alternative_rock_btn = gr.Button("Alternative Rock", elem_classes="genre-btn")
+                detroit_techno_btn = gr.Button("Detroit Techno", elem_classes="genre-btn")
+                deep_house_btn = gr.Button("Deep House", elem_classes="genre-btn")
+                smooth_jazz_btn = gr.Button("Smooth Jazz", elem_classes="genre-btn")
+                bebop_jazz_btn = gr.Button("Bebop Jazz", elem_classes="genre-btn")
+                baroque_classical_btn = gr.Button("Baroque Classical", elem_classes="genre-btn")
+                romantic_classical_btn = gr.Button("Romantic Classical", elem_classes="genre-btn")
+                boom_bap_hiphop_btn = gr.Button("Boom Bap Hip-Hop", elem_classes="genre-btn")
+                trap_hiphop_btn = gr.Button("Trap Hip-Hop", elem_classes="genre-btn")
                 pop_rock_btn = gr.Button("Pop Rock", elem_classes="genre-btn")
                 fusion_jazz_btn = gr.Button("Fusion Jazz", elem_classes="genre-btn")
-
-
+                edm_btn = gr.Button("EDM", elem_classes="genre-btn")
+                indie_folk_btn = gr.Button("Indie Folk", elem_classes="genre-btn")

         with gr.Column(elem_classes="settings-container"):
             cfg_scale = gr.Slider(
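As a design note, the row of individually declared buttons above could also be generated from a label-to-prompt mapping; the sketch below is hypothetical (`GENRE_PROMPTS` and its entries are not in app.py) and only illustrates the pattern:

```python
import gradio as gr

GENRE_PROMPTS = {
    "Classic Rock": "Classic rock with bluesy electric guitars...",
    "Detroit Techno": "Detroit techno with deep pulsing synths...",
}

with gr.Blocks() as demo:
    instrumental_prompt = gr.Textbox(label="Instrumental Prompt")
    with gr.Row(elem_classes="genre-buttons"):
        for label, prompt in GENRE_PROMPTS.items():
            btn = gr.Button(label, elem_classes="genre-btn")
            # bind the prompt at definition time to avoid late-binding surprises
            btn.click(lambda p=prompt: p, inputs=None, outputs=[instrumental_prompt])
```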
@@ -428,15 +441,20 @@ with gr.Blocks(css=css) as demo:
     out_audio = gr.Audio(label="Generated Stereo Instrumental Track", type="filepath")
     status = gr.Textbox(label="Status", interactive=False)

-
-
-
-
-
+    classic_rock_btn.click(set_classic_rock_prompt, inputs=None, outputs=[instrumental_prompt])
+    alternative_rock_btn.click(set_alternative_rock_prompt, inputs=None, outputs=[instrumental_prompt])
+    detroit_techno_btn.click(set_detroit_techno_prompt, inputs=None, outputs=[instrumental_prompt])
+    deep_house_btn.click(set_deep_house_prompt, inputs=None, outputs=[instrumental_prompt])
+    smooth_jazz_btn.click(set_smooth_jazz_prompt, inputs=None, outputs=[instrumental_prompt])
+    bebop_jazz_btn.click(set_bebop_jazz_prompt, inputs=None, outputs=[instrumental_prompt])
+    baroque_classical_btn.click(set_baroque_classical_prompt, inputs=None, outputs=[instrumental_prompt])
+    romantic_classical_btn.click(set_romantic_classical_prompt, inputs=None, outputs=[instrumental_prompt])
+    boom_bap_hiphop_btn.click(set_boom_bap_hiphop_prompt, inputs=None, outputs=[instrumental_prompt])
+    trap_hiphop_btn.click(set_trap_hiphop_prompt, inputs=None, outputs=[instrumental_prompt])
     pop_rock_btn.click(set_pop_rock_prompt, inputs=None, outputs=[instrumental_prompt])
     fusion_jazz_btn.click(set_fusion_jazz_prompt, inputs=None, outputs=[instrumental_prompt])
-
-
+    edm_btn.click(set_edm_prompt, inputs=None, outputs=[instrumental_prompt])
+    indie_folk_btn.click(set_indie_folk_prompt, inputs=None, outputs=[instrumental_prompt])
     gen_btn.click(
         generate_music,
         inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, crossfade_duration],
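Gradio passes the `inputs` components to the callback positionally, so the list order in `gen_btn.click` has to match `generate_music`'s parameter order. A minimal stand-in wiring, with placeholder slider ranges and defaults and a guessed `outputs=[out_audio, status]` (the outputs line is not shown in this hunk):

```python
import gradio as gr

def generate_music(prompt, cfg_scale, top_k, top_p, temperature, total_duration, crossfade_duration):
    # stand-in for the real generator: return (audio_path, status_message)
    return None, f"Would generate {total_duration}s of audio for: {prompt}"

with gr.Blocks() as demo:
    instrumental_prompt = gr.Textbox(label="Instrumental Prompt")
    cfg_scale = gr.Slider(1.0, 10.0, value=3.0, label="CFG Scale")
    top_k = gr.Slider(10, 500, value=250, step=1, label="Top-K")
    top_p = gr.Slider(0.0, 1.0, value=0.9, label="Top-P")
    temperature = gr.Slider(0.1, 2.0, value=1.0, label="Temperature")
    total_duration = gr.Slider(10, 90, value=30, step=1, label="Total Duration (s)")
    crossfade_duration = gr.Slider(100, 2000, value=500, step=100, label="Crossfade (ms)")
    gen_btn = gr.Button("Generate")
    out_audio = gr.Audio(label="Generated Stereo Instrumental Track", type="filepath")
    status = gr.Textbox(label="Status", interactive=False)
    gen_btn.click(
        generate_music,
        inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, crossfade_duration],
        outputs=[out_audio, status],
    )
```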