import os
from huggingface_hub import login
import torch
import torchaudio
from einops import rearrange
import gradio as gr
from stable_audio_tools import get_pretrained_model
from stable_audio_tools.inference.generation import generate_diffusion_cond

# Authenticate
token = os.getenv("HUGGINGFACE_TOKEN")
if not token:
    raise RuntimeError("HUGGINGFACE_TOKEN not set")
login(token=token, add_to_git_credential=False)

# Load model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, config = get_pretrained_model("stabilityai/stable-audio-open-small")
model = model.to(device)
sample_rate = config["sample_rate"]
sample_size = config["sample_size"]

# Inference function
def generate_audio(prompt):
    # stable-audio-open-small produces clips of up to ~11 seconds
    conditioning = [{"prompt": prompt, "seconds_total": 11}]
    with torch.no_grad():
        output = generate_diffusion_cond(
            model,
            steps=8,
            conditioning=conditioning,
            sample_size=sample_size,
            device=device,
        )
    # Collapse the batch dimension, peak-normalize, and convert to 16-bit PCM
    output = rearrange(output, "b d n -> d (b n)")
    output = (
        output.to(torch.float32)
        .div(torch.max(torch.abs(output)))
        .clamp(-1, 1)
        .mul(32767)
        .to(torch.int16)
        .cpu()
    )
    path = "output.wav"
    torchaudio.save(path, output, sample_rate)
    return path

# 🌀 Hot Prompt Club UI
gr.Interface(
    fn=generate_audio,
    inputs=gr.Textbox(
        label="🎤 Prompt your sonic art here",
        placeholder="e.g. 'drunk driving with mario and yung lean'",
    ),
    outputs=gr.Audio(
        type="filepath",
        label="🧠 Generated Audio",
    ),
    title='🌐 Hot Prompts in Your Area: "My Husband Is Dead"',
    description="Enter a fun sound idea for music art.",
    examples=[
        "ghosts peeing in a server room",
        "tech startup boss villain entrance music",
        "AI doing acid in a technofeudalist dystopia",
    ],
    css="style.css",
).launch()
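
# To run locally (assuming this script is saved as app.py; the filename is a guess),
# export a Hub token first, e.g.:
#   HUGGINGFACE_TOKEN=hf_xxx python app.py
# then open the local URL that Gradio prints to the console.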