import io
import wave

import streamlit as st
import torch
from IPython.display import Audio  # NOTE(review): unused here — confirm before removing
from transformers import AutoProcessor, MusicgenForConditionalGeneration


def mu_gen(prompt):
    """Generate a short music clip from a text prompt and return it as WAV bytes.

    Loads the ``facebook/musicgen-small`` model on CPU, generates up to
    256 new audio tokens conditioned on ``prompt``, and encodes the mono
    waveform as a 16-bit PCM WAV file in memory.

    Args:
        prompt: Text description of the music to generate (coerced to ``str``).

    Returns:
        bytes: A complete WAV file (mono, 16-bit PCM, model sampling rate).
    """
    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    device = torch.device("cpu")
    model.to(device)

    inputs = processor(
        text=[str(prompt)],
        padding=True,
        return_tensors="pt",
    )
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Generate audio on CPU; inference_mode skips autograd bookkeeping.
    with torch.inference_mode():
        audio_values = model.generate(**inputs, max_new_tokens=256)
    sampling_rate = model.config.audio_encoder.sampling_rate

    # BUG FIX: the model emits a float waveform in [-1, 1], but the WAV header
    # below declares 16-bit PCM. Writing raw float32 bytes produced noise.
    # Clamp, scale to the int16 range, and convert before serializing.
    # audio_values is (batch, channels, samples); [0, 0] selects the mono track.
    pcm16 = (
        audio_values[0, 0]
        .clamp(-1.0, 1.0)
        .mul(32767.0)
        .to(torch.int16)
        .cpu()
        .numpy()
    )

    # Create an in-memory WAV file
    with io.BytesIO() as wav_file:
        with wave.open(wav_file, 'wb') as wf:
            wf.setnchannels(1)  # Mono audio
            wf.setsampwidth(2)  # 16-bit audio
            wf.setframerate(sampling_rate)
            wf.writeframes(pcm16.tobytes())
        return wav_file.getvalue()  # Return the WAV audio data as bytes


def main():
    """Streamlit entry point: prompt input plus a button that plays the result."""
    st.title("Text to Music")

    # Input text prompt
    title = st.text_input('Write a prompt (จะใช้เวลาค่อนข้างมากในการสร้างเนื่องจากใช้ CPU ในการรันโมเดล)', "")

    if st.button('Generate Music'):
        # Call the mu_gen function to generate music using the 'title' prompt
        generated_music = mu_gen(title)
        # Display the generated audio in WAV format
        st.audio(generated_music, format='audio/wav', start_time=0)


if __name__ == '__main__':
    main()