"""Streamlit app that turns a text prompt into a short music clip with MusicGen."""

from transformers import AutoProcessor, MusicgenForConditionalGeneration
from IPython.display import Audio
import scipy
import torch
import streamlit as st

MODEL_ID = "facebook/musicgen-small"


@st.cache_resource
def _load_musicgen():
    """Load the MusicGen processor and model once and keep them on the CPU.

    Streamlit re-executes the whole script on every interaction; caching the
    loaded objects avoids re-reading the checkpoint for each button click.

    Returns:
        A ``(processor, model, device)`` tuple.
    """
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    model = MusicgenForConditionalGeneration.from_pretrained(MODEL_ID)
    device = torch.device("cpu")
    model.to(device)
    return processor, model, device


def mu_gen(prompt) -> Audio:
    """Generate music for *prompt* and return it as an IPython ``Audio`` object.

    Args:
        prompt: Text description of the desired music. It is converted with
            ``str()`` before being handed to the processor.

    Returns:
        An ``IPython.display.Audio`` built from the first generated sample at
        the sampling rate reported by the model's audio encoder config.
    """
    processor, model, device = _load_musicgen()

    inputs = processor(
        text=[str(prompt)],
        padding=True,
        return_tensors="pt",
    )
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Generate audio on CPU.
    audio_values = model.generate(**inputs, max_new_tokens=256)
    sampling_rate = model.config.audio_encoder.sampling_rate

    # Create an Audio object from the generated audio.
    return Audio(audio_values[0].numpy(), rate=sampling_rate)


def main():
    """Render the page: a prompt box, a button, and the generated audio."""
    st.title("Text to music")

    # Input text prompt.
    title = st.text_input('Write a prompt (จะใช้เวลาค่อนข้างมากในการสร้างเนื่องจากใช้ CPU ในการรันโมเดล)', "")

    if st.button('Generate Music'):
        if not title.strip():
            # Generation is slow on CPU; don't start it for an empty prompt.
            st.warning('Please enter a prompt first.')
            return

        # Pass the text the user typed (held in `title`) to the generator.
        generated_music = mu_gen(title)

        # IPython's Audio encodes array input to WAV bytes and keeps them in
        # `.data`, which st.audio can play directly.
        st.audio(generated_music.data, format="audio/wav")
        st.caption('Generated Music')


if __name__ == '__main__':
    main()