from transformers import AutoProcessor, MusicgenForConditionalGeneration
import scipy.io.wavfile
import torch
import streamlit as st


def mu_gen(prompt):
    # Load the text processor and the small MusicGen checkpoint.
    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

    # Run on CPU; generation is slow but needs no GPU.
    device = torch.device("cpu")
    model.to(device)

    # Tokenize the text prompt.
    inputs = processor(
        text=[str(prompt)],
        padding=True,
        return_tensors="pt",
    )
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Generate audio; 256 new tokens corresponds to roughly 5 seconds of music.
    audio_values = model.generate(**inputs, max_new_tokens=256)
    sampling_rate = model.config.audio_encoder.sampling_rate

    # audio_values has shape (batch, channels, samples); take the first mono waveform,
    # write it to a WAV file, and return the path so Streamlit can play it back.
    audio_array = audio_values[0, 0].cpu().numpy()
    output_path = "musicgen_out.wav"
    scipy.io.wavfile.write(output_path, rate=sampling_rate, data=audio_array)

    return output_path


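# Optional sketch (not part of the original script): loading the checkpoint on every
# button click is slow. Streamlit's st.cache_resource can keep the processor and model
# in memory across reruns. mu_gen above does not use this helper; the name load_musicgen
# is illustrative only.
@st.cache_resource
def load_musicgen():
    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    return processor, model

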
def main():
    st.title("Text to music")

    # Note for users: generation takes a while because the model runs on CPU.
    prompt = st.text_input('Write a prompt (generation takes a while because the model runs on CPU)', "")

    if st.button('Generate Music'):
        # Generate the audio file and play it back in the app.
        audio_file = mu_gen(prompt)
        st.audio(audio_file)


if __name__ == '__main__':
    main()
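# To run the app locally (assuming this script is saved as app.py):
#   streamlit run app.py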