Wryley1234 committed on
Commit fbdbc6e · 1 Parent(s): 9e846ad
Files changed (1)
  1. app.py +198 -50
app.py CHANGED
@@ -1,55 +1,203 @@
- from model import PopMusicTransformer
- import os
- os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
- import tensorflow as tf
- tf.compat.v1.disable_eager_execution()
  import gradio as gr
- import requests
- import torchtext
- import zipfile
-
- torchtext.utils.download_from_url("https://drive.google.com/uc?id=1gxuTSkF51NP04JZgTE46Pg4KQsbHQKGo", root=".")
- torchtext.utils.download_from_url("https://drive.google.com/uc?id=1nAKjaeahlzpVAX0F9wjQEG_hL4UosSbo", root=".")
-
- with zipfile.ZipFile("REMI-tempo-checkpoint.zip","r") as zip_ref:
-     zip_ref.extractall(".")
- with zipfile.ZipFile("REMI-tempo-chord-checkpoint.zip","r") as zip_ref:
-     zip_ref.extractall(".")
-
- url = 'https://github.com/AK391/remi/blob/master/input.midi?raw=true'
- r = requests.get(url, allow_redirects=True)
- open("input.midi", 'wb').write(r.content)
-
-
- # declare model
- model = PopMusicTransformer(
-     checkpoint='REMI-tempo-checkpoint',
-     is_training=False)
-
- def inference(midi):
-     # generate continuation
-     model.generate(
-         n_target_bar=4,
-         temperature=1.2,
-         topk=5,
-         output_path='./result/continuation.midi',
-         prompt=midi.name)
-     return './result/continuation.midi'
-
- title = "Pop Music Transformer"
- description = "demo for Pop Music Transformer. To use it, simply upload your midi file, or click one of the examples to load them. Read more at the links below."
- article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2002.00212'>Pop Music Transformer: Beat-based Modeling and Generation of Expressive Pop Piano Compositions</a> | <a href='https://github.com/YatingMusic/remi'>Github Repo</a></p>"
-
- examples = [
-     ['input.midi']
- ]
- gr.Interface(
-     inference,
-     gr.inputs.File(label="Input Midi"),
-     gr.outputs.File(label="Output Midi"),
      title=title,
-     description=description,
      article=article,
-     examples=examples
-     ).launch()
+ import pickle
+ import pretty_midi
  import gradio as gr
+ from music21 import *
+ from midi2audio import FluidSynth
+
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ file_path = './objects/int_to_note.pkl'
+ with open(file_path, 'rb') as f:
+     int_to_note = pickle.load(f)
+
+ file_path = './objects/note_to_int.pkl'
+ with open(file_path, 'rb') as f:
+     note_to_int = pickle.load(f)
+
+
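+ # GRU-based note-level language model: an Embedding maps each integer note/chord token
+ # to a vector, a GRU updates the hidden state token by token, and a Linear decoder turns
+ # the hidden state into a score for every token in the vocabulary.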
+ class GenerationRNN(nn.Module):
+     def __init__(self, input_size, hidden_size, output_size, n_layers=1):
+         super(GenerationRNN, self).__init__()
+         self.input_size = input_size
+         self.hidden_size = hidden_size
+         self.output_size = output_size
+         self.n_layers = n_layers
+
+         self.embedding = nn.Embedding(input_size, hidden_size)
+         self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
+         self.decoder = nn.Linear(hidden_size * n_layers, output_size)
+
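+     # forward() consumes a single token id: input.view(1, -1) gives shape (1, 1), the
+     # embedding gives (1, 1, hidden), and the decoder maps the flattened hidden state of
+     # all GRU layers to logits of shape (1, output_size).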
+     def forward(self, input, hidden):
+         # Creates an embedding of the input tokens
+         # print('initial input', input.size())
+         input = self.embedding(input.view(1, -1))
+         # print('input after embedding', input.size())
+         output, hidden = self.gru(input, hidden)
+         # print('output after gru', output.size())
+         # print('hidden after gru', hidden.size())
+         output = self.decoder(hidden.view(1, -1))
+         # print('output after decoder', output.size())
+         return output, hidden
+
+     def init_hidden(self):
+         return torch.zeros(self.n_layers, 1, self.hidden_size).to(device)
+
+
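+ # Autoregressive sampling: the prime sequence "warms up" the hidden state, then tokens
+ # are drawn one at a time from a temperature-scaled multinomial over the model's logits
+ # (exp(logits / T) is proportional to a softmax with temperature T once normalised).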
+ def predict_multinomial(net, prime_seq, predict_len, temperature=0.8):
+     '''
+     Arguments:
+     prime_seq - priming sequence (converted to integer token ids)
+     predict_len - number of notes to predict for after prime sequence
+     '''
+     hidden = net.init_hidden()
+
+     predicted = prime_seq.copy()
+     prime_seq = torch.tensor(prime_seq, dtype=torch.long).to(device)
+
+
+     # "Building up" the hidden state using the prime sequence
+     for p in range(len(prime_seq) - 1):
+         input = prime_seq[p]
+         _, hidden = net(input, hidden)
+
+     # Last token of the prime sequence
+     input = prime_seq[-1]
+
+     # For every index to predict
+     for p in range(predict_len):
+
+         # Pass the input to the model - output has one score per token in the vocabulary
+         output, hidden = net(input, hidden)
+         # Sample from the network output as a multinomial distribution
+         output = output.data.view(-1).div(temperature).exp()
+         predicted_id = torch.multinomial(output, 1)
+
+         # Add predicted index to the list and use as next input
+         predicted.append(predicted_id.item())
+         input = predicted_id
+
+     return predicted
+
+
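+ # Generated tokens are either a single pitch name (e.g. 'E4') or a chord encoded as
+ # dot-separated pitch classes (e.g. '4.7.11'); create_midi rebuilds music21 Note/Chord
+ # objects from them, spacing successive patterns 0.5 quarter-note offsets apart.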
+ def create_midi(prediction_output):
+     """ convert the output from the prediction to notes and return them as a music21 stream """
+     offset = 0
+     output_notes = []
+
+     # create note and chord objects based on the values generated by the model
+     for pattern in prediction_output:
+         # pattern is a chord
+         if ('.' in pattern) or pattern.isdigit():
+             notes_in_chord = pattern.split('.')
+             notes = []
+             for current_note in notes_in_chord:
+                 new_note = note.Note(int(current_note))
+                 new_note.storedInstrument = instrument.Piano()
+                 notes.append(new_note)
+             new_chord = chord.Chord(notes)
+             new_chord.offset = offset
+             output_notes.append(new_chord)
+         # pattern is a note
+         else:
+             new_note = note.Note(pattern)
+             new_note.offset = offset
+             new_note.storedInstrument = instrument.Piano()
+             output_notes.append(new_note)
+
+         # increase offset each iteration so that notes do not stack
+         offset += 0.5
+
+     midi_stream = stream.Stream(output_notes)
+
+     return midi_stream
+
+
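+ # Tokenise an uploaded MIDI file: prefer the piano part (falling back to the first part),
+ # then encode single notes as pitch names and chords as the dot-joined integers of their
+ # normal order.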
+ def get_note_names(midi):
+     s2 = instrument.partitionByInstrument(midi)
+
+     piano_part = None
+     # Filter for only the piano part
+     instr = instrument.Piano
+     for part in s2:
+         if isinstance(part.getInstrument(), instr):
+             piano_part = part
+
+     notes_song = []
+     if not piano_part:  # Some songs somehow have no piano parts
+         # Just take the first part
+         piano_part = s2[0]
+
+     for element in piano_part:
+         if isinstance(element, note.Note):
+             # Return the pitch of the single note
+             notes_song.append(str(element.pitch))
+         elif isinstance(element, chord.Chord):
+             # Returns the normal order of a Chord represented in a list of integers
+             notes_song.append('.'.join(str(n) for n in element.normalOrder))
+
+     return notes_song
+
+
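+ # Gradio callback: parse the uploaded MIDI, map its tokens to integer ids, sample a
+ # continuation of the requested length and randomness, write result.midi, and render
+ # result.wav with FluidSynth.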
+ def process_input(input_midi_file, input_randomness, input_duration):
+     print(input_midi_file.name)
+     midi = converter.parse(input_midi_file.name)
+     note_names = get_note_names(midi)
+     int_notes = [note_to_int[note_name] for note_name in note_names]
+
+     duration_to_size = {30: 100, 20: 66, 10: 33}
+     dur = duration_to_size[input_duration]
+
+     generated_seq_multinomial = predict_multinomial(model, int_notes, predict_len=dur, temperature=input_randomness / 50)
+     generated_seq_multinomial = [int_to_note[e] for e in generated_seq_multinomial]
+     pred_midi_multinomial = create_midi(generated_seq_multinomial)
+
+     pred_midi_multinomial.write('midi', fp='result.midi')
+
+     sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
+     FluidSynth(sound_font).midi_to_audio('result.midi', 'result.wav')
+     return 'result.wav', 'result.midi'
+
+
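+ # Load the trained GRU that was pickled on CPU; its output vocabulary is assumed to match
+ # the note_to_int / int_to_note mappings loaded above.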
+ file_path = './objects/model_cpu.pkl'
+ with open(file_path, 'rb') as f:
+     model = pickle.load(f)
+
+
+ midi_file_desc = """
+ MIDI file in .midi format
+ """
+
+ article = """
+ This model allows you to generate music based on MIDI input. Please upload a MIDI file below and choose the randomness and duration of the generated music. The project was created by students of the Ukrainian Catholic University for our ML course.
+
+ We are using a GRU model to output new notes based on the given input. You can find more information at our Git repo: https://github.com/DmytroLopushanskyy/music-generation
+ We are using a language model to create music by treating standard MIDI as simple text, with tokens for note values, note durations, and separators that denote movement forward in time.
+ """
+
+ title = """
+ Classical Music Generation
+ """
+
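+ # Interface inputs map positionally to process_input(file, randomness, duration): the
+ # randomness slider (50-250, step 50) becomes sampling temperature 1.0-5.0 (value / 50),
+ # and the duration radio (10/20/30) maps to 33/66/100 generated tokens.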
+ iface = gr.Interface(
+     fn=process_input,
+     inputs=[
+         gr.inputs.File(label=midi_file_desc),
+         gr.inputs.Slider(50, 250, default=100, step=50),
+         gr.inputs.Radio([10, 20, 30], type="value", default=20)
+     ],
      title=title,
+     outputs=["audio", "file"],
      article=article,
+     examples=[
+         ['examples/mozart.midi', 100, 10],
+         ['examples/beethoven.midi', 50, 30],
+         ['examples/chopin.midi', 100, 20]
+     ]
+ )
+
+ iface.launch()