Spaces · Running on Zero
Commit b7070f2 · Parent: db8b2d5
Refactor inference functions to accept DEVICE and MODEL parameters for TC5, TC6, and TC7; update model loading to use GPU if available.
app.py CHANGED
```diff
@@ -11,34 +11,43 @@ from tc7 import infer as tc7infer
 from gradio_client import Client, handle_file
 import tempfile
 
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+GPU_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Load model once
 tc5 = TaikoConformer5.from_pretrained("JacobLinCool/taiko-conformer-5")
-tc5.to(DEVICE)
+tc5.to(GPU_DEVICE)
 tc5.eval()
+tc5_cpu = TaikoConformer5.from_pretrained("JacobLinCool/taiko-conformer-5")
+tc5_cpu.to("cpu")
+tc5_cpu.eval()
 
 # Load TC6 model
 tc6 = TaikoConformer6.from_pretrained("JacobLinCool/taiko-conformer-6")
-tc6.to(DEVICE)
+tc6.to(GPU_DEVICE)
 tc6.eval()
+tc6_cpu = TaikoConformer6.from_pretrained("JacobLinCool/taiko-conformer-6")
+tc6_cpu.to("cpu")
+tc6_cpu.eval()
 
 # Load TC7 model
 tc7 = TaikoConformer7.from_pretrained("JacobLinCool/taiko-conformer-7")
-tc7.to(DEVICE)
+tc7.to(GPU_DEVICE)
 tc7.eval()
+tc7_cpu = TaikoConformer7.from_pretrained("JacobLinCool/taiko-conformer-7")
+tc7_cpu.to("cpu")
+tc7_cpu.eval()
 
 synthesizer = Client("ryanlinjui/taiko-music-generator")
 
 
-def infer_tc5(audio, nps, bpm, offset):
+def infer_tc5(audio, nps, bpm, offset, DEVICE, MODEL):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
     mel_input, nps_input = tc5infer.preprocess_audio(audio_path, nps)
     # Inference
     don_energy, ka_energy, drumroll_energy = tc5infer.run_inference(
-        tc5, mel_input, nps_input, DEVICE
+        MODEL, mel_input, nps_input, DEVICE
     )
     output_frame_hop_sec = HOP_LENGTH / SAMPLE_RATE
     onsets = tc5infer.decode_onsets(
```
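The hunk above introduces the core pattern of this commit: each checkpoint is loaded twice, one copy moved to `GPU_DEVICE` (CUDA when available) and one pinned to the CPU, so the CPU path never depends on a device that may not be attached. A minimal sketch of that idea, using a hypothetical `DemoModel` in place of the TaikoConformer classes:

```python
import torch

# Device to use when a GPU is available; falls back to CPU otherwise.
GPU_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class DemoModel(torch.nn.Module):  # stand-in for TaikoConformer5/6/7
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(80, 3)

    def forward(self, mel):
        return self.proj(mel)

def load_pair(factory):
    """Load two independent copies: one on the GPU device, one on the CPU."""
    gpu_model = factory().to(GPU_DEVICE).eval()
    cpu_model = factory().to("cpu").eval()
    return gpu_model, cpu_model

demo, demo_cpu = load_pair(DemoModel)
```

Loading two copies trades memory for simplicity: neither copy ever has to be moved between devices at request time.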
```diff
@@ -91,7 +100,7 @@ def infer_tc5(audio, nps, bpm, offset):
     return oni_audio, plot, tja_content
 
 
-def infer_tc6(audio, nps, bpm, offset, difficulty, level):
+def infer_tc6(audio, nps, bpm, offset, difficulty, level, DEVICE, MODEL):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
```
```diff
@@ -101,7 +110,7 @@ def infer_tc6(audio, nps, bpm, offset, difficulty, level):
     level_input = torch.tensor(level, dtype=torch.float32).to(DEVICE)
     # Inference
     don_energy, ka_energy, drumroll_energy = tc6infer.run_inference(
-        tc6, mel_input, nps_input, difficulty_input, level_input, DEVICE
+        MODEL, mel_input, nps_input, difficulty_input, level_input, DEVICE
     )
     output_frame_hop_sec = HOP_LENGTH / SAMPLE_RATE
     onsets = tc6infer.decode_onsets(
```
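Note that the conditioning tensors are also placed on `DEVICE`, so they land on the same device as whichever model copy is in use. A small illustration of that detail (function and names hypothetical, mirroring the `level_input` line above):

```python
import torch

def make_conditioning(difficulty, level, device):
    # Build scalar conditioning tensors directly on the target device,
    # equivalent to torch.tensor(...).to(device) in the hunk above.
    difficulty_input = torch.tensor(difficulty, dtype=torch.float32, device=device)
    level_input = torch.tensor(level, dtype=torch.float32, device=device)
    return difficulty_input, level_input

diff_t, level_t = make_conditioning(3, 8.0, torch.device("cpu"))
```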
```diff
@@ -154,7 +163,7 @@ def infer_tc6(audio, nps, bpm, offset, difficulty, level):
     return oni_audio, plot, tja_content
 
 
-def infer_tc7(audio, nps, bpm, offset, difficulty, level):
+def infer_tc7(audio, nps, bpm, offset, difficulty, level, DEVICE, MODEL):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
```
```diff
@@ -164,7 +173,7 @@ def infer_tc7(audio, nps, bpm, offset, difficulty, level):
     level_input = torch.tensor(level, dtype=torch.float32).to(DEVICE)
     # Inference
     don_energy, ka_energy, drumroll_energy = tc7infer.run_inference(
-        tc7, mel_input, nps_input, difficulty_input, level_input, DEVICE
+        MODEL, mel_input, nps_input, difficulty_input, level_input, DEVICE
    )
     output_frame_hop_sec = HOP_LENGTH / SAMPLE_RATE
     onsets = tc7infer.decode_onsets(
```
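Threading `DEVICE` and `MODEL` through the `infer_tc*` wrappers, rather than reading module globals, is what lets the same function body serve both the GPU and CPU copies. A self-contained sketch of that calling convention, with a hypothetical `run_inference` standing in for the `tc*infer.run_inference` helpers:

```python
import torch

@torch.inference_mode()
def run_inference(model, mel, device):
    # Hypothetical helper mirroring the tc*infer.run_inference call shape:
    # move the input to the requested device, then run the model there.
    return model(mel.to(device))

model = torch.nn.Linear(80, 3).eval()  # stand-in for tc5 or tc5_cpu
out = run_inference(model, torch.randn(1, 80), torch.device("cpu"))
```

The same wrapper then works as `run_inference(tc5, mel, GPU_DEVICE)` on the GPU path and `run_inference(tc5_cpu, mel, torch.device("cpu"))` on the CPU path.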
```diff
@@ -220,20 +229,21 @@ def infer_tc7(audio, nps, bpm, offset, difficulty, level):
 @spaces.GPU
 def run_inference_gpu(audio, model_choice, nps, bpm, offset, difficulty, level):
     if model_choice == "TC5":
-        return infer_tc5(audio, nps, bpm, offset)
+        return infer_tc5(audio, nps, bpm, offset, GPU_DEVICE, tc5)
     elif model_choice == "TC6":
-        return infer_tc6(audio, nps, bpm, offset, difficulty, level)
+        return infer_tc6(audio, nps, bpm, offset, difficulty, level, GPU_DEVICE, tc6)
     else: # TC7
-        return infer_tc7(audio, nps, bpm, offset, difficulty, level)
+        return infer_tc7(audio, nps, bpm, offset, difficulty, level, GPU_DEVICE, tc7)
 
 
 def run_inference_cpu(audio, model_choice, nps, bpm, offset, difficulty, level):
+    DEVICE = torch.device("cpu")
     if model_choice == "TC5":
-        return infer_tc5(audio, nps, bpm, offset)
+        return infer_tc5(audio, nps, bpm, offset, DEVICE, tc5_cpu)
     elif model_choice == "TC6":
-        return infer_tc6(audio, nps, bpm, offset, difficulty, level)
+        return infer_tc6(audio, nps, bpm, offset, difficulty, level, DEVICE, tc6_cpu)
     else: # TC7
-        return infer_tc7(audio, nps, bpm, offset, difficulty, level)
+        return infer_tc7(audio, nps, bpm, offset, difficulty, level, DEVICE, tc7_cpu)
 
 
 def run_inference(with_gpu, audio, model_choice, nps, bpm, offset, difficulty, level):
```
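On ZeroGPU Spaces, only functions decorated with `@spaces.GPU` run with a GPU attached, which is why the commit keeps separate `run_inference_gpu` and `run_inference_cpu` entry points and selects between them at call time. A minimal sketch of that dispatch shape (`spaces.GPU` is the real decorator used in the diff; the worker functions here are placeholders):

```python
import spaces  # provided by the Hugging Face ZeroGPU Spaces runtime

@spaces.GPU
def run_on_gpu(x):
    # Executes inside a ZeroGPU allocation; CUDA is available here.
    return f"gpu:{x}"

def run_on_cpu(x):
    # Plain function; never requests a GPU allocation.
    return f"cpu:{x}"

def run(with_gpu, x):
    # Route to the decorated function only when the user asked for GPU.
    return run_on_gpu(x) if with_gpu else run_on_cpu(x)
```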
```diff
@@ -330,7 +340,16 @@ with gr.Blocks() as demo:
 
     run_btn.click(
         run_inference,
-        inputs=[audio_input, model_choice, nps, bpm, offset, difficulty, level],
+        inputs=[
+            with_gpu,
+            audio_input,
+            model_choice,
+            nps,
+            bpm,
+            offset,
+            difficulty,
+            level,
+        ],
         outputs=[audio_output, plot_output, tja_output],
     )
 
```
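The new first entry in `inputs` feeds the UI's GPU toggle into `run_inference` as its `with_gpu` argument, since Gradio passes `inputs` to the callback positionally. A self-contained sketch of the same wiring, with hypothetical component names:

```python
import gradio as gr

def run(with_gpu, text):
    # The first positional argument receives the checkbox value.
    return f"{'GPU' if with_gpu else 'CPU'}: {text}"

with gr.Blocks() as demo:
    with_gpu = gr.Checkbox(label="Use GPU (ZeroGPU)", value=True)
    text_in = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    run_btn = gr.Button("Run")
    run_btn.click(run, inputs=[with_gpu, text_in], outputs=[out])

# demo.launch()  # uncomment to serve the UI
```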