# reference: https://huggingface.co/spaces/r3gm/Audio_separator
import gradio as gr
import shutil
import numpy as np
from pathlib import Path
import os
import time
import torch
from huggingface_hub import hf_hub_download
from uvr_processing import process_uvr_task, run_mdx, get_model_params
from utils import convert_to_stereo_and_wav
import onnxruntime as ort
import io

MODEL_ID = "masszhou/mdxnet"
MODELS_PATH = {
    "bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
    "basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
    "main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx")),
}


def get_device_info() -> str:
    """Return a short description of the current compute device."""
    if torch.cuda.is_available():
        device = f"GPU ({torch.cuda.get_device_name(0)})"
    else:
        device = "CPU"
    return f"Current running environment: {device}"


def inference(audio_file: str, stem: str = "vocal") -> list[str]:
    """Separate an audio file into background and vocal stems and return their paths."""
    # audio_file = '/private/var/folders/02/_9ymjkz12xq8m_xh5592pl840000gn/T/gradio/74c3de047a439ea3cfb8e2d1ee6e5a85ea999d3eb30537b88d386aac177902d0/Spare Zeit und Aufwand mit den Servicevertragen von Mercedes-Benz Trucks..m4a'
    if not audio_file:
        raise ValueError("The audio path is missing.")
    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")

    audio_file = Path(audio_file)
    output_dir = Path("./output")
    outputs = []

    start_time = time.time()
    background_path, vocals_path = process_uvr_task(
        input_file_path=audio_file,
        output_dir=output_dir,
        models_path=MODELS_PATH,
    )
    end_time = time.time()
    execution_time = end_time - start_time
    print(f"Execution time: {execution_time} seconds")
    print(f"Background file: {background_path}")
    print(f"Vocals file: {vocals_path}")

    os.makedirs("static/results", exist_ok=True)
    # shutil.copy(background_path, bg_dst)
    # shutil.copy(vocals_path, vc_dst)
    outputs.append(str(background_path))
    outputs.append(str(vocals_path))
    return outputs


def inference_bgm(audio_file: str) -> list[str]:
    """Extract only the background (instrumental) stem with the MDX-Net BGM model."""
    mdx_model_params = get_model_params(Path("./mdx_models"))
    audio_file = convert_to_stereo_and_wav(Path(audio_file))  # resampling at 44100 Hz
    device_base = "cuda" if torch.cuda.is_available() else "cpu"
    output_dir = Path("./output")
    model_bgm_path = MODELS_PATH["bgm"]
    background_path, _ = run_mdx(
        model_params=mdx_model_params,
        input_filename=audio_file,
        output_dir=output_dir,
        model_path=model_bgm_path,
        denoise=False,
        device_base=device_base,
    )
    return [str(background_path)]


def return_original_file(file):
    # Read the original file name and content
    filename = os.path.basename(file.name)
    with open(file.path, "rb") as f:
        content = f.read()
    return (filename, content)


def get_gui(theme, title, description):
    with gr.Blocks(theme=theme) as app:
        # Add title and description
        gr.Markdown(title)
        gr.Markdown(description)
        gr.Markdown(get_device_info())

        # audio_input = gr.Audio(label="Audio file", type="filepath")
        # download_button = gr.Button("Inference")
        # file_output = gr.File(label="Result", file_count="multiple", interactive=False)
        # download_button.click(
        #     inference,
        #     inputs=[audio_input],
        #     outputs=[file_output],
        # )

        audio_input = gr.File(file_types=[".mp3", ".wav"], label="Upload audio")
        download_btn = gr.DownloadData(return_original_file, audio_input)

    return app


if __name__ == "__main__":
    title = "