Spaces:

masszhou
/

bgmseparator

Sleeping

App Files Files Community

masszhou committed on Mar 30

Commit

6e8f9db

1 Parent(s): bbf5927

fix

Browse files

Files changed (3) hide show

app.py +2 -2
mdx_models/{model_data_v2.json → model_data.json} +0 -0
uvr_processing.py +23 -22

app.py CHANGED Viewed

@@ -89,8 +89,8 @@ if __name__ == "__main__":
     # predict(audio_file, api_name="/predict") -> output
     app_api = gr.Interface(
         fn=inference,
-        inputs=gr.Audio(type="filepath"),   # 接收文件路径（也可以换成 type="file"）
-        outputs=gr.File(file_count="multiple"),  # 返回多个文件
     )
     app = gr.TabbedInterface(

     # predict(audio_file, api_name="/predict") -> output
     app_api = gr.Interface(
         fn=inference,
+        inputs=gr.Audio(type="filepath"),
+        outputs=gr.File(file_count="multiple"),
     )
     app = gr.TabbedInterface(

mdx_models/{model_data_v2.json → model_data.json} RENAMED Viewed

File without changes

uvr_processing.py CHANGED Viewed

@@ -31,7 +31,7 @@ def run_mdx(model_params: Dict,
             device_base: str = "cuda",
             ) -> Tuple[str, str]:
     """
-    使用MDX模型分离人声
     """
     if device_base == "cuda":
         device = torch.device("cuda:0")
@@ -146,7 +146,7 @@ def extract_bgm(mdx_model_params: Dict,
                 output_dir: Path,
                 device_base: str = "cuda") -> Path:
     """
-    提取纯音乐背景，去除人声部分
     """
     background_path, _ = run_mdx(model_params=mdx_model_params,
                                   input_filename=input_filename,
@@ -166,9 +166,9 @@ def extract_vocal(mdx_model_params: Dict,
                   dereverb_flag: bool = False,
                   device_base: str = "cuda") -> Path:
     """
-    提取人声
     """
-    # 首先使用 UVR-MDX-NET-Voc_FT.onnx 基础的人声分离模型
     vocals_path, _ = run_mdx(mdx_model_params,
                              input_filename,
                              output_dir,
@@ -176,7 +176,7 @@ def extract_vocal(mdx_model_params: Dict,
                              denoise=True,
                              device_base=device_base,
                              )
-    # 如果开启"main_vocals_flag"，则使用UVR_MDXNET_KARA_2.onnx进一步分成主人声(Main)和和声/背景声(Backup)
     if main_vocals_flag:
         time.sleep(2)
         backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
@@ -187,44 +187,45 @@ def extract_vocal(mdx_model_params: Dict,
                                                        device_base=device_base,
                                                        )
         vocals_path = main_vocals_path
-    # 如果开启"dereverb_flag"，则使用Reverb_HQ_By_FoxJoy.onnx进行去混响
-    if dereverb_flag:
-        time.sleep(2)
-        _, vocals_dereverb_path = run_mdx(mdx_model_params,
-                                          output_dir,
-                                          mdxnet_models_dir/"Reverb_HQ_By_FoxJoy.onnx",
-                                          vocals_path,
-                                          denoise=True,
-                                          device_base=device_base,
-                                          )
-        vocals_path = vocals_dereverb_path
     return vocals_path
 def process_uvr_task(mdxnet_models_dir: Path,
                      input_file_path: Path,
                      output_dir: Path,
-                     main_vocals_flag: bool = False,  # 如果开启"Main"，则使用UVR_MDXNET_KARA_2.onnx进一步分离主副人声
-                     dereverb_flag: bool = False,  # 如果开启"DeReverb"，则使用Reverb_HQ_By_FoxJoy.onnx进行去混响
                      ) -> Tuple[Path, Path]:
     device_base = "cuda" if torch.cuda.is_available() else "cpu"
     # load mdx model definition
-    with open(mdxnet_models_dir/"model_data_v2.json") as infile:
         mdx_model_params = json.load(infile)  # type: Dict
     output_dir.mkdir(parents=True, exist_ok=True)
     input_file_path = convert_to_stereo_and_wav(input_file_path)  # type: Path
-    # 1. 提取纯音乐背景，去除人声部分
     background_path = extract_bgm(mdx_model_params,
                                   input_file_path,
                                   mdxnet_models_dir,
                                   output_dir,
                                   device_base=device_base)
-    # 2. 分离人声
-    # 首先使用 UVR-MDX-NET-Voc_FT.onnx 基础的人声分离模型
     vocals_path = extract_vocal(mdx_model_params,
                                 input_file_path,
                                 mdxnet_models_dir,

             device_base: str = "cuda",
             ) -> Tuple[str, str]:
     """
+    Separate vocals using MDX model
     """
     if device_base == "cuda":
         device = torch.device("cuda:0")
                 output_dir: Path,
                 device_base: str = "cuda") -> Path:
     """
+    Extract pure background music, remove vocals
     """
     background_path, _ = run_mdx(model_params=mdx_model_params,
                                   input_filename=input_filename,
                   dereverb_flag: bool = False,
                   device_base: str = "cuda") -> Path:
     """
+    Extract vocals
     """
+    # First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
     vocals_path, _ = run_mdx(mdx_model_params,
                              input_filename,
                              output_dir,
                              denoise=True,
                              device_base=device_base,
                              )
+    # If "main_vocals_flag" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main vocals (Main) from backup vocals/background vocals (Backup)
     if main_vocals_flag:
         time.sleep(2)
         backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
                                                        device_base=device_base,
                                                        )
         vocals_path = main_vocals_path
+    # If "dereverb_flag" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
+    # deactivated because the model license is unknown
+    # if dereverb_flag:
+    #     time.sleep(2)
+    #     _, vocals_dereverb_path = run_mdx(mdx_model_params,
+    #                                       output_dir,
+    #                                       mdxnet_models_dir/"Reverb_HQ_By_FoxJoy.onnx",
+    #                                       vocals_path,
+    #                                       denoise=True,
+    #                                       device_base=device_base,
+    #                                       )
+    #     vocals_path = vocals_dereverb_path
     return vocals_path
 def process_uvr_task(mdxnet_models_dir: Path,
                      input_file_path: Path,
                      output_dir: Path,
+                     main_vocals_flag: bool = False,  # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
+                     dereverb_flag: bool = False,  # If "DeReverb" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
                      ) -> Tuple[Path, Path]:
     device_base = "cuda" if torch.cuda.is_available() else "cpu"
     # load mdx model definition
+    with open(mdxnet_models_dir/"model_data.json") as infile:
         mdx_model_params = json.load(infile)  # type: Dict
     output_dir.mkdir(parents=True, exist_ok=True)
     input_file_path = convert_to_stereo_and_wav(input_file_path)  # type: Path
+    # 1. Extract pure background music, remove vocals
     background_path = extract_bgm(mdx_model_params,
                                   input_file_path,
                                   mdxnet_models_dir,
                                   output_dir,
                                   device_base=device_base)
+    # 2. Separate vocals
+    # First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
     vocals_path = extract_vocal(mdx_model_params,
                                 input_file_path,
                                 mdxnet_models_dir,