Spaces:
Sleeping
Sleeping
fix
Browse files
- app.py +2 -2
- mdx_models/{model_data_v2.json → model_data.json} +0 -0
- uvr_processing.py +23 -22
app.py
CHANGED
@@ -89,8 +89,8 @@ if __name__ == "__main__":
|
|
89 |
# predict(audio_file, api_name="/predict") -> output
|
90 |
app_api = gr.Interface(
|
91 |
fn=inference,
|
92 |
-
inputs=gr.Audio(type="filepath"),
|
93 |
-
outputs=gr.File(file_count="multiple"),
|
94 |
)
|
95 |
|
96 |
app = gr.TabbedInterface(
|
|
|
89 |
# predict(audio_file, api_name="/predict") -> output
|
90 |
app_api = gr.Interface(
|
91 |
fn=inference,
|
92 |
+
inputs=gr.Audio(type="filepath"),
|
93 |
+
outputs=gr.File(file_count="multiple"),
|
94 |
)
|
95 |
|
96 |
app = gr.TabbedInterface(
|
mdx_models/{model_data_v2.json → model_data.json}
RENAMED
File without changes
|
uvr_processing.py
CHANGED
@@ -31,7 +31,7 @@ def run_mdx(model_params: Dict,
|
|
31 |
device_base: str = "cuda",
|
32 |
) -> Tuple[str, str]:
|
33 |
"""
|
34 |
-
|
35 |
"""
|
36 |
if device_base == "cuda":
|
37 |
device = torch.device("cuda:0")
|
@@ -146,7 +146,7 @@ def extract_bgm(mdx_model_params: Dict,
|
|
146 |
output_dir: Path,
|
147 |
device_base: str = "cuda") -> Path:
|
148 |
"""
|
149 |
-
|
150 |
"""
|
151 |
background_path, _ = run_mdx(model_params=mdx_model_params,
|
152 |
input_filename=input_filename,
|
@@ -166,9 +166,9 @@ def extract_vocal(mdx_model_params: Dict,
|
|
166 |
dereverb_flag: bool = False,
|
167 |
device_base: str = "cuda") -> Path:
|
168 |
"""
|
169 |
-
|
170 |
"""
|
171 |
-
#
|
172 |
vocals_path, _ = run_mdx(mdx_model_params,
|
173 |
input_filename,
|
174 |
output_dir,
|
@@ -176,7 +176,7 @@ def extract_vocal(mdx_model_params: Dict,
|
|
176 |
denoise=True,
|
177 |
device_base=device_base,
|
178 |
)
|
179 |
-
#
|
180 |
if main_vocals_flag:
|
181 |
time.sleep(2)
|
182 |
backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
|
@@ -187,44 +187,45 @@ def extract_vocal(mdx_model_params: Dict,
|
|
187 |
device_base=device_base,
|
188 |
)
|
189 |
vocals_path = main_vocals_path
|
190 |
-
#
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
|
|
201 |
return vocals_path
|
202 |
|
203 |
def process_uvr_task(mdxnet_models_dir: Path,
|
204 |
input_file_path: Path,
|
205 |
output_dir: Path,
|
206 |
-
main_vocals_flag: bool = False, #
|
207 |
-
dereverb_flag: bool = False, #
|
208 |
) -> Tuple[Path, Path]:
|
209 |
|
210 |
device_base = "cuda" if torch.cuda.is_available() else "cpu"
|
211 |
|
212 |
# load mdx model definition
|
213 |
-
with open(mdxnet_models_dir/"
|
214 |
mdx_model_params = json.load(infile) # type: Dict
|
215 |
|
216 |
output_dir.mkdir(parents=True, exist_ok=True)
|
217 |
input_file_path = convert_to_stereo_and_wav(input_file_path) # type: Path
|
218 |
|
219 |
-
# 1.
|
220 |
background_path = extract_bgm(mdx_model_params,
|
221 |
input_file_path,
|
222 |
mdxnet_models_dir,
|
223 |
output_dir,
|
224 |
device_base=device_base)
|
225 |
|
226 |
-
# 2.
|
227 |
-
#
|
228 |
vocals_path = extract_vocal(mdx_model_params,
|
229 |
input_file_path,
|
230 |
mdxnet_models_dir,
|
|
|
31 |
device_base: str = "cuda",
|
32 |
) -> Tuple[str, str]:
|
33 |
"""
|
34 |
+
Separate vocals using MDX model
|
35 |
"""
|
36 |
if device_base == "cuda":
|
37 |
device = torch.device("cuda:0")
|
|
|
146 |
output_dir: Path,
|
147 |
device_base: str = "cuda") -> Path:
|
148 |
"""
|
149 |
+
Extract pure background music, remove vocals
|
150 |
"""
|
151 |
background_path, _ = run_mdx(model_params=mdx_model_params,
|
152 |
input_filename=input_filename,
|
|
|
166 |
dereverb_flag: bool = False,
|
167 |
device_base: str = "cuda") -> Path:
|
168 |
"""
|
169 |
+
Extract vocals
|
170 |
"""
|
171 |
+
# First, run the basic vocal separation model (UVR-MDX-NET-Voc_FT.onnx)
|
172 |
vocals_path, _ = run_mdx(mdx_model_params,
|
173 |
input_filename,
|
174 |
output_dir,
|
|
|
176 |
denoise=True,
|
177 |
device_base=device_base,
|
178 |
)
|
179 |
+
# If "main_vocals_flag" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main vocals (Main) from backup vocals/background vocals (Backup)
|
180 |
if main_vocals_flag:
|
181 |
time.sleep(2)
|
182 |
backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
|
|
|
187 |
device_base=device_base,
|
188 |
)
|
189 |
vocals_path = main_vocals_path
|
190 |
+
# If "dereverb_flag" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
|
191 |
+
# deactivated since the model license is unknown
|
192 |
+
# if dereverb_flag:
|
193 |
+
# time.sleep(2)
|
194 |
+
# _, vocals_dereverb_path = run_mdx(mdx_model_params,
|
195 |
+
# output_dir,
|
196 |
+
# mdxnet_models_dir/"Reverb_HQ_By_FoxJoy.onnx",
|
197 |
+
# vocals_path,
|
198 |
+
# denoise=True,
|
199 |
+
# device_base=device_base,
|
200 |
+
# )
|
201 |
+
# vocals_path = vocals_dereverb_path
|
202 |
return vocals_path
|
203 |
|
204 |
def process_uvr_task(mdxnet_models_dir: Path,
|
205 |
input_file_path: Path,
|
206 |
output_dir: Path,
|
207 |
+
main_vocals_flag: bool = False, # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
|
208 |
+
dereverb_flag: bool = False, # If "DeReverb" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
|
209 |
) -> Tuple[Path, Path]:
|
210 |
|
211 |
device_base = "cuda" if torch.cuda.is_available() else "cpu"
|
212 |
|
213 |
# load mdx model definition
|
214 |
+
with open(mdxnet_models_dir/"model_data.json") as infile:
|
215 |
mdx_model_params = json.load(infile) # type: Dict
|
216 |
|
217 |
output_dir.mkdir(parents=True, exist_ok=True)
|
218 |
input_file_path = convert_to_stereo_and_wav(input_file_path) # type: Path
|
219 |
|
220 |
+
# 1. Extract pure background music, remove vocals
|
221 |
background_path = extract_bgm(mdx_model_params,
|
222 |
input_file_path,
|
223 |
mdxnet_models_dir,
|
224 |
output_dir,
|
225 |
device_base=device_base)
|
226 |
|
227 |
+
# 2. Separate vocals
|
228 |
+
# First, run the basic vocal separation model (UVR-MDX-NET-Voc_FT.onnx)
|
229 |
vocals_path = extract_vocal(mdx_model_params,
|
230 |
input_file_path,
|
231 |
mdxnet_models_dir,
|