masszhou committed
Commit bbf5927 · 1 Parent(s): 3b06e9c

hello world

Files changed (7)
  1. .gitignore +10 -0
  2. app.py +22 -11
  3. mdx_models/model_data_v2.json +50 -0
  4. mdxnet_model.py +20 -21
  5. pyproject.toml +35 -0
  6. utils.py +25 -144
  7. uvr_processing.py +236 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
+ __pycache__
+ .DS_Store
+ *.wav
+ *.m4a
+ *.mp3
+ *.mp4
+ *.avi
+ *.mkv
+ .env
+ .vscode
app.py CHANGED
@@ -4,9 +4,9 @@ import shutil
  import numpy as np
  from pathlib import Path
  import os
- from utils import get_hash
  import time
  import torch
+ from uvr_processing import process_uvr_task
 
 
  def get_device_info():
@@ -17,23 +17,34 @@ def get_device_info():
      return f"Current runtime environment: {device}"
 
 
- def inference(audio_file):
+ def inference(audio_file: str,
+               stem: str = "vocal") -> list[str]:
      # audio_file = '/private/var/folders/02/_9ymjkz12xq8m_xh5592pl840000gn/T/gradio/74c3de047a439ea3cfb8e2d1ee6e5a85ea999d3eb30537b88d386aac177902d0/Spare Zeit und Aufwand mit den Servicevertragen von Mercedes-Benz Trucks..m4a'
 
-     output_path1 = "downloaded_audio_1.wav"
-     output_path2 = "downloaded_audio_2.wav"
-
-     hash_audio = str(get_hash(audio_file))
-     media_dir = os.path.dirname(audio_file)
-
-     outputs = []
+     if not audio_file:
+         raise ValueError("The audio path is missing.")
+
+     if not stem:
+         raise ValueError("Please select 'vocal' or 'background' stem.")
+
+     audio_file = Path(audio_file)
+     mdxnet_models_dir = Path("./mdx_models")
+     output_dir = Path("./output")
 
+     outputs = []
      start_time = time.time()
-     shutil.copy(audio_file, output_path1)
-     shutil.copy(audio_file, output_path2)
-
-     return [output_path1, output_path2]
+     background_path, vocals_path = process_uvr_task(
+         mdxnet_models_dir=mdxnet_models_dir,
+         input_file_path=audio_file,
+         output_dir=output_dir,
+     )
+     end_time = time.time()
+     execution_time = end_time - start_time
+
+     outputs.append(str(background_path))
+     outputs.append(str(vocals_path))
 
+     return outputs
 
 
  def get_gui(theme, title, description):
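
A minimal sketch of driving the reworked inference() outside Gradio (the input path test.m4a is a stand-in; in the app the upload widget supplies it):

    # sketch: call inference() directly; it returns [background, vocals] paths
    from app import inference

    background_wav, vocals_wav = inference("test.m4a", stem="vocal")
    print(background_wav, vocals_wav)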
mdx_models/model_data_v2.json ADDED
@@ -0,0 +1,50 @@
+ {
+     "77d07b2667ddf05b9e3175941b4454a0": {
+         "compensate": 1.021,
+         "mdx_dim_f_set": 3072,
+         "mdx_dim_t_set": 8,
+         "mdx_n_fft_scale_set": 7680,
+         "primary_stem": "Vocals",
+         "name": "UVR-MDX-NET-Voc_FT.onnx"
+     },
+     "1d64a6d2c30f709b8c9b4ce1366d96ee": {
+         "compensate": 1.035,
+         "mdx_dim_f_set": 2048,
+         "mdx_dim_t_set": 8,
+         "mdx_n_fft_scale_set": 5120,
+         "primary_stem": "Instrumental",
+         "name": "UVR_MDXNET_KARA_2.onnx"
+     },
+     "cd5b2989ad863f116c855db1dfe24e39": {
+         "compensate": 1.035,
+         "mdx_dim_f_set": 3072,
+         "mdx_dim_t_set": 9,
+         "mdx_n_fft_scale_set": 6144,
+         "primary_stem": "Other",
+         "name": "Reverb_HQ_By_FoxJoy.onnx"
+     },
+     "55657dd70583b0fedfba5f67df11d711": {
+         "compensate": 1.022,
+         "mdx_dim_f_set": 3072,
+         "mdx_dim_t_set": 8,
+         "mdx_n_fft_scale_set": 6144,
+         "primary_stem": "Instrumental",
+         "name": "UVR-MDX-NET-Inst_HQ_3.onnx"
+     },
+     "cc63408db3d80b4d85b0287d1d7c9632": {
+         "compensate": 1.033,
+         "mdx_dim_f_set": 3072,
+         "mdx_dim_t_set": 8,
+         "mdx_n_fft_scale_set": 6144,
+         "primary_stem": "Instrumental",
+         "name": "UVR-MDX-NET-Inst_HQ_2.onnx"
+     },
+     "0f2a6bc5b49d87d64728ee40e23bceb1": {
+         "compensate": 1.022,
+         "mdx_dim_f_set": 3072,
+         "mdx_dim_t_set": 8,
+         "mdx_n_fft_scale_set": 6144,
+         "primary_stem": "Instrumental",
+         "name": "UVR-MDX-NET-Inst_HQ_4.onnx"
+     }
+ }
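
The top-level keys are hashes of the ONNX files, so a model's parameters can be resolved without hard-coding names; a small sketch, assuming the models have been downloaded into mdx_models/:

    # sketch: look up MDX-Net parameters for a model file via its hash
    import json
    from pathlib import Path
    from mdxnet_model import MDX

    params = json.loads(Path("mdx_models/model_data_v2.json").read_text())
    mp = params[MDX.get_hash(Path("mdx_models/UVR-MDX-NET-Voc_FT.onnx"))]
    print(mp["primary_stem"], mp["mdx_dim_f_set"], 2 ** mp["mdx_dim_t_set"])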
mdxnet_model.py CHANGED
@@ -5,20 +5,20 @@ import onnxruntime as ort
  import hashlib
  import queue
  import threading
+ from pathlib import Path
  from tqdm import tqdm
+ from typing import Tuple
 
 
  class MDXModel:
-     def __init__(
-         self,
-         device,
-         dim_f,
-         dim_t,
-         n_fft,
-         hop=1024,
-         stem_name=None,
-         compensation=1.000,
-     ):
+     def __init__(self,
+                  device: torch.device,
+                  dim_f: int,
+                  dim_t: int,
+                  n_fft: int,
+                  hop: int = 1024,
+                  stem_name: str = "Vocals",
+                  compensation: float = 1.000):
          self.dim_f = dim_f  # frequency bins
          self.dim_t = dim_t
          self.dim_c = 4
@@ -92,7 +92,7 @@ class MDX:
      DEFAULT_CHUNK_SIZE = 0 * DEFAULT_SR
      DEFAULT_MARGIN_SIZE = 1 * DEFAULT_SR
 
-     def __init__(self, model_path: str, params: MDXModel, processor=0):
+     def __init__(self, model_path: Path, params: MDXModel, processor: int = 0):
          # Set the device and the provider (CPU or CUDA)
          self.device = (
              torch.device(f"cuda:{processor}")
@@ -121,7 +121,7 @@ class MDX:
          self.prog = None
 
      @staticmethod
-     def get_hash(model_path: str) -> str:
+     def get_hash(model_path: Path) -> str:
          try:
              with open(model_path, "rb") as f:
                  f.seek(-10000 * 1024, 2)
@@ -132,12 +132,11 @@ class MDX:
          return model_hash
 
      @staticmethod
-     def segment(
-         wave,
-         combine=True,
-         chunk_size=DEFAULT_CHUNK_SIZE,
-         margin_size=DEFAULT_MARGIN_SIZE,
-     ):
+     def segment(wave: np.array,
+                 combine: bool = True,
+                 chunk_size: int = DEFAULT_CHUNK_SIZE,
+                 margin_size: int = DEFAULT_MARGIN_SIZE,
+                 ) -> np.array:
          """
          Segment or join segmented wave array
 
@@ -192,7 +191,7 @@ class MDX:
 
          return processed_wave
 
-     def pad_wave(self, wave):
+     def pad_wave(self, wave: np.array) -> Tuple[np.array, int, int]:
          """
          Pad the wave array to match the required chunk size
 
@@ -232,7 +231,7 @@ class MDX:
 
          return mix_waves, pad, trim
 
-     def _process_wave(self, mix_waves, trim, pad, q: queue.Queue, _id: int):
+     def _process_wave(self, mix_waves, trim, pad, q: queue.Queue, _id: int) -> np.array:
          """
          Process each wave segment in a multi-threaded environment
 
@@ -268,7 +267,7 @@ class MDX:
          q.put({_id: processed_signal})
          return processed_signal
 
-     def process_wave(self, wave: np.array, mt_threads=1):
+     def process_wave(self, wave: np.array, mt_threads=1) -> np.array:
          """
          Process the wave array in a multi-threaded environment
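
The diff shows only the f.seek(-10000 * 1024, 2) inside MDX.get_hash; a standalone sketch of the scheme that seek suggests, assuming the usual UVR approach of an MD5 digest over the file tail (the digest choice is an assumption, the body is not visible above):

    # sketch: hash the last 10,000 KiB of the model file; fall back to the
    # whole file when it is smaller than that window
    import hashlib

    def model_file_hash(model_path) -> str:
        try:
            with open(model_path, "rb") as f:
                f.seek(-10000 * 1024, 2)  # seek backwards from the end
                return hashlib.md5(f.read()).hexdigest()
        except OSError:  # file shorter than the window
            with open(model_path, "rb") as f:
                return hashlib.md5(f.read()).hexdigest()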
pyproject.toml ADDED
@@ -0,0 +1,35 @@
+ [project]
+ name = "vocal-bgm-separator"
+ version = "0.1.0"
+ description = ""
+ authors = [
+     {name = "Zhiliang Zhou", email = "[email protected]"}
+ ]
+ license = {text = "MIT"}
+ readme = "README.md"
+ requires-python = ">=3.12,<4.0"
+ dependencies = [
+     "gradio (>=5.23.0,<6.0.0)",
+     "demucs (>=4.0.1,<5.0.0)",
+     "torch (>=2.6.0,<3.0.0)",
+     "torchaudio (>=2.6.0,<3.0.0)",
+     "llvmlite (>=0.44.0,<0.45.0)",  # must be installed before librosa on Mac
+     "librosa (>=0.11.0,<0.12.0)",  # a Python package for music and audio analysis
+     "soundfile (>=0.13.1,<0.14.0)",
+     "pedalboard (>=0.9.16,<0.10.0)"  # Spotify's Python library for adding effects to audio
+ ]
+ 
+ 
+ [build-system]
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
+ build-backend = "poetry.core.masonry.api"
+ 
+ [tool.poetry.group.dev.dependencies]
+ spaces = "^0.34.0"
+ onnxruntime = "^1.21.0"
+ gradio-client = "^1.8.0"
+ jupyter = "^1.1.1"
+ qtconsole = "^5.6.1"
+ pyqt5 = "^5.15.11"
+ dotenv = "^0.9.9"
+ 
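
After poetry install, a quick check that the pinned runtime dependencies resolve (module names follow the [project] list above):

    # sketch: import the runtime dependencies pinned in pyproject.toml
    import gradio, torch, torchaudio, librosa, soundfile, pedalboard
    print(torch.__version__, librosa.__version__, gradio.__version__)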
utils.py CHANGED
@@ -1,152 +1,33 @@
  # reference: https://huggingface.co/spaces/r3gm/Audio_separator
- import os, zipfile, shutil, subprocess, shlex, sys  # noqa
+ import subprocess, shlex, sys  # noqa
  from urllib.parse import urlparse
- import re
- import logging
- import hashlib
+ import librosa
+ import numpy as np
+ from pathlib import Path
 
 
- def load_file_from_url(
-     url: str,
-     model_dir: str,
-     file_name: str | None = None,
-     overwrite: bool = False,
-     progress: bool = True,
- ) -> str:
-     """Download a file from `url` into `model_dir`,
-     using the file present if possible.
-
-     Returns the path to the downloaded file.
-     """
-     os.makedirs(model_dir, exist_ok=True)
-     if not file_name:
-         parts = urlparse(url)
-         file_name = os.path.basename(parts.path)
-     cached_file = os.path.abspath(os.path.join(model_dir, file_name))
-
-     # Overwrite
-     if os.path.exists(cached_file):
-         if overwrite or os.path.getsize(cached_file) == 0:
-             remove_files(cached_file)
-
-     # Download
-     if not os.path.exists(cached_file):
-         logger.info(f'Downloading: "{url}" to {cached_file}\n')
-         from torch.hub import download_url_to_file
-
-         download_url_to_file(url, cached_file, progress=progress)
-     else:
-         logger.debug(cached_file)
-
-     return cached_file
-
-
- def friendly_name(file: str):
-     if file.startswith("http"):
-         file = urlparse(file).path
-
-     file = os.path.basename(file)
-     model_name, extension = os.path.splitext(file)
-     return model_name, extension
-
-
- def download_manager(
-     url: str,
-     path: str,
-     extension: str = "",
-     overwrite: bool = False,
-     progress: bool = True,
- ):
-     url = url.strip()
-
-     name, ext = friendly_name(url)
-     name += ext if not extension else f".{extension}"
-
-     if url.startswith("http"):
-         filename = load_file_from_url(
-             url=url,
-             model_dir=path,
-             file_name=name,
-             overwrite=overwrite,
-             progress=progress,
-         )
-     else:
-         filename = path
-
-     return filename
-
-
- def remove_files(file_list):
-     if isinstance(file_list, str):
-         file_list = [file_list]
-
-     for file in file_list:
-         if os.path.exists(file):
-             os.remove(file)
-
-
- def remove_directory_contents(directory_path):
-     """
-     Removes all files and subdirectories within a directory.
-
-     Parameters:
-     directory_path (str): Path to the directory whose
-     contents need to be removed.
-     """
-     if os.path.exists(directory_path):
-         for filename in os.listdir(directory_path):
-             file_path = os.path.join(directory_path, filename)
-             try:
-                 if os.path.isfile(file_path):
-                     os.remove(file_path)
-                 elif os.path.isdir(file_path):
-                     shutil.rmtree(file_path)
-             except Exception as e:
-                 logger.error(f"Failed to delete {file_path}. Reason: {e}")
-         logger.info(f"Content in '{directory_path}' removed.")
-     else:
-         logger.error(f"Directory '{directory_path}' does not exist.")
-
-
- # Create directory if not exists
- def create_directories(directory_path):
-     if isinstance(directory_path, str):
-         directory_path = [directory_path]
-     for one_dir_path in directory_path:
-         if not os.path.exists(one_dir_path):
-             os.makedirs(one_dir_path)
-             logger.debug(f"Directory '{one_dir_path}' created.")
-
-
- def setup_logger(name_log):
-     logger = logging.getLogger(name_log)
-     logger.setLevel(logging.INFO)
-
-     _default_handler = logging.StreamHandler()  # Set sys.stderr as stream.
-     _default_handler.flush = sys.stderr.flush
-     logger.addHandler(_default_handler)
-
-     logger.propagate = False
-
-     handlers = logger.handlers
-
-     for handler in handlers:
-         formatter = logging.Formatter("[%(levelname)s] >> %(message)s")
-         handler.setFormatter(formatter)
-
-     return logger
-
-
- logger = setup_logger("ss")
- logger.setLevel(logging.INFO)
-
-
- def get_hash(filepath):
-     with open(filepath, 'rb') as f:
-         file_hash = hashlib.blake2b()
-         while chunk := f.read(8192):
-             file_hash.update(chunk)
-
-     return file_hash.hexdigest()[:18]
+ def convert_to_stereo_and_wav(audio_path: Path) -> Path:
+     wave, sr = librosa.load(str(audio_path), mono=False, sr=44100)
+
+     # check if mono
+     if type(wave[0]) != np.ndarray or audio_path.suffix != ".wav":  # noqa
+         stereo_path = audio_path.with_name(audio_path.stem + "_stereo.wav")
+
+         command = shlex.split(
+             f'ffmpeg -y -loglevel error -i "{str(audio_path)}" -ac 2 -f wav "{str(stereo_path)}"'
+         )
+         sub_params = {
+             "stdout": subprocess.PIPE,
+             "stderr": subprocess.PIPE,
+             "creationflags": subprocess.CREATE_NO_WINDOW
+             if sys.platform == "win32"
+             else 0,
+         }
+         process_wav = subprocess.Popen(command, **sub_params)
+         output, errors = process_wav.communicate()
+         if process_wav.returncode != 0 or not stereo_path.exists():
+             raise Exception("Error processing audio to stereo wav")
+
+         return stereo_path
+     else:
+         return Path(audio_path)
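
A short usage sketch for the new helper (the input path is a stand-in; ffmpeg must be on PATH):

    # sketch: normalize an arbitrary audio file to a stereo wav
    from pathlib import Path
    from utils import convert_to_stereo_and_wav

    stereo_path = convert_to_stereo_and_wav(Path("test.m4a"))
    print(stereo_path)  # test_stereo.wav, or the input if already a stereo wav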
uvr_processing.py ADDED
@@ -0,0 +1,236 @@
+ import torch
+ import json
+ import gc
+ import spaces
+ import librosa
+ import soundfile as sf
+ import numpy as np
+ from pathlib import Path
+ from typing import Dict, Tuple
+ from utils import convert_to_stereo_and_wav
+ from mdxnet_model import MDX, MDXModel
+ import time
+ 
+ 
+ STEM_NAMING = {
+     "Vocals": "Instrumental",
+     "Other": "Instruments",
+     "Instrumental": "Vocals",
+     "Drums": "Drumless",
+     "Bass": "Bassless",
+ }
+ 
+ 
+ @spaces.GPU()
+ def run_mdx(model_params: Dict,
+             input_filename: Path,
+             output_dir: Path,
+             model_path: Path,
+             denoise: bool = False,
+             m_threads: int = 2,
+             device_base: str = "cuda",
+             ) -> Tuple[str, str]:
+     """
+     Separate one stem with an MDX-Net model.
+     """
+     if device_base == "cuda":
+         device = torch.device("cuda:0")
+         processor_num = 0
+         device_properties = torch.cuda.get_device_properties(device)
+         vram_gb = device_properties.total_memory / 1024**3
+         m_threads = 1 if vram_gb < 8 else (8 if vram_gb > 32 else 2)
+     else:
+         device = torch.device("cpu")
+         processor_num = -1
+         m_threads = 1
+ 
+     model_hash = MDX.get_hash(model_path)  # type: str
+     mp = model_params.get(model_hash)
+     model = MDXModel(
+         device,
+         dim_f=mp["mdx_dim_f_set"],
+         dim_t=2 ** mp["mdx_dim_t_set"],
+         n_fft=mp["mdx_n_fft_scale_set"],
+         stem_name=mp["primary_stem"],
+         compensation=mp["compensate"],
+     )
+ 
+     mdx_sess = MDX(model_path, model, processor=processor_num)
+     wave, sr = librosa.load(input_filename, mono=False, sr=44100)
+     # normalizing input wave gives better output
+     peak = max(np.max(wave), abs(np.min(wave)))
+     wave /= peak
+     if denoise:
+         # average the passes on the original and polarity-inverted input
+         wave_processed = -(mdx_sess.process_wave(-wave, m_threads)) + (mdx_sess.process_wave(wave, m_threads))  # type: np.array
+         wave_processed *= 0.5
+     else:
+         wave_processed = mdx_sess.process_wave(wave, m_threads)
+     # return to previous peak
+     wave_processed *= peak
+     stem_name = model.stem_name
+ 
+     # output main track (written into output_dir, not next to the input file)
+     main_filepath = output_dir / f"{input_filename.stem}_{stem_name}.wav"
+     sf.write(main_filepath, wave_processed.T, sr)
+ 
+     # output reverse track
+     invert_filepath = output_dir / f"{input_filename.stem}_{stem_name}_reverse.wav"
+     sf.write(invert_filepath, (-wave_processed.T * model.compensation) + wave.T, sr)
+ 
+     del mdx_sess, wave_processed, wave
+     gc.collect()
+     torch.cuda.empty_cache()
+     return main_filepath, invert_filepath
+ 
+ 
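The denoise branch above runs the model on both the original and the polarity-inverted input and averages; a standalone sketch of why that cancels noise (process stands for any separation pass):

    # sketch: the separated stem flips sign with the input, while
    # input-independent model noise does not, so it cancels in the average
    import numpy as np

    def denoised(process, wave: np.ndarray) -> np.ndarray:
        return 0.5 * (process(wave) - process(-wave))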
+ def run_mdx_cpu(model_params: Dict,
+                 input_filename: Path,
+                 output_dir: Path,
+                 model_path: Path,
+                 denoise: bool = False,
+                 m_threads: int = 2,
+                 device_base: str = ""):
+     m_threads = 1
+     duration = librosa.get_duration(path=str(input_filename))  # librosa >= 0.10 uses `path`
+     if duration >= 60 and duration <= 120:
+         m_threads = 8
+     elif duration > 120:
+         m_threads = 16
+ 
+     model_hash = MDX.get_hash(model_path)
+     device = torch.device("cpu")
+     processor_num = -1
+     mp = model_params.get(model_hash)
+     model = MDXModel(
+         device,
+         dim_f=mp["mdx_dim_f_set"],
+         dim_t=2 ** mp["mdx_dim_t_set"],
+         n_fft=mp["mdx_n_fft_scale_set"],
+         stem_name=mp["primary_stem"],
+         compensation=mp["compensate"],
+     )
+ 
+     mdx_sess = MDX(model_path, model, processor=processor_num)
+     wave, sr = librosa.load(input_filename, mono=False, sr=44100)
+     # normalizing input wave gives better output
+     peak = max(np.max(wave), abs(np.min(wave)))
+     wave /= peak
+     if denoise:
+         wave_processed = -(mdx_sess.process_wave(-wave, m_threads)) + (
+             mdx_sess.process_wave(wave, m_threads)
+         )
+         wave_processed *= 0.5
+     else:
+         wave_processed = mdx_sess.process_wave(wave, m_threads)
+     # return to previous peak
+     wave_processed *= peak
+     stem_name = model.stem_name
+ 
+     # output main track (written into output_dir, not next to the input file)
+     main_filepath = output_dir / f"{input_filename.stem}_{stem_name}.wav"
+     sf.write(main_filepath, wave_processed.T, sr)
+ 
+     # output reverse track
+     invert_filepath = output_dir / f"{input_filename.stem}_{stem_name}_reverse.wav"
+     sf.write(invert_filepath, (-wave_processed.T * model.compensation) + wave.T, sr)
+ 
+     del mdx_sess, wave_processed, wave
+     gc.collect()
+     torch.cuda.empty_cache()
+     return main_filepath, invert_filepath
+ 
+ 
+ def extract_bgm(mdx_model_params: Dict,
+                 input_filename: Path,
+                 mdxnet_models_dir: Path,
+                 output_dir: Path,
+                 device_base: str = "cuda") -> Path:
+     """
+     Extract the instrumental background, removing the vocal part.
+     """
+     background_path, _ = run_mdx(model_params=mdx_model_params,
+                                  input_filename=input_filename,
+                                  output_dir=output_dir,
+                                  model_path=mdxnet_models_dir/"UVR-MDX-NET-Inst_HQ_3.onnx",
+                                  denoise=False,
+                                  device_base=device_base,
+                                  )
+     return background_path
+ 
+ 
+ def extract_vocal(mdx_model_params: Dict,
+                   input_filename: Path,
+                   mdxnet_models_dir: Path,
+                   output_dir: Path,
+                   main_vocals_flag: bool = False,
+                   dereverb_flag: bool = False,
+                   device_base: str = "cuda") -> Path:
+     """
+     Extract the vocals.
+     """
+     # first run the base vocal-separation model UVR-MDX-NET-Voc_FT.onnx
+     vocals_path, _ = run_mdx(mdx_model_params,
+                              input_filename,
+                              output_dir,
+                              mdxnet_models_dir/"UVR-MDX-NET-Voc_FT.onnx",
+                              denoise=True,
+                              device_base=device_base,
+                              )
+     # with "main_vocals_flag" set, split further into main vocals (Main) and
+     # harmony/backing vocals (Backup) with UVR_MDXNET_KARA_2.onnx
+     if main_vocals_flag:
+         time.sleep(2)
+         backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
+                                                        vocals_path,
+                                                        output_dir,
+                                                        mdxnet_models_dir/"UVR_MDXNET_KARA_2.onnx",
+                                                        denoise=True,
+                                                        device_base=device_base,
+                                                        )
+         vocals_path = main_vocals_path
+     # with "dereverb_flag" set, remove reverb with Reverb_HQ_By_FoxJoy.onnx
+     if dereverb_flag:
+         time.sleep(2)
+         _, vocals_dereverb_path = run_mdx(mdx_model_params,
+                                           vocals_path,
+                                           output_dir,
+                                           mdxnet_models_dir/"Reverb_HQ_By_FoxJoy.onnx",
+                                           denoise=True,
+                                           device_base=device_base,
+                                           )
+         vocals_path = vocals_dereverb_path
+     return vocals_path
+ 
+ def process_uvr_task(mdxnet_models_dir: Path,
+                      input_file_path: Path,
+                      output_dir: Path,
+                      main_vocals_flag: bool = False,  # with "Main" set, split main/backing vocals with UVR_MDXNET_KARA_2.onnx
+                      dereverb_flag: bool = False,  # with "DeReverb" set, remove reverb with Reverb_HQ_By_FoxJoy.onnx
+                      ) -> Tuple[Path, Path]:
+ 
+     device_base = "cuda" if torch.cuda.is_available() else "cpu"
+ 
+     # load mdx model definition
+     with open(mdxnet_models_dir/"model_data_v2.json") as infile:
+         mdx_model_params = json.load(infile)  # type: Dict
+ 
+     output_dir.mkdir(parents=True, exist_ok=True)
+     input_file_path = convert_to_stereo_and_wav(input_file_path)  # type: Path
+ 
+     # 1. extract the instrumental background, removing the vocals
+     background_path = extract_bgm(mdx_model_params,
+                                   input_file_path,
+                                   mdxnet_models_dir,
+                                   output_dir,
+                                   device_base=device_base)
+ 
+     # 2. separate the vocals, starting from the base model UVR-MDX-NET-Voc_FT.onnx
+     vocals_path = extract_vocal(mdx_model_params,
+                                 input_file_path,
+                                 mdxnet_models_dir,
+                                 output_dir,
+                                 main_vocals_flag=main_vocals_flag,
+                                 dereverb_flag=dereverb_flag,
+                                 device_base=device_base)
+ 
+     return background_path, vocals_path
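
Finally, an end-to-end sketch of the new pipeline (paths are stand-ins; the ONNX models must already sit in ./mdx_models next to model_data_v2.json):

    # sketch: full separation task, mirroring what app.inference() now does
    from pathlib import Path
    from uvr_processing import process_uvr_task

    background, vocals = process_uvr_task(
        mdxnet_models_dir=Path("./mdx_models"),
        input_file_path=Path("test.wav"),
        output_dir=Path("./output"),
    )
    print(background, vocals)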