Commit: hello world

Files changed:
- .gitignore +10 -0
- app.py +22 -11
- mdx_models/model_data_v2.json +50 -0
- mdxnet_model.py +20 -21
- pyproject.toml +35 -0
- utils.py +25 -144
- uvr_processing.py +236 -0
.gitignore
ADDED
@@ -0,0 +1,10 @@
+__pycache__
+.DS_Store
+*.wav
+*.m4a
+*.mp3
+*.mp4
+*.avi
+*.mkv
+.env
+.vscode
app.py
CHANGED
@@ -4,9 +4,9 @@ import shutil
 import numpy as np
 from pathlib import Path
 import os
-from utils import get_hash
 import time
 import torch
+from uvr_processing import process_uvr_task


 def get_device_info():
@@ -17,23 +17,34 @@ def get_device_info():
     return f"当前运行环境: {device}"


-def inference(audio_file):
+def inference(audio_file: str,
+              stem: str = "vocal",) -> list[str]:
     # audio_file = '/private/var/folders/02/_9ymjkz12xq8m_xh5592pl840000gn/T/gradio/74c3de047a439ea3cfb8e2d1ee6e5a85ea999d3eb30537b88d386aac177902d0/Spare Zeit und Aufwand mit den Servicevertragen von Mercedes-Benz Trucks..m4a'

+    if not audio_file:
+        raise ValueError("The audio path is missing.")

+    if not stem:
+        raise ValueError("Please select 'vocal' or 'background' stem.")

+    audio_file = Path(audio_file)
+    mdxnet_models_dir = Path("./mdx_models")
+    output_dir = Path("./output")

+    outputs = []
     start_time = time.time()
+    background_path, vocals_path = process_uvr_task(
+        mdxnet_models_dir=mdxnet_models_dir,
+        input_file_path=audio_file,
+        output_dir=output_dir,
+    )
+    end_time = time.time()
+    execution_time = end_time - start_time
-    shutil.copy(audio_file, output_path2)
-    return [output_path1, output_path2]
+
+    outputs.append(str(background_path))
+    outputs.append(str(vocals_path))

+    return outputs


 def get_gui(theme, title, description):
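With this change, inference is a plain function and can be smoke-tested without the Gradio UI. A minimal sketch, assuming the MDX-Net ONNX models already sit in ./mdx_models and "song.m4a" is a hypothetical stand-in for whatever file Gradio passes in:

    from app import inference

    # "song.m4a" is a hypothetical input path.
    background_file, vocals_file = inference("song.m4a", stem="vocal")
    print(background_file)  # e.g. an ..._Instrumental.wav under ./output
    print(vocals_file)      # e.g. an ..._Vocals.wav under ./output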
mdx_models/model_data_v2.json
ADDED
@@ -0,0 +1,50 @@
+{
+    "77d07b2667ddf05b9e3175941b4454a0": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals",
+        "name": "UVR-MDX-NET-Voc_FT.onnx"
+    },
+    "1d64a6d2c30f709b8c9b4ce1366d96ee": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental",
+        "name": "UVR_MDXNET_KARA_2.onnx"
+    },
+    "cd5b2989ad863f116c855db1dfe24e39": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Other",
+        "name": "Reverb_HQ_By_FoxJoy.onnx"
+    },
+    "55657dd70583b0fedfba5f67df11d711": {
+        "compensate": 1.022,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental",
+        "name": "UVR-MDX-NET-Inst_HQ_3.onnx"
+    },
+    "cc63408db3d80b4d85b0287d1d7c9632": {
+        "compensate": 1.033,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental",
+        "name": "UVR-MDX-NET-Inst_HQ_2.onnx"
+    },
+    "0f2a6bc5b49d87d64728ee40e23bceb1": {
+        "compensate": 1.022,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental",
+        "name": "UVR-MDX-NET-Inst_HQ_4.onnx"
+    }
+}
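Each top-level key is the hash of a model file as computed by MDX.get_hash; run_mdx uses it to pull that model's STFT settings. A minimal sketch of the lookup, assuming the ONNX file has already been downloaded into mdx_models/ (the models themselves are not part of this commit):

    import json
    from pathlib import Path

    from mdxnet_model import MDX

    models_dir = Path("./mdx_models")
    with open(models_dir / "model_data_v2.json") as infile:
        model_params = json.load(infile)

    model_path = models_dir / "UVR-MDX-NET-Voc_FT.onnx"  # assumed to exist locally
    mp = model_params[MDX.get_hash(model_path)]
    print(mp["primary_stem"], mp["mdx_n_fft_scale_set"])  # -> Vocals 7680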
mdxnet_model.py
CHANGED
@@ -5,20 +5,20 @@ import onnxruntime as ort
 import hashlib
 import queue
 import threading
+from pathlib import Path
 from tqdm import tqdm
+from typing import Tuple


 class MDXModel:
-    def __init__(
-        ...
-        compensation=1.000,
-    ):
+    def __init__(self,
+                 device: torch.device,
+                 dim_f: int,
+                 dim_t: int,
+                 n_fft: int,
+                 hop: int = 1024,
+                 stem_name: str = "Vocals",
+                 compensation: float = 1.000,):
         self.dim_f = dim_f  # frequency bins
         self.dim_t = dim_t
         self.dim_c = 4
@@ -92,7 +92,7 @@ class MDX:
     DEFAULT_CHUNK_SIZE = 0 * DEFAULT_SR
     DEFAULT_MARGIN_SIZE = 1 * DEFAULT_SR

-    def __init__(self, model_path: ...
+    def __init__(self, model_path: Path, params: MDXModel, processor: int = 0):
         # Set the device and the provider (CPU or CUDA)
         self.device = (
             torch.device(f"cuda:{processor}")
@@ -121,7 +121,7 @@ class MDX:
         self.prog = None

     @staticmethod
-    def get_hash(model_path: ...
+    def get_hash(model_path: Path) -> str:
         try:
             with open(model_path, "rb") as f:
                 f.seek(-10000 * 1024, 2)
@@ -132,12 +132,11 @@ class MDX:
         return model_hash

     @staticmethod
-    def segment(
-        ...
-    ):
+    def segment(wave: np.array,
+                combine: bool = True,
+                chunk_size: int = DEFAULT_CHUNK_SIZE,
+                margin_size: int = DEFAULT_MARGIN_SIZE,
+                ) -> np.array:
         """
         Segment or join segmented wave array
@@ -192,7 +191,7 @@ class MDX:

         return processed_wave

-    def pad_wave(self, wave):
+    def pad_wave(self, wave: np.array) -> Tuple[np.array, int, int]:
         """
         Pad the wave array to match the required chunk size
@@ -232,7 +231,7 @@ class MDX:

         return mix_waves, pad, trim

-    def _process_wave(self, mix_waves, trim, pad, q: queue.Queue, _id: int):
+    def _process_wave(self, mix_waves, trim, pad, q: queue.Queue, _id: int) -> np.array:
         """
         Process each wave segment in a multi-threaded environment
@@ -268,7 +267,7 @@ class MDX:
         q.put({_id: processed_signal})
         return processed_signal

-    def process_wave(self, wave: np.array, mt_threads=1):
+    def process_wave(self, wave: np.array, mt_threads=1) -> np.array:
         """
         Process the wave array in a multi-threaded environment
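get_hash fingerprints a model by hashing only the file's tail (the seek to -10000 * 1024 bytes from the end), which keeps lookups in model_data_v2.json fast even for multi-hundred-megabyte ONNX files. A standalone sketch of the same scheme; the MD5 digest and the small-file fallback are assumptions based on the upstream UVR code, since this diff view truncates that part of the body:

    import hashlib
    from pathlib import Path


    def model_file_hash(model_path: Path) -> str:
        # Hash only the last ~10 MB; digesting full ONNX weights would be slow.
        with open(model_path, "rb") as f:
            try:
                f.seek(-10000 * 1024, 2)  # assumption: mirrors MDX.get_hash
            except OSError:
                f.seek(0)  # assumption: files under ~10 MB are hashed whole
            return hashlib.md5(f.read()).hexdigest()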
pyproject.toml
ADDED
@@ -0,0 +1,35 @@
+[project]
+name = "vocal-bgm-separator"
+version = "0.1.0"
+description = ""
+authors = [
+    {name = "Zhiliang Zhou", email = "[email protected]"}
+]
+license = {text = "MIT"}
+readme = "README.md"
+requires-python = ">=3.12,<4.0"
+dependencies = [
+    "gradio (>=5.23.0,<6.0.0)",
+    "demucs (>=4.0.1,<5.0.0)",
+    "torch (>=2.6.0,<3.0.0)",
+    "torchaudio (>=2.6.0,<3.0.0)",
+    "llvmlite (>=0.44.0,<0.45.0)",  # must be installed before librosa on Mac
+    "librosa (>=0.11.0,<0.12.0)",  # a Python package for music and audio analysis
+    "soundfile (>=0.13.1,<0.14.0)",
+    "pedalboard (>=0.9.16,<0.10.0)"  # Spotify's Python library for adding effects to audio
+]
+
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry.group.dev.dependencies]
+spaces = "^0.34.0"
+onnxruntime = "^1.21.0"
+gradio-client = "^1.8.0"
+jupyter = "^1.1.1"
+qtconsole = "^5.6.1"
+pyqt5 = "^5.15.11"
+dotenv = "^0.9.9"
+
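Since the project builds with poetry-core, `poetry install` sets up the environment, including the dev group. A quick Python smoke test (illustrative only, not part of the repo) confirms the pinned audio stack resolved:

    import torch
    import librosa
    import onnxruntime

    print(torch.__version__, torch.cuda.is_available())
    print(librosa.__version__)
    print(onnxruntime.get_available_providers())  # expect CUDAExecutionProvider on GPU hardware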
utils.py
CHANGED
@@ -1,152 +1,33 @@
 # reference: https://huggingface.co/spaces/r3gm/Audio_separator
-import ...
+import subprocess, shlex, sys  # noqa
 from urllib.parse import urlparse
-import ...
-import ...
-import ...
+import librosa
+import numpy as np
+from pathlib import Path


-def load_file_from_url(
-    url: str,
-    model_dir: str,
-    file_name: str | None = None,
-    overwrite: bool = False,
-    progress: bool = True,
-) -> str:
-    """Download a file from `url` into `model_dir`,
-    using the file present if possible.
-    """
-
-    if not file_name:
-        parts = urlparse(url)
-        file_name = os.path.basename(parts.path)
-    cached_file = os.path.abspath(os.path.join(model_dir, file_name))
-
-    if overwrite or os.path.getsize(cached_file) == 0:
-        remove_files(cached_file)
-
-    # Download
-    if not os.path.exists(cached_file):
-        logger.info(f'Downloading: "{url}" to {cached_file}\n')
-        from torch.hub import download_url_to_file
-
-        download_url_to_file(url, cached_file, progress=progress)
-    else:
-        logger.debug(cached_file)
-
-    return cached_file
-
-
-def friendly_name(file: str):
-    if file.startswith("http"):
-        file = urlparse(file).path
-
-    file = os.path.basename(file)
-    model_name, extension = os.path.splitext(file)
-    return model_name, extension
-
-
-def download_manager(
-    url: str,
-    path: str,
-    extension: str = "",
-    overwrite: bool = False,
-    progress: bool = True,
-):
-    url = url.strip()
-
-    name, ext = friendly_name(url)
-    name += ext if not extension else f".{extension}"
-
-    if url.startswith("http"):
-        filename = load_file_from_url(
-            url=url,
-            model_dir=path,
-            file_name=name,
-            overwrite=overwrite,
-            progress=progress,
-        )
-    else:
-        ...
-    return filename
-
-
-def remove_files(file_list):
-    if isinstance(file_list, str):
-        file_list = [file_list]
-
-    for file in file_list:
-        if os.path.exists(file):
-            os.remove(file)
-
-
-def remove_directory_contents(directory_path):
-    """
-    Removes all files and subdirectories within a directory.
-
-    Parameters:
-    directory_path (str): Path to the directory whose
-    contents need to be removed.
-    """
-    if os.path.exists(directory_path):
-        for filename in os.listdir(directory_path):
-            file_path = os.path.join(directory_path, filename)
-            try:
-                if os.path.isfile(file_path):
-                    os.remove(file_path)
-                elif os.path.isdir(file_path):
-                    shutil.rmtree(file_path)
-            except Exception as e:
-                logger.error(f"Failed to delete {file_path}. Reason: {e}")
-        logger.info(f"Content in '{directory_path}' removed.")
-    else:
-        logger.error(f"Directory '{directory_path}' does not exist.")
-
-
-# Create directory if not exists
-def create_directories(directory_path):
-    if isinstance(directory_path, str):
-        directory_path = [directory_path]
-    for one_dir_path in directory_path:
-        if not os.path.exists(one_dir_path):
-            os.makedirs(one_dir_path)
-            logger.debug(f"Directory '{one_dir_path}' created.")
-
-
-def setup_logger(name_log):
-    logger = logging.getLogger(name_log)
-    logger.setLevel(logging.INFO)
-
-    _default_handler = logging.StreamHandler()  # Set sys.stderr as stream.
-    _default_handler.flush = sys.stderr.flush
-    logger.addHandler(_default_handler)
-
-    logger.propagate = False
-
-    handlers = logger.handlers
-
-    for handler in handlers:
-        formatter = logging.Formatter("[%(levelname)s] >> %(message)s")
-        handler.setFormatter(formatter)
-
-    # logger.handlers
-
-    return logger
-
-
-logger = setup_logger("ss")
-logger.setLevel(logging.INFO)
-
-
-def get_hash(filepath):
-    with open(filepath, 'rb') as f:
-        file_hash = hashlib.blake2b()
-        while chunk := f.read(8192):
-            file_hash.update(chunk)
-
-    return file_hash.hexdigest()[:18]
+
+
+def convert_to_stereo_and_wav(audio_path: Path) -> Path:
+    wave, sr = librosa.load(str(audio_path), mono=False, sr=44100)
+
+    # check if mono
+    if type(wave[0]) != np.ndarray or audio_path.suffix != ".wav":  # noqa
+        stereo_path = audio_path.with_name(audio_path.stem + "_stereo.wav")
+
+        command = shlex.split(
+            f'ffmpeg -y -loglevel error -i "{str(audio_path)}" -ac 2 -f wav "{str(stereo_path)}"'
+        )
+        sub_params = {
+            "stdout": subprocess.PIPE,
+            "stderr": subprocess.PIPE,
+            "creationflags": subprocess.CREATE_NO_WINDOW
+            if sys.platform == "win32"
+            else 0,
+        }
+        process_wav = subprocess.Popen(command, **sub_params)
+        output, errors = process_wav.communicate()
+        if process_wav.returncode != 0 or not stereo_path.exists():
+            raise Exception("Error processing audio to stereo wav")
+
+        return stereo_path
+    else:
+        return Path(audio_path)
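A short usage sketch for the new helper; "song.m4a" is a hypothetical input, and ffmpeg must be available on PATH:

    from pathlib import Path

    from utils import convert_to_stereo_and_wav

    src = Path("song.m4a")                # hypothetical input file
    wav = convert_to_stereo_and_wav(src)  # runs ffmpeg, writes song_stereo.wav next to it
    print(wav)  # song_stereo.wav, or the original path if it was already a stereo .wav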
uvr_processing.py
ADDED
@@ -0,0 +1,236 @@
+import torch
+import json
+import gc
+import spaces
+import librosa
+import soundfile as sf
+import numpy as np
+from pathlib import Path
+from typing import Dict, Tuple
+from utils import convert_to_stereo_and_wav
+from mdxnet_model import MDX, MDXModel
+import time
+
+
+STEM_NAMING = {
+    "Vocals": "Instrumental",
+    "Other": "Instruments",
+    "Instrumental": "Vocals",
+    "Drums": "Drumless",
+    "Bass": "Bassless",
+}
+
+
+@spaces.GPU()
+def run_mdx(model_params: Dict,
+            input_filename: Path,
+            output_dir: Path,
+            model_path: Path,
+            denoise: bool = False,
+            m_threads: int = 2,
+            device_base: str = "cuda",
+            ) -> Tuple[Path, Path]:
+    """
+    Separate one stem from the mix with an MDX model.
+    """
+    if device_base == "cuda":
+        device = torch.device("cuda:0")
+        processor_num = 0
+        device_properties = torch.cuda.get_device_properties(device)
+        vram_gb = device_properties.total_memory / 1024**3
+        m_threads = 1 if vram_gb < 8 else (8 if vram_gb > 32 else 2)
+    else:
+        device = torch.device("cpu")
+        processor_num = -1
+        m_threads = 1
+
+    model_hash = MDX.get_hash(model_path)  # type: str
+    mp = model_params.get(model_hash)
+    model = MDXModel(
+        device,
+        dim_f=mp["mdx_dim_f_set"],
+        dim_t=2 ** mp["mdx_dim_t_set"],
+        n_fft=mp["mdx_n_fft_scale_set"],
+        stem_name=mp["primary_stem"],
+        compensation=mp["compensate"],
+    )
+
+    mdx_sess = MDX(model_path, model, processor=processor_num)
+    wave, sr = librosa.load(input_filename, mono=False, sr=44100)
+    # normalizing input wave gives better output
+    peak = max(np.max(wave), abs(np.min(wave)))
+    wave /= peak
+    if denoise:
+        wave_processed = -(mdx_sess.process_wave(-wave, m_threads)) + (mdx_sess.process_wave(wave, m_threads))  # type: np.array
+        wave_processed *= 0.5
+    else:
+        wave_processed = mdx_sess.process_wave(wave, m_threads)
+    # return to previous peak
+    wave_processed *= peak
+    stem_name = model.stem_name
+
+    # output main track
+    main_filepath = output_dir / f"{input_filename.stem}_{stem_name}.wav"
+    sf.write(main_filepath, wave_processed.T, sr)
+
+    # output reverse track (the complement of the main stem)
+    invert_filepath = output_dir / f"{input_filename.stem}_{stem_name}_reverse.wav"
+    sf.write(invert_filepath, (-wave_processed.T * model.compensation) + wave.T, sr)
+
+    del mdx_sess, wave_processed, wave
+    gc.collect()
+    torch.cuda.empty_cache()
+    return main_filepath, invert_filepath
+
+
+def run_mdx_cpu(model_params: Dict,
+                input_filename: Path,
+                output_dir: Path,
+                model_path: Path,
+                denoise: bool = False,
+                m_threads: int = 2,
+                device_base: str = ""):
+    m_threads = 1
+    duration = librosa.get_duration(path=input_filename)  # librosa >= 0.10 renamed "filename" to "path"
+    if 60 <= duration <= 120:
+        m_threads = 8
+    elif duration > 120:
+        m_threads = 16
+
+    model_hash = MDX.get_hash(model_path)
+    device = torch.device("cpu")
+    processor_num = -1
+    mp = model_params.get(model_hash)
+    model = MDXModel(
+        device,
+        dim_f=mp["mdx_dim_f_set"],
+        dim_t=2 ** mp["mdx_dim_t_set"],
+        n_fft=mp["mdx_n_fft_scale_set"],
+        stem_name=mp["primary_stem"],
+        compensation=mp["compensate"],
+    )
+
+    mdx_sess = MDX(model_path, model, processor=processor_num)
+    wave, sr = librosa.load(input_filename, mono=False, sr=44100)
+    # normalizing input wave gives better output
+    peak = max(np.max(wave), abs(np.min(wave)))
+    wave /= peak
+    if denoise:
+        wave_processed = -(mdx_sess.process_wave(-wave, m_threads)) + (
+            mdx_sess.process_wave(wave, m_threads)
+        )
+        wave_processed *= 0.5
+    else:
+        wave_processed = mdx_sess.process_wave(wave, m_threads)
+    # return to previous peak
+    wave_processed *= peak
+    stem_name = model.stem_name
+
+    # output main track
+    main_filepath = output_dir / f"{input_filename.stem}_{stem_name}.wav"
+    sf.write(main_filepath, wave_processed.T, sr)
+
+    # output reverse track (the complement of the main stem)
+    invert_filepath = output_dir / f"{input_filename.stem}_{stem_name}_reverse.wav"
+    sf.write(invert_filepath, (-wave_processed.T * model.compensation) + wave.T, sr)
+
+    del mdx_sess, wave_processed, wave
+    gc.collect()
+    torch.cuda.empty_cache()
+    return main_filepath, invert_filepath
+
+
+def extract_bgm(mdx_model_params: Dict,
+                input_filename: Path,
+                mdxnet_models_dir: Path,
+                output_dir: Path,
+                device_base: str = "cuda") -> Path:
+    """
+    Extract the instrumental background, removing the vocal part.
+    """
+    background_path, _ = run_mdx(model_params=mdx_model_params,
+                                 input_filename=input_filename,
+                                 output_dir=output_dir,
+                                 model_path=mdxnet_models_dir/"UVR-MDX-NET-Inst_HQ_3.onnx",
+                                 denoise=False,
+                                 device_base=device_base,
+                                 )
+    return background_path
+
+
+def extract_vocal(mdx_model_params: Dict,
+                  input_filename: Path,
+                  mdxnet_models_dir: Path,
+                  output_dir: Path,
+                  main_vocals_flag: bool = False,
+                  dereverb_flag: bool = False,
+                  device_base: str = "cuda") -> Path:
+    """
+    Extract the vocals.
+    """
+    # First run the basic vocal-separation model, UVR-MDX-NET-Voc_FT.onnx
+    vocals_path, _ = run_mdx(mdx_model_params,
+                             input_filename,
+                             output_dir,
+                             mdxnet_models_dir/"UVR-MDX-NET-Voc_FT.onnx",
+                             denoise=True,
+                             device_base=device_base,
+                             )
+    # If "main_vocals_flag" is set, use UVR_MDXNET_KARA_2.onnx to further split
+    # the vocals into main vocals (Main) and harmony/backing vocals (Backup)
+    if main_vocals_flag:
+        time.sleep(2)
+        backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
+                                                       vocals_path,
+                                                       output_dir,
+                                                       mdxnet_models_dir/"UVR_MDXNET_KARA_2.onnx",
+                                                       denoise=True,
+                                                       device_base=device_base,
+                                                       )
+        vocals_path = main_vocals_path
+    # If "dereverb_flag" is set, use Reverb_HQ_By_FoxJoy.onnx to remove reverb
+    if dereverb_flag:
+        time.sleep(2)
+        _, vocals_dereverb_path = run_mdx(mdx_model_params,
+                                          vocals_path,
+                                          output_dir,
+                                          mdxnet_models_dir/"Reverb_HQ_By_FoxJoy.onnx",
+                                          denoise=True,
+                                          device_base=device_base,
+                                          )
+        vocals_path = vocals_dereverb_path
+    return vocals_path
+
+
+def process_uvr_task(mdxnet_models_dir: Path,
+                     input_file_path: Path,
+                     output_dir: Path,
+                     main_vocals_flag: bool = False,  # if "Main" is enabled, UVR_MDXNET_KARA_2.onnx further splits main and backing vocals
+                     dereverb_flag: bool = False,  # if "DeReverb" is enabled, Reverb_HQ_By_FoxJoy.onnx removes reverb
+                     ) -> Tuple[Path, Path]:
+
+    device_base = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # load mdx model definition
+    with open(mdxnet_models_dir/"model_data_v2.json") as infile:
+        mdx_model_params = json.load(infile)  # type: Dict
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+    input_file_path = convert_to_stereo_and_wav(input_file_path)  # type: Path
+
+    # 1. Extract the instrumental background, removing the vocal part
+    background_path = extract_bgm(mdx_model_params,
+                                  input_file_path,
+                                  mdxnet_models_dir,
+                                  output_dir,
+                                  device_base=device_base)
+
+    # 2. Separate the vocals
+    # First run the basic vocal-separation model, UVR-MDX-NET-Voc_FT.onnx
+    vocals_path = extract_vocal(mdx_model_params,
+                                input_file_path,
+                                mdxnet_models_dir,
+                                output_dir,
+                                main_vocals_flag=main_vocals_flag,
+                                dereverb_flag=dereverb_flag,
+                                device_base=device_base)
+
+    return background_path, vocals_path
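Putting it together, the whole pipeline can be driven from Python in a few lines. A minimal sketch; the paths are hypothetical, and the ONNX models plus model_data_v2.json must already be in mdx_models/:

    from pathlib import Path

    from uvr_processing import process_uvr_task

    background, vocals = process_uvr_task(
        mdxnet_models_dir=Path("./mdx_models"),
        input_file_path=Path("song.m4a"),  # hypothetical input file
        output_dir=Path("./output"),
        main_vocals_flag=False,
        dereverb_flag=False,
    )
    print(background, vocals)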