Spaces:
Running
Running
hello world
Browse files- app.py +17 -146
- app_bak.py +150 -0
- requirements.txt +9 -7
app.py
CHANGED
@@ -1,150 +1,21 @@
|
|
1 |
-
# reference: https://huggingface.co/spaces/r3gm/Audio_separator
|
2 |
-
import gradio as gr
|
3 |
-
import shutil
|
4 |
-
import numpy as np
|
5 |
-
from pathlib import Path
|
6 |
import os
|
7 |
-
import
|
8 |
-
import
|
9 |
-
from huggingface_hub import hf_hub_download
|
10 |
-
from uvr_processing import process_uvr_task, run_mdx, get_model_params
|
11 |
-
from utils import convert_to_stereo_and_wav
|
12 |
-
import onnxruntime as ort
|
13 |
-
import io
|
14 |
-
|
15 |
-
|
16 |
-
MODEL_ID = "masszhou/mdxnet"
|
17 |
-
MODELS_PATH = {
|
18 |
-
"bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
|
19 |
-
"basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
|
20 |
-
"main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx"))
|
21 |
-
}
|
22 |
-
|
23 |
-
def get_device_info():
|
24 |
-
if torch.cuda.is_available():
|
25 |
-
device = f"GPU ({torch.cuda.get_device_name(0)})"
|
26 |
-
else:
|
27 |
-
device = "CPU"
|
28 |
-
return f"Current running environment: {device}"
|
29 |
-
|
30 |
-
|
31 |
-
def inference(audio_file: str,
|
32 |
-
stem: str = "vocal",) -> list[str]:
|
33 |
-
# audio_file = '/private/var/folders/02/_9ymjkz12xq8m_xh5592pl840000gn/T/gradio/74c3de047a439ea3cfb8e2d1ee6e5a85ea999d3eb30537b88d386aac177902d0/Spare Zeit und Aufwand mit den Servicevertragen von Mercedes-Benz Trucks..m4a'
|
34 |
-
|
35 |
-
if not audio_file:
|
36 |
-
raise ValueError("The audio path is missing.")
|
37 |
-
|
38 |
-
if not stem:
|
39 |
-
raise ValueError("Please select 'vocal' or 'background' stem.")
|
40 |
-
|
41 |
-
audio_file = Path(audio_file)
|
42 |
-
output_dir = Path("./output")
|
43 |
-
|
44 |
-
|
45 |
-
outputs = []
|
46 |
-
start_time = time.time()
|
47 |
-
background_path, vocals_path = process_uvr_task(
|
48 |
-
input_file_path=audio_file,
|
49 |
-
output_dir=output_dir,
|
50 |
-
models_path=MODELS_PATH,
|
51 |
-
)
|
52 |
-
end_time = time.time()
|
53 |
-
execution_time = end_time - start_time
|
54 |
-
print(f"Execution time: {execution_time} seconds")
|
55 |
-
|
56 |
-
print(f"Background file: {background_path}")
|
57 |
-
print(f"Vocals file: {vocals_path}")
|
58 |
-
os.makedirs("static/results", exist_ok=True)
|
59 |
-
|
60 |
-
# shutil.copy(background_path, bg_dst)
|
61 |
-
# shutil.copy(vocals_path, vc_dst)
|
62 |
-
|
63 |
-
outputs.append(str(background_path))
|
64 |
-
outputs.append(str(vocals_path))
|
65 |
-
|
66 |
-
return outputs
|
67 |
-
|
68 |
-
|
69 |
-
def inference_bgm(audio_file: str) -> list[str]:
|
70 |
-
mdx_model_params = get_model_params(Path("./mdx_models"))
|
71 |
-
audio_file = convert_to_stereo_and_wav(Path(audio_file)) # resampling at 44100 Hz
|
72 |
-
device_base = "cuda" if torch.cuda.is_available() else "cpu"
|
73 |
-
output_dir = Path("./output")
|
74 |
-
model_bgm_path = MODELS_PATH["bgm"]
|
75 |
-
background_path, _ = run_mdx(model_params=mdx_model_params,
|
76 |
-
input_filename=audio_file,
|
77 |
-
output_dir=output_dir,
|
78 |
-
model_path=model_bgm_path,
|
79 |
-
denoise=False,
|
80 |
-
device_base=device_base,
|
81 |
-
)
|
82 |
-
|
83 |
-
return [str(background_path)]
|
84 |
|
85 |
|
86 |
-
def
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
# audio_input = gr.Audio(label="Audio file", type="filepath")
|
103 |
-
# download_button = gr.Button("Inference")
|
104 |
-
# file_output = gr.File(label="Result", file_count="multiple", interactive=False)
|
105 |
-
|
106 |
-
# download_button.click(
|
107 |
-
# inference,
|
108 |
-
# inputs=[audio_input],
|
109 |
-
# outputs=[file_output],
|
110 |
-
# )
|
111 |
-
|
112 |
-
audio_input = gr.File(file_types=[".mp3", ".wav"], label="上传音频")
|
113 |
-
download_btn = gr.DownloadData(return_original_file, audio_input)
|
114 |
-
|
115 |
-
return app
|
116 |
-
|
117 |
-
|
118 |
-
if __name__ == "__main__":
|
119 |
-
title = "<center><strong><font size='7'>Vocal BGM Separator</font></strong></center>"
|
120 |
-
description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
|
121 |
-
theme = "NoCrypt/miku"
|
122 |
-
|
123 |
-
model_id = "masszhou/mdxnet"
|
124 |
-
models_path = {
|
125 |
-
"bgm": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
|
126 |
-
"basic_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Voc_FT.onnx")),
|
127 |
-
"main_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR_MDXNET_KARA_2.onnx"))
|
128 |
-
}
|
129 |
-
print(f"ort.get_available_providers(): {ort.get_available_providers()}")
|
130 |
-
print(gr.__version__)
|
131 |
-
|
132 |
-
# entry point for GUI
|
133 |
-
# predict(audio_file, api_name="/inference") -> result
|
134 |
-
app_gui = get_gui(theme, title, description)
|
135 |
-
|
136 |
-
# entry point for API
|
137 |
-
# predict(audio_file, api_name="/predict") -> output
|
138 |
-
app_api = gr.Interface(
|
139 |
-
fn=inference_bgm,
|
140 |
-
inputs=gr.Audio(type="filepath"),
|
141 |
-
outputs=gr.File(file_count="multiple"),
|
142 |
-
)
|
143 |
-
|
144 |
-
app = gr.TabbedInterface(
|
145 |
-
interface_list=[app_gui, app_api],
|
146 |
-
tab_names=["GUI", "API"]
|
147 |
-
)
|
148 |
-
|
149 |
-
app.queue(default_concurrency_limit=40)
|
150 |
-
app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import gradio as gr
|
3 |
+
from scipy.io.wavfile import write
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
|
6 |
+
def inference(audio):
    """Split an uploaded clip into vocal and instrumental stems with Demucs.

    The clip is written to ``test.wav`` in the working directory and
    ``demucs.separate`` is run on it as a child process with the ``htdemucs``
    model and ``--two-stems=vocals``, writing under ``out/``.

    Args:
        audio: Indexable pair; ``audio[0]`` is passed to
            ``scipy.io.wavfile.write`` as the sample rate and ``audio[1]`` as
            the sample data.

    Returns:
        Tuple of two file paths: the vocals stem, then the no-vocals stem.

    Raises:
        ValueError: If ``audio`` is ``None``.
        subprocess.CalledProcessError: If the Demucs process exits non-zero.
    """
    import subprocess
    import sys

    if audio is None:
        raise ValueError("No audio was provided.")

    os.makedirs("out", exist_ok=True)
    write("test.wav", audio[0], audio[1])

    # Run Demucs with the interpreter that is running this app, and fail loudly
    # on a non-zero exit so output files left by an earlier run are never
    # returned as if they belonged to this request.
    # NOTE(review): the fixed "test.wav"/"out" paths are shared by every call,
    # so overlapping requests can overwrite each other's files.
    subprocess.run(
        [
            sys.executable, "-m", "demucs.separate",
            "-n", "htdemucs",
            "--two-stems=vocals",
            "test.wav",
            "-o", "out",
        ],
        check=True,
    )
    return "./out/htdemucs/test/vocals.wav", "./out/htdemucs/test/no_vocals.wav"
|
11 |
|
12 |
+
# Heading and footer HTML shown on the interface page.
title = "Demucs Music Source Separation (v4)"
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1911.13254' target='_blank'>Music Source Separation in the Waveform Domain</a> | <a href='https://github.com/facebookresearch/demucs' target='_blank'>Github Repo</a> | <a href='https://www.thafx.com' target='_blank'>//THAFX</a></p>"

# One numpy-typed audio input; two file-path audio outputs, in the same order
# as the paths returned by inference().
demo_input = gr.Audio(type="numpy", label="Input")
demo_outputs = [
    gr.Audio(type="filepath", label="Vocals"),
    gr.Audio(type="filepath", label="No Vocals / Instrumental"),
]

demo = gr.Interface(
    inference,
    demo_input,
    demo_outputs,
    title=title,
    article=article,
)
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_bak.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# reference: https://huggingface.co/spaces/r3gm/Audio_separator
|
2 |
+
import gradio as gr
|
3 |
+
import shutil
|
4 |
+
import numpy as np
|
5 |
+
from pathlib import Path
|
6 |
+
import os
|
7 |
+
import time
|
8 |
+
import torch
|
9 |
+
from huggingface_hub import hf_hub_download
|
10 |
+
from uvr_processing import process_uvr_task, run_mdx, get_model_params
|
11 |
+
from utils import convert_to_stereo_and_wav
|
12 |
+
import onnxruntime as ort
|
13 |
+
import io
|
14 |
+
|
15 |
+
|
16 |
+
# Hugging Face Hub repository the ONNX files are fetched from.
MODEL_ID = "masszhou/mdxnet"

# File name to fetch for each model role (same keys and order as before).
_MODEL_FILENAMES = {
    "bgm": "UVR-MDX-NET-Inst_HQ_3.onnx",
    "basic_vocal": "UVR-MDX-NET-Voc_FT.onnx",
    "main_vocal": "UVR_MDXNET_KARA_2.onnx",
}

# Path returned by hf_hub_download for each role; the downloads are requested
# when this module is imported.
MODELS_PATH = {
    role: Path(hf_hub_download(repo_id=MODEL_ID, filename=filename))
    for role, filename in _MODEL_FILENAMES.items()
}
|
22 |
+
|
23 |
+
def get_device_info():
    """Return a one-line description of the device torch reports.

    Returns:
        ``"Current running environment: GPU (<name of CUDA device 0>)"`` when
        ``torch.cuda.is_available()`` is true, otherwise
        ``"Current running environment: CPU"``.
    """
    if not torch.cuda.is_available():
        return "Current running environment: CPU"

    gpu_name = torch.cuda.get_device_name(0)
    return f"Current running environment: GPU ({gpu_name})"
|
29 |
+
|
30 |
+
|
31 |
+
def inference(audio_file: str, stem: str = "vocal") -> list[str]:
    """Run the UVR separation task on one audio file and time it.

    Args:
        audio_file: Path of the audio file to process; must be non-empty.
        stem: Must be non-empty. It is only checked for truthiness here and is
            not forwarded to the separation call.

    Returns:
        Two path strings: the background track, then the vocals track, as
        returned by ``process_uvr_task``.

    Raises:
        ValueError: If ``audio_file`` or ``stem`` is empty.
    """
    if not audio_file:
        raise ValueError("The audio path is missing.")
    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")

    source_path = Path(audio_file)
    destination = Path("./output")

    started = time.time()
    background_path, vocals_path = process_uvr_task(
        input_file_path=source_path,
        output_dir=destination,
        models_path=MODELS_PATH,
    )
    elapsed = time.time() - started

    print(f"Execution time: {elapsed} seconds")
    print(f"Background file: {background_path}")
    print(f"Vocals file: {vocals_path}")

    # The directory is created, but nothing in this function writes into it.
    os.makedirs("static/results", exist_ok=True)

    return [str(background_path), str(vocals_path)]
|
67 |
+
|
68 |
+
|
69 |
+
def inference_bgm(audio_file: str) -> list[str]:
    """Extract only the background track of an audio file with the "bgm" model.

    Args:
        audio_file: Path of the audio file to process.

    Returns:
        Single-element list holding the background track path as a string.
    """
    model_params = get_model_params(Path("./mdx_models"))
    # Original author's note on this step: resampling at 44100 Hz.
    wav_path = convert_to_stereo_and_wav(Path(audio_file))

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    # run_mdx returns a pair; only its first element is used here.
    background_path, _ = run_mdx(
        model_params=model_params,
        input_filename=wav_path,
        output_dir=Path("./output"),
        model_path=MODELS_PATH["bgm"],
        denoise=False,
        device_base=device,
    )
    return [str(background_path)]
|
84 |
+
|
85 |
+
|
86 |
+
def return_original_file(file):
    """Read the original file name and content of an uploaded file.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object exposing ``name`` and, optionally, ``path``. When both
            attributes exist, ``name`` supplies the file name and ``path`` the
            location that is read, as before.

    Returns:
        ``(filename, content)``: the base name of the file and its raw bytes.

    Raises:
        ValueError: If ``file`` is ``None``.
        OSError: If the file cannot be opened or read.
    """
    if file is None:
        raise ValueError("No file was provided.")

    if isinstance(file, (str, os.PathLike)):
        # A bare path has no attributes; it is both the name and the location.
        display_name = read_path = os.fspath(file)
    else:
        display_name = file.name
        # Objects without a usable ``path`` attribute are read through ``name``.
        read_path = getattr(file, "path", None) or file.name

    filename = os.path.basename(display_name)
    with open(read_path, "rb") as stream:
        content = stream.read()

    return (filename, content)
|
93 |
+
|
94 |
+
|
95 |
+
def get_gui(theme, title, description):
    """Build the GUI page: headings, a file upload, and a download button.

    Args:
        theme: Theme passed to ``gr.Blocks``.
        title: Markdown/HTML rendered as the page title.
        description: Markdown rendered under the title.

    Returns:
        The constructed ``gr.Blocks`` app.
    """
    with gr.Blocks(theme=theme) as app:
        # Title, description and the device line from get_device_info().
        gr.Markdown(title)
        gr.Markdown(description)
        gr.Markdown(get_device_info())

        # The label text reads "Upload audio".
        audio_input = gr.File(file_types=[".mp3", ".wav"], label="上传音频")

        # gr.DownloadData is the payload class of a download event, not a
        # component, so it cannot be instantiated here with a function and a
        # component. A DownloadButton is the component that serves a file; it
        # is pointed at the uploaded file whenever the upload changes.
        download_btn = gr.DownloadButton(label="Download")
        audio_input.change(
            fn=lambda uploaded_path: uploaded_path,
            inputs=audio_input,
            outputs=download_btn,
        )

    return app
|
116 |
+
|
117 |
+
|
118 |
+
if __name__ == "__main__":
    # Page chrome handed to get_gui().
    theme = "NoCrypt/miku"
    title = "<center><strong><font size='7'>Vocal BGM Separator</font></strong></center>"
    description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."

    # NOTE(review): these repeat the module-level MODEL_ID / MODELS_PATH values
    # and are not read anywhere else in this block — confirm whether needed.
    model_id = "masszhou/mdxnet"
    models_path = {
        role: Path(hf_hub_download(repo_id=model_id, filename=filename))
        for role, filename in (
            ("bgm", "UVR-MDX-NET-Inst_HQ_3.onnx"),
            ("basic_vocal", "UVR-MDX-NET-Voc_FT.onnx"),
            ("main_vocal", "UVR_MDXNET_KARA_2.onnx"),
        )
    }

    # Start-up diagnostics: available ONNX Runtime providers and Gradio version.
    available_providers = ort.get_available_providers()
    print(f"ort.get_available_providers(): {available_providers}")
    print(gr.__version__)

    # Entry point for the GUI tab.
    # Original author's note: predict(audio_file, api_name="/inference") -> result
    app_gui = get_gui(theme, title, description)

    # Entry point for the API tab.
    # Original author's note: predict(audio_file, api_name="/predict") -> output
    api_input = gr.Audio(type="filepath")
    api_output = gr.File(file_count="multiple")
    app_api = gr.Interface(fn=inference_bgm, inputs=api_input, outputs=api_output)

    # Both interfaces are served as tabs of a single app.
    app = gr.TabbedInterface(
        interface_list=[app_gui, app_api],
        tab_names=["GUI", "API"],
    )

    app.queue(default_concurrency_limit=40)
    app.launch()
|
requirements.txt
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
-
gradio
|
2 |
-
torch
|
3 |
-
torchaudio
|
4 |
-
librosa
|
5 |
-
onnxruntime
|
6 |
-
numpy
|
7 |
-
tqdm
|
|
|
|
|
|
1 |
+
gradio==5.23.0
|
2 |
+
torch==2.6.0
|
3 |
+
torchaudio==2.6.0
|
4 |
+
librosa==0.11.0
|
5 |
+
onnxruntime
|
6 |
+
numpy==2.2.4
|
7 |
+
tqdm==4.67.1
|
8 |
+
demucs==4.0.1
|
9 |
+
scipy==1.15.2
|