Commit 6af7d00 by masszhou · 1 Parent(s): eb9f0f0

Commit message: helllo world

Files changed (3):
  1. app.py +17 -146
  2. app_bak.py +150 -0
  3. requirements.txt +9 -7
app.py CHANGED
@@ -1,150 +1,21 @@
- # reference: https://huggingface.co/spaces/r3gm/Audio_separator
- import gradio as gr
- import shutil
- import numpy as np
- from pathlib import Path
  import os
- import time
- import torch
- from huggingface_hub import hf_hub_download
- from uvr_processing import process_uvr_task, run_mdx, get_model_params
- from utils import convert_to_stereo_and_wav
- import onnxruntime as ort
- import io
-
-
- MODEL_ID = "masszhou/mdxnet"
- MODELS_PATH = {
-     "bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
-     "basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
-     "main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx"))
- }
-
- def get_device_info():
-     if torch.cuda.is_available():
-         device = f"GPU ({torch.cuda.get_device_name(0)})"
-     else:
-         device = "CPU"
-     return f"Current running environment: {device}"
-
-
- def inference(audio_file: str,
-               stem: str = "vocal",) -> list[str]:
-     # audio_file = '/private/var/folders/02/_9ymjkz12xq8m_xh5592pl840000gn/T/gradio/74c3de047a439ea3cfb8e2d1ee6e5a85ea999d3eb30537b88d386aac177902d0/Spare Zeit und Aufwand mit den Servicevertragen von Mercedes-Benz Trucks..m4a'
-
-     if not audio_file:
-         raise ValueError("The audio path is missing.")
-
-     if not stem:
-         raise ValueError("Please select 'vocal' or 'background' stem.")
-
-     audio_file = Path(audio_file)
-     output_dir = Path("./output")
-
-
-     outputs = []
-     start_time = time.time()
-     background_path, vocals_path = process_uvr_task(
-         input_file_path=audio_file,
-         output_dir=output_dir,
-         models_path=MODELS_PATH,
-     )
-     end_time = time.time()
-     execution_time = end_time - start_time
-     print(f"Execution time: {execution_time} seconds")
-
-     print(f"Background file: {background_path}")
-     print(f"Vocals file: {vocals_path}")
-     os.makedirs("static/results", exist_ok=True)
-
-     # shutil.copy(background_path, bg_dst)
-     # shutil.copy(vocals_path, vc_dst)
-
-     outputs.append(str(background_path))
-     outputs.append(str(vocals_path))
-
-     return outputs
-
-
- def inference_bgm(audio_file: str) -> list[str]:
-     mdx_model_params = get_model_params(Path("./mdx_models"))
-     audio_file = convert_to_stereo_and_wav(Path(audio_file))  # resampling at 44100 Hz
-     device_base = "cuda" if torch.cuda.is_available() else "cpu"
-     output_dir = Path("./output")
-     model_bgm_path = MODELS_PATH["bgm"]
-     background_path, _ = run_mdx(model_params=mdx_model_params,
-                                  input_filename=audio_file,
-                                  output_dir=output_dir,
-                                  model_path=model_bgm_path,
-                                  denoise=False,
-                                  device_base=device_base,
-                                  )
-
-     return [str(background_path)]


- def return_original_file(file):
-     # read the original file name and content
-     filename = os.path.basename(file.name)
-     with open(file.path, "rb") as f:
-         content = f.read()

-     return (filename, content)
-
-
- def get_gui(theme, title, description):
-     with gr.Blocks(theme=theme) as app:
-         # Add title and description
-         gr.Markdown(title)
-         gr.Markdown(description)
-         gr.Markdown(get_device_info())
-
-         # audio_input = gr.Audio(label="Audio file", type="filepath")
-         # download_button = gr.Button("Inference")
-         # file_output = gr.File(label="Result", file_count="multiple", interactive=False)
-
-         # download_button.click(
-         #     inference,
-         #     inputs=[audio_input],
-         #     outputs=[file_output],
-         # )
-
-         audio_input = gr.File(file_types=[".mp3", ".wav"], label="上传音频")  # label text: "Upload audio"
-         download_btn = gr.DownloadData(return_original_file, audio_input)
-
-     return app
-
-
- if __name__ == "__main__":
-     title = "<center><strong><font size='7'>Vocal BGM Separator</font></strong></center>"
-     description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
-     theme = "NoCrypt/miku"
-
-     model_id = "masszhou/mdxnet"
-     models_path = {
-         "bgm": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
-         "basic_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Voc_FT.onnx")),
-         "main_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR_MDXNET_KARA_2.onnx"))
-     }
-     print(f"ort.get_available_providers(): {ort.get_available_providers()}")
-     print(gr.__version__)
-
-     # entry point for GUI
-     # predict(audio_file, api_name="/inference") -> result
-     app_gui = get_gui(theme, title, description)
-
-     # entry point for API
-     # predict(audio_file, api_name="/predict") -> output
-     app_api = gr.Interface(
-         fn=inference_bgm,
-         inputs=gr.Audio(type="filepath"),
-         outputs=gr.File(file_count="multiple"),
-     )
-
-     app = gr.TabbedInterface(
-         interface_list=[app_gui, app_api],
-         tab_names=["GUI", "API"]
-     )
-
-     app.queue(default_concurrency_limit=40)
-     app.launch()

  import os
+ import gradio as gr
+ from scipy.io.wavfile import write


+ def inference(audio):
+     os.makedirs("out", exist_ok=True)
+     write('test.wav', audio[0], audio[1])
+     os.system("python3 -m demucs.separate -n htdemucs --two-stems=vocals test.wav -o out")
+     return "./out/htdemucs/test/vocals.wav", "./out/htdemucs/test/no_vocals.wav"

+ title = "Demucs Music Source Separation (v4)"
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1911.13254' target='_blank'>Music Source Separation in the Waveform Domain</a> | <a href='https://github.com/facebookresearch/demucs' target='_blank'>Github Repo</a> | <a href='https://www.thafx.com' target='_blank'>//THAFX</a></p>"
+
+ gr.Interface(
+     inference,
+     gr.Audio(type="numpy", label="Input"),
+     [gr.Audio(type="filepath", label="Vocals"), gr.Audio(type="filepath", label="No Vocals / Instrumental")],
+     title=title,
+     article=article,
+ ).launch()
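
Note that the rewritten app.py stages every request through a fixed test.wav and a shared out/ directory, so two simultaneous uploads would overwrite each other's files. Below is a minimal per-request sketch; it only reuses the demucs CLI invocation already present in the diff, and the function name inference_isolated plus the scratch-directory layout are illustrative assumptions, not part of the commit.

import os
import subprocess
import tempfile

from scipy.io.wavfile import write


def inference_isolated(audio):
    # One scratch directory per request instead of the shared test.wav / out/.
    workdir = tempfile.mkdtemp(prefix="demucs_")
    in_wav = os.path.join(workdir, "input.wav")
    write(in_wav, audio[0], audio[1])  # gr.Audio(type="numpy") yields (sample_rate, samples)
    # Same demucs CLI call as app.py, but scoped to this request's directory.
    subprocess.run(
        ["python3", "-m", "demucs.separate", "-n", "htdemucs",
         "--two-stems=vocals", in_wav, "-o", workdir],
        check=True,
    )
    # demucs writes <out>/<model>/<track>/{vocals,no_vocals}.wav, as in the original paths.
    stem_dir = os.path.join(workdir, "htdemucs", "input")
    return os.path.join(stem_dir, "vocals.wav"), os.path.join(stem_dir, "no_vocals.wav")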
app_bak.py ADDED
@@ -0,0 +1,150 @@
+ # reference: https://huggingface.co/spaces/r3gm/Audio_separator
+ import gradio as gr
+ import shutil
+ import numpy as np
+ from pathlib import Path
+ import os
+ import time
+ import torch
+ from huggingface_hub import hf_hub_download
+ from uvr_processing import process_uvr_task, run_mdx, get_model_params
+ from utils import convert_to_stereo_and_wav
+ import onnxruntime as ort
+ import io
+
+
+ MODEL_ID = "masszhou/mdxnet"
+ MODELS_PATH = {
+     "bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
+     "basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
+     "main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx"))
+ }
+
+ def get_device_info():
+     if torch.cuda.is_available():
+         device = f"GPU ({torch.cuda.get_device_name(0)})"
+     else:
+         device = "CPU"
+     return f"Current running environment: {device}"
+
+
+ def inference(audio_file: str,
+               stem: str = "vocal",) -> list[str]:
+     # audio_file = '/private/var/folders/02/_9ymjkz12xq8m_xh5592pl840000gn/T/gradio/74c3de047a439ea3cfb8e2d1ee6e5a85ea999d3eb30537b88d386aac177902d0/Spare Zeit und Aufwand mit den Servicevertragen von Mercedes-Benz Trucks..m4a'
+
+     if not audio_file:
+         raise ValueError("The audio path is missing.")
+
+     if not stem:
+         raise ValueError("Please select 'vocal' or 'background' stem.")
+
+     audio_file = Path(audio_file)
+     output_dir = Path("./output")
+
+
+     outputs = []
+     start_time = time.time()
+     background_path, vocals_path = process_uvr_task(
+         input_file_path=audio_file,
+         output_dir=output_dir,
+         models_path=MODELS_PATH,
+     )
+     end_time = time.time()
+     execution_time = end_time - start_time
+     print(f"Execution time: {execution_time} seconds")
+
+     print(f"Background file: {background_path}")
+     print(f"Vocals file: {vocals_path}")
+     os.makedirs("static/results", exist_ok=True)
+
+     # shutil.copy(background_path, bg_dst)
+     # shutil.copy(vocals_path, vc_dst)
+
+     outputs.append(str(background_path))
+     outputs.append(str(vocals_path))
+
+     return outputs
+
+
+ def inference_bgm(audio_file: str) -> list[str]:
+     mdx_model_params = get_model_params(Path("./mdx_models"))
+     audio_file = convert_to_stereo_and_wav(Path(audio_file))  # resampling at 44100 Hz
+     device_base = "cuda" if torch.cuda.is_available() else "cpu"
+     output_dir = Path("./output")
+     model_bgm_path = MODELS_PATH["bgm"]
+     background_path, _ = run_mdx(model_params=mdx_model_params,
+                                  input_filename=audio_file,
+                                  output_dir=output_dir,
+                                  model_path=model_bgm_path,
+                                  denoise=False,
+                                  device_base=device_base,
+                                  )
+
+     return [str(background_path)]
+
+
+ def return_original_file(file):
+     # read the original file name and content
+     filename = os.path.basename(file.name)
+     with open(file.path, "rb") as f:
+         content = f.read()
+
+     return (filename, content)
+
+
+ def get_gui(theme, title, description):
+     with gr.Blocks(theme=theme) as app:
+         # Add title and description
+         gr.Markdown(title)
+         gr.Markdown(description)
+         gr.Markdown(get_device_info())
+
+         # audio_input = gr.Audio(label="Audio file", type="filepath")
+         # download_button = gr.Button("Inference")
+         # file_output = gr.File(label="Result", file_count="multiple", interactive=False)
+
+         # download_button.click(
+         #     inference,
+         #     inputs=[audio_input],
+         #     outputs=[file_output],
+         # )
+
+         audio_input = gr.File(file_types=[".mp3", ".wav"], label="上传音频")  # label text: "Upload audio"
+         download_btn = gr.DownloadData(return_original_file, audio_input)
+
+     return app
+
+
+ if __name__ == "__main__":
+     title = "<center><strong><font size='7'>Vocal BGM Separator</font></strong></center>"
+     description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
+     theme = "NoCrypt/miku"
+
+     model_id = "masszhou/mdxnet"
+     models_path = {
+         "bgm": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
+         "basic_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Voc_FT.onnx")),
+         "main_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR_MDXNET_KARA_2.onnx"))
+     }
+     print(f"ort.get_available_providers(): {ort.get_available_providers()}")
+     print(gr.__version__)
+
+     # entry point for GUI
+     # predict(audio_file, api_name="/inference") -> result
+     app_gui = get_gui(theme, title, description)
+
+     # entry point for API
+     # predict(audio_file, api_name="/predict") -> output
+     app_api = gr.Interface(
+         fn=inference_bgm,
+         inputs=gr.Audio(type="filepath"),
+         outputs=gr.File(file_count="multiple"),
+     )
+
+     app = gr.TabbedInterface(
+         interface_list=[app_gui, app_api],
+         tab_names=["GUI", "API"]
+     )
+
+     app.queue(default_concurrency_limit=40)
+     app.launch()
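
For reference, the backed-up app exposes its MDX-Net background-extraction endpoint through the gr.Interface "API" tab, and its own comment documents the call shape as predict(audio_file, api_name="/predict"). A hedged client-side sketch using gradio_client follows; the Space id below is a placeholder, and the exact api_name under the TabbedInterface should be confirmed against the deployed app.

from gradio_client import Client, handle_file

# Placeholder Space id; substitute the real one for this repository.
client = Client("masszhou/vocal-bgm-separator")
result = client.predict(
    handle_file("song.mp3"),  # any local audio file
    api_name="/predict",      # as noted in app_bak.py's entry-point comments
)
print(result)  # path(s) to the separated background (BGM) track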
requirements.txt CHANGED
@@ -1,7 +1,9 @@
- gradio
- torch
- torchaudio
- librosa
- onnxruntime
- numpy
- tqdm
+ gradio==5.23.0
+ torch==2.6.0
+ torchaudio==2.6.0
+ librosa==0.11.0
+ onnxruntime
+ numpy==2.2.4
+ tqdm==4.67.1
+ demucs==4.0.1
+ scipy==1.15.2
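
The pins above can be sanity-checked in the target environment with a few version prints. This is a generic sketch that assumes only the packages listed in requirements.txt and the conventional __version__ attribute each of them exposes.

import gradio
import torch
import onnxruntime as ort
import demucs
import scipy

print("gradio:", gradio.__version__)
print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("onnxruntime providers:", ort.get_available_providers())
print("demucs:", demucs.__version__, "| scipy:", scipy.__version__)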