lshzhm committed on
Commit
fe77cbc
·
verified ·
1 Parent(s): cf56b46

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import logging
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ import gradio as gr
7
+ import torch
8
+ import torchaudio
9
+ import os
10
+
11
+ import tempfile
12
+
13
# Module-wide logger; getLogger() with no name returns the root logger.
log = logging.getLogger()
14
+
15
+
16
+ #@spaces.GPU(duration=120)
17
@torch.inference_mode()
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                   cfg_strength: float, duration: float) -> str:
    """Run the ``v2a.sh`` shell script and return the string ``"v2a"``.

    The parameters mirror the seven input widgets of the Gradio interface
    that calls this function. None of them is currently forwarded to the
    script.

    Args:
        video: Value of the video input widget (unused).
        prompt: Text prompt (unused).
        negative_prompt: Negative text prompt (unused).
        seed: Random seed (unused).
        num_steps: Number of steps (unused).
        cfg_strength: Guidance strength (unused).
        duration: Duration in seconds (unused).

    Returns:
        The constant string ``"v2a"``, whether or not the script succeeded.
    """
    # os.system reports failure only through its return value; surface a
    # non-zero status in the log instead of dropping it silently.
    status = os.system("bash v2a.sh")
    if status != 0:
        log.error("'bash v2a.sh' exited with non-zero status %s", status)

    # NOTE(review): the interface declares a 'playable_video' output, so this
    # presumably should be the path of a file produced by v2a.sh - confirm.
    return "v2a"
24
+
25
+
26
# Gradio interface for the video-to-audio tab. The order of `inputs` (and of
# the values in each example row) follows the positional parameters of
# video_to_audio: video, prompt, negative prompt, seed, steps, guidance,
# duration.
video_to_audio_tab = gr.Interface(
    title='MMAudio — Video-to-Audio Synthesis',
    description=(
        '\n'
        'Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>\n'
        'Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>\n'
        'NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).\n'
        'Doing so does not improve results.\n'
        'The model has been trained on 8-second videos. Using much longer or shorter videos '
        'will degrade performance. Around 5s~12s should be fine.\n'
    ),
    fn=video_to_audio,
    inputs=[
        gr.Video(),
        gr.Text(label='Prompt'),
        gr.Text(label='Negative prompt', value='music'),
        gr.Number(label='Seed (-1: random)', value=-1, precision=0, minimum=-1),
        gr.Number(label='Num steps', value=25, precision=0, minimum=1),
        gr.Number(label='Guidance Strength', value=4.5, minimum=1),
        gr.Number(label='Duration (sec)', value=8, minimum=1),
    ],
    outputs='playable_video',
    examples=[
        # One example row: video URL, prompt, negative prompt, seed, steps,
        # guidance strength, duration.
        [
            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_beach.mp4',
            'waves, seagulls',
            '',
            0,
            25,
            4.5,
            10,
        ],
    ],
    cache_examples=False,
)
58
+
59
+
60
if __name__ == "__main__":
    # Wrap the single interface in a tabbed container, then start the app.
    demo = gr.TabbedInterface([video_to_audio_tab], ['Video-to-Audio'])
    demo.launch()
63
+