Spaces:

lshzhm
/

DeepAudio-V1

Running

App Files Community

lshzhm committed on Mar 24

Commit

fe77cbc

verified ·

1 Parent(s): cf56b46

Upload app.py

Browse files

Files changed (1) hide show

app.py +63 -0

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import spaces
+import logging
+from datetime import datetime
+from pathlib import Path
+import gradio as gr
+import torch
+import torchaudio
+import os
+import tempfile
+# Module-level logger. getLogger() is called without a name, so this is the
+# root logger. It is not referenced anywhere else in the visible part of this file.
+log = logging.getLogger()
+#@spaces.GPU(duration=120)
+@torch.inference_mode()
+def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
+                   cfg_strength: float, duration: float):
+    os.system("bash v2a.sh")
+    return "v2a"
+video_to_audio_tab = gr.Interface(
+    fn=video_to_audio,
+    description="""
+    Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
+    Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
+    NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
+    Doing so does not improve results.
+    The model has been trained on 8-second videos. Using much longer or shorter videos will degrade performance. Around 5s~12s should be fine.
+    """,
+    inputs=[
+        gr.Video(),
+        gr.Text(label='Prompt'),
+        gr.Text(label='Negative prompt', value='music'),
+        gr.Number(label='Seed (-1: random)', value=-1, precision=0, minimum=-1),
+        gr.Number(label='Num steps', value=25, precision=0, minimum=1),
+        gr.Number(label='Guidance Strength', value=4.5, minimum=1),
+        gr.Number(label='Duration (sec)', value=8, minimum=1),
+    ],
+    outputs='playable_video',
+    cache_examples=False,
+    title='MMAudio — Video-to-Audio Synthesis',
+    examples=[
+        [
+            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_beach.mp4',
+            'waves, seagulls',
+            '',
+            0,
+            25,
+            4.5,
+            10,
+        ],
+    ])
+if __name__ == "__main__":
+    gr.TabbedInterface([video_to_audio_tab],
+                       ['Video-to-Audio']).launch()