Spaces:

lshzhm
/

DeepAudio-V1

Running

App Files Community

lshzhm committed on Mar 26

Commit

8a3c983

1 Parent(s): b61a5a8

gradio

Browse files

Files changed (2) hide show

F5-TTS/src/f5_tts/infer/infer_cli_test.py +2 -0
app.py +10 -4

F5-TTS/src/f5_tts/infer/infer_cli_test.py CHANGED Viewed

@@ -257,6 +257,8 @@ sway_sampling_coef = args.sway_sampling_coef or config.get("sway_sampling_coef",
 speed = args.speed or config.get("speed", speed)
 fix_duration = args.fix_duration or config.get("fix_duration", fix_duration)
 # patches for pip pkg user
 if "infer/examples/" in ref_audio:

 speed = args.speed or config.get("speed", speed)
 fix_duration = args.fix_duration or config.get("fix_duration", fix_duration)
+print("############nfe_step", nfe_step)
 # patches for pip pkg user
 if "infer/examples/" in ref_audio:

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ log = logging.getLogger()
 #@spaces.GPU(duration=120)
-def video_to_audio_and_speech(video: gr.Video, prompt: str, text: str, audio_prompt: gr.Audio, text_prompt: str):
     video_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
@@ -65,15 +65,15 @@ def video_to_audio_and_speech(video: gr.Video, prompt: str, text: str, audio_pro
         shutil.copy(audio_prompt, audio_p_path)
     if prompt == "":
-        command = "cd ./MMAudio; python ./demo.py --variant small_44k --output %s --video %s --calc_energy 1" % (output_dir, video_path)
     else:
-        command = "cd ./MMAudio; python ./demo.py --variant small_44k --output %s --video %s --prompt %s --calc_energy 1" % (output_dir, video_path, prompt)
     print("v2a command", command)
     os.system(command)
     video_gen = video_save_path[:-4]+".mp4.gen.mp4"
-    command = "python ./F5-TTS/src/f5_tts/infer/infer_cli_test.py --output_dir %s --start 0 --end 1 --ckpt_file ./F5-TTS/ckpts/v2c/v2c_s44.pt --v2a_path %s --wav_p %s --txt_p \"%s\" --video %s --v2a_wav %s --txt \"%s\"" % (output_dir, output_dir, audio_p_path, text_prompt, video_save_path, video_save_path[:-4]+".flac", text)
     print("v2s command", command, video_gen)
     os.system(command)
@@ -89,9 +89,11 @@ video_to_audio_and_speech_tab = gr.Interface(
     inputs=[
         gr.Video(label="Input Video"),
         gr.Text(label='Video-to-Audio Text Prompt'),
         gr.Text(label='Video-to-Speech Transcription'),
         gr.Audio(label='Video-to-Speech Speech Prompt'),
         gr.Text(label='Video-to-Speech Speech Prompt Transcription'),
     ],
     outputs=[
         gr.Video(label="Video-to-Audio Output"),
@@ -103,16 +105,20 @@ video_to_audio_and_speech_tab = gr.Interface(
         [
             './tests/0235.mp4',
             '',
             "Who finally decided to show up for work Yay",
             './tests/Gobber-00-0778.wav',
             "I've still got a few knocking around in here",
         ],
         [
             './tests/0778.mp4',
             '',
             "I've still got a few knocking around in here",
             './tests/Gobber-00-0235.wav',
             "Who finally decided to show up for work Yay",
         ],
     ])

 #@spaces.GPU(duration=120)
+def video_to_audio_and_speech(video: gr.Video, prompt: str, v2a_num_steps: int, text: str, audio_prompt: gr.Audio, text_prompt: str, v2s_num_steps: int):
     video_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
         shutil.copy(audio_prompt, audio_p_path)
     if prompt == "":
+        command = "cd ./MMAudio; python ./demo.py --variant small_44k --output %s --video %s --calc_energy 1 --num_steps %d" % (output_dir, video_path, v2a_num_steps)
     else:
+        command = "cd ./MMAudio; python ./demo.py --variant small_44k --output %s --video %s --prompt %s --calc_energy 1 --num_steps %d" % (output_dir, video_path, prompt, v2a_num_steps)
     print("v2a command", command)
     os.system(command)
     video_gen = video_save_path[:-4]+".mp4.gen.mp4"
+    command = "python ./F5-TTS/src/f5_tts/infer/infer_cli_test.py --output_dir %s --start 0 --end 1 --ckpt_file ./F5-TTS/ckpts/v2c/v2c_s44.pt --v2a_path %s --wav_p %s --txt_p \"%s\" --video %s --v2a_wav %s --txt \"%s\" --nfe_step %d" % (output_dir, output_dir, audio_p_path, text_prompt, video_save_path, video_save_path[:-4]+".flac", text, v2s_num_steps)
     print("v2s command", command, video_gen)
     os.system(command)
     inputs=[
         gr.Video(label="Input Video"),
         gr.Text(label='Video-to-Audio Text Prompt'),
+        gr.Number(label='Video-to-Audio Num Steps', value=25, precision=0, minimum=1),
         gr.Text(label='Video-to-Speech Transcription'),
         gr.Audio(label='Video-to-Speech Speech Prompt'),
         gr.Text(label='Video-to-Speech Speech Prompt Transcription'),
+        gr.Number(label='Video-to-Speech Num Steps', value=32, precision=0, minimum=1),
     ],
     outputs=[
         gr.Video(label="Video-to-Audio Output"),
         [
             './tests/0235.mp4',
             '',
+            25,
             "Who finally decided to show up for work Yay",
             './tests/Gobber-00-0778.wav',
             "I've still got a few knocking around in here",
+            32,
         ],
         [
             './tests/0778.mp4',
             '',
+            25,
             "I've still got a few knocking around in here",
             './tests/Gobber-00-0235.wav',
             "Who finally decided to show up for work Yay",
+            32,
         ],
     ])