Spaces:

OpenSound
/

SoloAudio

Running on Zero

OpenSound committed 26 days ago

Commit

9c80a14

verified ·

1 Parent(s): e0f527e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -54,16 +54,21 @@ else:
     print('noise prediction')
     scheduler = DDIMScheduler(**diff_config["ddim"]['diffusers'])
-# these steps reset dtype of noise_scheduler params
-latents = torch.randn((1, 128, 128),
-                        device=device)
-noise = torch.randn(latents.shape).to(latents.device)
-timesteps = torch.randint(0, scheduler.config.num_train_timesteps,
-                            (noise.shape[0],),
-                            device=latents.device).long()
-_ = scheduler.add_noise(latents, noise, timesteps)
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
     """
     Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
@@ -112,6 +117,7 @@ def sample_diffusion(mixture, timbre, ddim_steps=50, eta=0, seed=2023, guidance_
 @spaces.GPU
 def tse(gt_file_input, text_input, num_infer_steps, eta, seed, guidance_scale, guidance_rescale):
     with torch.no_grad():
         # mixture, _ = librosa.load(gt_file_input, sr=sample_rate)
         mixture, sr = torchaudio.load(gt_file_input)

     print('noise prediction')
     scheduler = DDIMScheduler(**diff_config["ddim"]['diffusers'])
+@spaces.GPU
+def reset_scheduler_dtype():
+    latents = torch.randn((1, 128, 128), device="cuda")
+    noise = torch.randn_like(latents)
+    timesteps = torch.randint(
+        0,
+        scheduler.config.num_train_timesteps,
+        (latents.shape[0],),
+        device=latents.device
+    )
+    _ = scheduler.add_noise(latents, noise, timesteps)
+    return "Scheduler dtype reset completed."
+@spaces.GPU
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
     """
     Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
 @spaces.GPU
 def tse(gt_file_input, text_input, num_infer_steps, eta, seed, guidance_scale, guidance_rescale):
+    reset_scheduler_dtype()
     with torch.no_grad():
         # mixture, _ = librosa.load(gt_file_input, sr=sample_rate)
         mixture, sr = torchaudio.load(gt_file_input)