app.py
CHANGED
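Summary of the change (as read from the hunks below): the Space is pointed at the `Lightricks/LTX-Video-0.9.7-dev` checkpoint, the fixed timestep schedules used for the previous checkpoint are commented out (along with the matching `timesteps=` arguments in both pipeline calls), and the CFG and step sliders are reset to dev-style defaults (CFG 5.0, 25 steps).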
@@ -32,7 +32,7 @@ except json.JSONDecodeError:
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-
+pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-dev", torch_dtype=dtype)
 pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/ltxv-spatial-upscaler-0.9.7", vae=pipe.vae, torch_dtype=dtype)
 pipe.to(device)
 pipe_upsample.to(device)
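For context, this setup mirrors the two-stage flow from the upstream diffusers LTX-Video example: the base pipeline denoises latents at a downscaled resolution, the spatial upsampler (which takes `vae=pipe.vae` so both stages share one latent space) doubles the spatial size, and a short second pass refines the result, which is what the `generate()` hunks further down do. A minimal sketch under those assumptions; the prompt, resolutions, step counts, and frame count are placeholders, not values from app.py:

```python
import torch
from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
from diffusers.utils import export_to_video

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = LTXConditionPipeline.from_pretrained(
    "Lightricks/LTX-Video-0.9.7-dev", torch_dtype=dtype
).to(device)
pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained(
    "Lightricks/ltxv-spatial-upscaler-0.9.7", vae=pipe.vae, torch_dtype=dtype
).to(device)

prompt = "a placeholder prompt"

# Pass 1: denoise at a downscaled, VAE-friendly resolution, keeping latents.
latents = pipe(
    prompt=prompt,
    width=512, height=320, num_frames=96,
    num_inference_steps=25,
    output_type="latent",
).frames

# Pass 2: 2x spatial upsample in latent space (no decode in between).
upscaled_latents = pipe_upsample(latents=latents, output_type="latent").frames

# Pass 3: short refinement pass at the upscaled resolution, then decode.
video = pipe(
    prompt=prompt,
    width=1024, height=640, num_frames=96,
    latents=upscaled_latents,
    denoise_strength=0.999,
    num_inference_steps=10,
    decode_timestep=0.05,
    output_type="pil",
).frames[0]
export_to_video(video, "output.mp4", fps=24)
```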
@@ -93,7 +93,7 @@ def get_huggingface_safetensors_for_ltx(link): # Renamed for clarity
     print(f"Base model from card: {base_model}")
 
     # Validate model type for LTX
-    acceptable_models = {"Lightricks/LTX-Video-0.9.7-
+    acceptable_models = {"Lightricks/LTX-Video-0.9.7-dev"} # Key line for LTX compatibility
 
     models_to_check = base_model if isinstance(base_model, list) else [base_model]
 
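Given the function name and the surviving lines, the card check presumably normalizes `base_model` (which can be a single id or a list of ids) and accepts a LoRA only if some entry names the supported base checkpoint. A hedged sketch of just that membership test; `is_ltx_compatible` is a hypothetical helper name, not one from app.py:

```python
acceptable_models = {"Lightricks/LTX-Video-0.9.7-dev"}

def is_ltx_compatible(base_model):
    # The card field can be a single id or a list; normalize to a list,
    # then accept if any entry is a supported base checkpoint.
    models_to_check = base_model if isinstance(base_model, list) else [base_model]
    return any(m in acceptable_models for m in models_to_check)

assert is_ltx_compatible("Lightricks/LTX-Video-0.9.7-dev")
assert not is_ltx_compatible(["some/other-model"])
```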
@@ -423,14 +423,14 @@ def generate(prompt,
     downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
     downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
 
-    timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725]
-    timesteps_second_pass = [1000, 909, 725, 421]
-    if steps == 8:
-        timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725, 0.03]
-        timesteps_second_pass = [1000, 909, 725, 421, 0]
-    elif 7 < steps < 8: # Non-integer steps could be an issue for these pre-defined timesteps
-        timesteps_first_pass = None
-        timesteps_second_pass = None
+    #timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725]
+    #timesteps_second_pass = [1000, 909, 725, 421]
+    #if steps == 8:
+    #    timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725, 0.03]
+    #    timesteps_second_pass = [1000, 909, 725, 421, 0]
+    #elif 7 < steps < 8: # Non-integer steps could be an issue for these pre-defined timesteps
+    #    timesteps_first_pass = None
+    #    timesteps_second_pass = None
 
     with calculateDuration("video generation"):
         latents = pipe(
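Commenting out these schedules changes behavior because diffusers pipelines give an explicit `timesteps=` list precedence: the scheduler is set to exactly those values and the effective step count becomes the list's length. With the lists gone (and the matching `timesteps=` kwargs commented out in the next two hunks), the dev checkpoint runs on the scheduler's own spacing for the requested `steps`. A small illustration; `effective_num_steps` is a hypothetical helper that mirrors diffusers' `retrieve_timesteps` behavior as I understand it:

```python
def effective_num_steps(num_inference_steps, timesteps=None):
    # Mirrors diffusers' retrieve_timesteps(): an explicit timesteps list
    # overrides num_inference_steps, which becomes len(timesteps).
    if timesteps is not None:
        return len(timesteps)
    return num_inference_steps

# Pre-change: 8 steps requested, but the fixed 7-entry schedule dictates 7.
assert effective_num_steps(8, [1000, 993, 987, 981, 975, 909, 725]) == 7
# Post-change: the scheduler spaces exactly the requested number of steps.
assert effective_num_steps(8) == 8
```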
@@ -443,7 +443,7 @@ def generate(prompt,
             num_inference_steps=steps,
             decode_timestep=0.05,
             decode_noise_scale=0.025,
-            timesteps=timesteps_first_pass,
+            #timesteps=timesteps_first_pass,
             image_cond_noise_scale=0.0,
             guidance_rescale=0.7,
             guidance_scale=guidance_scale,
@@ -471,7 +471,7 @@ def generate(prompt,
             num_frames=num_frames,
             guidance_scale=guidance_scale,
             denoise_strength=0.999,
-            timesteps=timesteps_second_pass,
+            #timesteps=timesteps_second_pass,
             num_inference_steps=10, # Or make this configurable
             latents=upscaled_latents,
             decode_timestep=0.05,
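The second pass keeps `denoise_strength=0.999` over 10 steps, i.e. the upscaled latents are re-noised and nearly the whole schedule is re-run on them. In diffusers' img2img-style pipelines, strength determines how many of the requested steps actually execute; a hedged approximation of that mapping (the exact rounding inside the LTX pipeline may differ):

```python
def second_pass_steps(num_inference_steps, denoise_strength):
    # Skip the first (1 - strength) fraction of the schedule, as in
    # diffusers img2img: roughly strength * num_inference_steps steps run.
    return min(int(num_inference_steps * denoise_strength), num_inference_steps)

print(second_pass_steps(10, 0.999))  # -> 9: nearly the whole schedule
print(second_pass_steps(10, 0.4))    # -> 4: a light touch-up
```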
@@ -587,8 +587,8 @@ with gr.Blocks(css=css, theme=gr.themes.Ocean(font=[gr.themes.GoogleFont("Lexend
             seed_number_input = gr.Number(label="Seed", value=0, precision=0)
             randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
             with gr.Row():
-                guidance_scale_slider = gr.Slider(label="Guidance Scale (CFG)", minimum=0, maximum=10, value=
-                steps_slider = gr.Slider(label="Inference Steps (Main Pass)", minimum=1, maximum=30, value=
+                guidance_scale_slider = gr.Slider(label="Guidance Scale (CFG)", minimum=0, maximum=10, value=5.0, step=0.1) # LTX uses low CFG
+                steps_slider = gr.Slider(label="Inference Steps (Main Pass)", minimum=1, maximum=30, value=25, step=1) # Default steps for LTX
             # num_frames_slider = gr.Slider(label="# Frames (Debug - Overridden by Duration)", minimum=9, maximum=MAX_NUM_FRAMES, value=96, step=8, visible=False) # Hidden, as duration controls it
             with gr.Row():
                 height_slider = gr.Slider(label="Target Height", value=512, step=pipe.vae_spatial_compression_ratio, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info=f"Must be divisible by {pipe.vae_spatial_compression_ratio}.")
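Both dimension sliders step by `pipe.vae_spatial_compression_ratio` because the VAE shrinks each spatial dimension by that factor, so target sizes must be multiples of it. A sketch of what `round_to_nearest_resolution_acceptable_by_vae` (used at line 424 above) plausibly does; the default ratio of 32 and the round-to-nearest behavior are assumptions, since the real helper lives elsewhere in app.py:

```python
def round_to_nearest_resolution_acceptable_by_vae(height, width, ratio=32):
    # Snap each dimension to the nearest multiple of the VAE's spatial
    # compression ratio so the latent grid has integer dimensions.
    snap = lambda x: max(ratio, round(x / ratio) * ratio)
    return snap(height), snap(width)

print(round_to_nearest_resolution_acceptable_by_vae(320, 510))  # -> (320, 512)
```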