pipeline_type: multi-scale
checkpoint_path: "ltxv-13b-0.9.7-dev.safetensors"
downscale_factor: 0.6666666
spatial_upscaler_model_path: "ltxv-spatial-upscaler-0.9.7.safetensors"
stg_mode: "attention_values" # options: "attention_values", "attention_skip", "residual", "transformer_block"
decode_timestep: 0.05
decode_noise_scale: 0.025
text_encoder_model_name_or_path: "PixArt-alpha/PixArt-XL-2-1024-MS"
precision: "bfloat16"
sampler: "from_checkpoint" # options: "uniform", "linear-quadratic", "from_checkpoint"
prompt_enhancement_words_threshold: 120
prompt_enhancer_image_caption_model_name_or_path: "MiaoshouAI/Florence-2-large-PromptGen-v2.0"
prompt_enhancer_llm_model_name_or_path: "unsloth/Llama-3.2-3B-Instruct"
stochastic_sampling: false

first_pass:
  guidance_scale: [1, 1, 6, 8, 6, 1, 1]
  stg_scale: [0, 0, 4, 4, 4, 2, 1]
  rescaling_scale: [1, 1, 0.5, 0.5, 1, 1, 1]
  guidance_timesteps: [1.0, 0.996, 0.9933, 0.9850, 0.9767, 0.9008, 0.6180]
  skip_block_list: [[], [11, 25, 35, 39], [22, 35, 39], [28], [28], [28], [28]]
  num_inference_steps: 30
  skip_final_inference_steps: 3
  cfg_star_rescale: true

second_pass:
  guidance_scale: [1]
  stg_scale: [1]
  rescaling_scale: [1]
  guidance_timesteps: [1.0]
  skip_block_list: [27]
  num_inference_steps: 30
  skip_initial_inference_steps: 17
  cfg_star_rescale: true
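
# Usage sketch (illustrative, not part of the pipeline config itself): assuming the
# LTX-Video repo's inference.py entry point and its --pipeline_config flag, and that
# this file is saved under an illustrative path such as configs/ltxv-13b-0.9.7-dev.yaml,
# it would be consumed roughly as:
#   python inference.py --prompt "<PROMPT>" --height <H> --width <W> --num_frames <N> \
#     --seed <SEED> --pipeline_config configs/ltxv-13b-0.9.7-dev.yaml
# In this multi-scale setup, first_pass runs at the resolution reduced by
# downscale_factor, the result is upsampled with the spatial upscaler model, and
# second_pass refines the upscaled latents while skipping its first 17 of 30 steps.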