---
# Training configuration: flow/diffusion denoiser on pre-encoded VAE latents.
# Values of the form ${a.b} are interpolations resolved against this same
# document, so the nesting below must match those paths.

# Shared values referenced by the dataset entries via ${globals.*}.
globals:
  target_fps: original
  target_nframes: 64
  outputs:
    - image
    - view
  # NOTE(review): `resolution` is not referenced by any interpolation in this
  # file; it is assumed to live under `globals` — confirm.
  resolution: 112
  latent_res: 14
  latent_channels: 16

# Denoising network: `target` is the class path, `args` its constructor kwargs.
denoiser:
  target: echosyn.common.models.SegUnet2DModel
  args:
    # NOTE(review): sample_size (28) differs from globals.latent_res (14) —
    # confirm this is intended.
    sample_size: 28
    # NOTE(review): presumably latent_channels (16) plus one extra
    # conditioning channel — confirm against the model code.
    in_channels: 17
    out_channels: 16
    center_input_sample: false
    time_embedding_type: positional
    freq_shift: 0
    flip_sin_to_cos: true
    down_block_types:
      - AttnDownBlock2D
      - AttnDownBlock2D
      - AttnDownBlock2D
      - DownBlock2D
    up_block_types:
      - UpBlock2D
      - AttnUpBlock2D
      - AttnUpBlock2D
      - AttnUpBlock2D
    block_out_channels:
      - 96
      - 192
      - 288
      - 384
    layers_per_block: 2
    mid_block_scale_factor: 1
    downsample_padding: 1
    downsample_type: resnet
    upsample_type: resnet
    dropout: 0.0
    act_fn: silu
    attention_head_dim: 8
    norm_num_groups: 32
    attn_norm_num_groups: null
    norm_eps: 1.0e-05
    resnet_time_scale_shift: default
    class_embed_type: timestep
    num_class_embeds: null

optimizer:
  target: torch.optim.AdamW
  args:
    lr: 5.0e-05
    betas:
      - 0.9
      - 0.999
    weight_decay: 0.01
    eps: 1.0e-08

scheduler:
  target: echosyn.common.schedulers.StepBasedLearningRateScheduleWithWarmup
  args:
    warmup_steps: 5000
    # Resolves to the top-level max_train_steps key.
    ref_steps: ${max_train_steps}
    eta_min: 1.0e-06
    decay_rate: 2

vae:
  target: diffusers.AutoencoderKL
  pretrained: vae/avae-16f8

# One entry per latent dataset; all entries share the ${globals.*} settings
# and differ only in root, view_label and segmentation_root.
datasets:
  - name: LatentSeg
    active: true
    params:
      root: avae-16f8/dynamic
      outputs: ${globals.outputs}
      target_fps: ${globals.target_fps}
      view_label: A4C
      target_nframes: ${globals.target_nframes}
      latent_channels: ${globals.latent_channels}
      segmentation_root: segmentations/dynamic
      target_resolution: ${globals.latent_res}
  - name: LatentSeg
    active: true
    params:
      root: avae-16f8/ped_a4c
      outputs: ${globals.outputs}
      target_fps: ${globals.target_fps}
      view_label: A4C
      target_nframes: ${globals.target_nframes}
      latent_channels: ${globals.latent_channels}
      segmentation_root: segmentations/ped_a4c
      target_resolution: ${globals.latent_res}
  - name: LatentSeg
    active: true
    params:
      root: avae-16f8/ped_psax
      outputs: ${globals.outputs}
      target_fps: ${globals.target_fps}
      view_label: PSAX
      target_nframes: ${globals.target_nframes}
      latent_channels: ${globals.latent_channels}
      segmentation_root: segmentations/ped_psax
      target_resolution: ${globals.latent_res}
  - name: LatentSeg
    active: true
    params:
      root: avae-16f8/lvh
      outputs: ${globals.outputs}
      target_fps: ${globals.target_fps}
      view_label: PLAX
      target_nframes: ${globals.target_nframes}
      latent_channels: ${globals.latent_channels}
      # NOTE(review): `no_seg` looks like a sentinel for "no segmentation
      # available" rather than a real directory — confirm with the loader.
      segmentation_root: no_seg
      target_resolution: ${globals.latent_res}

dataloader:
  target: torch.utils.data.DataLoader
  args:
    shuffle: true
    batch_size: 128
    num_workers: 16
    pin_memory: true
    drop_last: true
    persistent_workers: true

# Top-level training settings. max_train_steps must stay at the root because
# scheduler.args.ref_steps refers to it as ${max_train_steps}.
max_train_steps: 1000000
gradient_accumulation_steps: 1
mixed_precision: bf16
use_ema: true
noise_offset: 0.1
max_grad_norm: 1.0
max_grad_value: -1
pad_latents: false
sample_latents: true

# Output location is derived from the run name in wandb_args below.
output_dir: experiments/${wandb_args.name}
logging_dir: logs
report_to: wandb
wandb_args:
  project: EchoFlow
  name: UNet-S-16f8
  group: UNet

checkpointing_steps: 10000
checkpoints_to_keep:
  - 50000
  - 100000
  - 200000
  - 500000
  - 1000000
resume_from_checkpoint: latest

validation:
  samples: 4
  steps: 5000
  method: euler
  timesteps: 25

# NOTE(review): the flattened source did not show whether `seed` and
# `num_train_epochs` belong at the root or under `validation`; root placement
# is assumed — confirm.
seed: 42
num_train_epochs: 45455
|