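# Training config for the UNet-S-4f4 latent denoiser (EchoFlow).
# The resolutions are internally consistent with a 4x-downsampling, 4-channel
# VAE ("4f4"): 112px frames map to 28x28 latents with 4 channels.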
globals:
  target_fps: original
  target_nframes: 64
  outputs:
    - image
    - view
  resolution: 112
  latent_res: 28
  latent_channels: 4
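
# Denoiser: a 2D UNet over latents. Note in_channels is 5 vs. 4 latent
# channels; the extra channel presumably carries the segmentation-mask
# conditioning (cf. SegUnet2DModel and the LatentSeg datasets below).
# class_embed_type: timestep likely mirrors diffusers' UNet2DModel, projecting
# the class label (here, probably the view label) through the sinusoidal
# timestep embedding instead of a learned embedding table.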
denoiser:
  target: echosyn.common.models.SegUnet2DModel
  args:
    sample_size: 28
    in_channels: 5
    out_channels: 4
    center_input_sample: false
    time_embedding_type: positional
    freq_shift: 0
    flip_sin_to_cos: true
    down_block_types:
      - AttnDownBlock2D
      - AttnDownBlock2D
      - AttnDownBlock2D
      - DownBlock2D
    up_block_types:
      - UpBlock2D
      - AttnUpBlock2D
      - AttnUpBlock2D
      - AttnUpBlock2D
    block_out_channels:
      - 96
      - 192
      - 288
      - 384
    layers_per_block: 2
    mid_block_scale_factor: 1
    downsample_padding: 1
    downsample_type: resnet
    upsample_type: resnet
    dropout: 0.0
    act_fn: silu
    attention_head_dim: 8
    norm_num_groups: 32
    attn_norm_num_groups: null
    norm_eps: 1.0e-05
    resnet_time_scale_shift: default
    class_embed_type: timestep
    num_class_embeds: null
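
# Optimizer: AdamW with default betas and mild weight decay.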
optimizer:
  target: torch.optim.AdamW
  args:
    lr: 5.0e-05
    betas:
      - 0.9
      - 0.999
    weight_decay: 0.01
    eps: 1.0e-08
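
# LR schedule: warmup over the first 5k steps, then decay toward eta_min with
# ref_steps tied to the total step budget. The exact warmup/decay shapes are
# defined by the custom scheduler; decay_rate presumably controls the falloff.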
scheduler:
  target: echosyn.common.schedulers.StepBasedLearningRateScheduleWithWarmup
  args:
    warmup_steps: 5000
    ref_steps: ${max_train_steps}
    eta_min: 1.0e-06
    decay_rate: 2
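
# VAE: pretrained KL autoencoder used to decode latents (e.g. for validation
# samples); training itself appears to run on precomputed latents, see the
# dataset roots below.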
vae:
  target: diffusers.AutoencoderKL
  pretrained: vae/avae-4f4
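
# Datasets: four echocardiogram latent/segmentation datasets, each tagged with
# a view label (A4C, PSAX, PLAX) used as conditioning. The lvh split has
# segmentation_root: no_seg, i.e. no masks are available for it. ${...} values
# are OmegaConf-style interpolations resolved against this file.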
datasets:
  - name: LatentSeg
    active: true
    params:
      root: avae-4f4/dynamic
      outputs: ${globals.outputs}
      target_fps: ${globals.target_fps}
      view_label: A4C
      target_nframes: ${globals.target_nframes}
      latent_channels: ${globals.latent_channels}
      segmentation_root: segmentations/dynamic
      target_resolution: ${globals.latent_res}
  - name: LatentSeg
    active: true
    params:
      root: avae-4f4/ped_a4c
      outputs: ${globals.outputs}
      target_fps: ${globals.target_fps}
      view_label: A4C
      target_nframes: ${globals.target_nframes}
      latent_channels: ${globals.latent_channels}
      segmentation_root: segmentations/ped_a4c
      target_resolution: ${globals.latent_res}
  - name: LatentSeg
    active: true
    params:
      root: avae-4f4/ped_psax
      outputs: ${globals.outputs}
      target_fps: ${globals.target_fps}
      view_label: PSAX
      target_nframes: ${globals.target_nframes}
      latent_channels: ${globals.latent_channels}
      segmentation_root: segmentations/ped_psax
      target_resolution: ${globals.latent_res}
  - name: LatentSeg
    active: true
    params:
      root: avae-4f4/lvh
      outputs: ${globals.outputs}
      target_fps: ${globals.target_fps}
      view_label: PLAX
      target_nframes: ${globals.target_nframes}
      latent_channels: ${globals.latent_channels}
      segmentation_root: no_seg
      target_resolution: ${globals.latent_res}
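
# Dataloader: drop_last keeps every batch at exactly 128 samples;
# persistent_workers avoids respawning the 16 workers each epoch.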
dataloader:
  target: torch.utils.data.DataLoader
  args:
    shuffle: true
    batch_size: 128
    num_workers: 16
    pin_memory: true
    drop_last: true
    persistent_workers: true
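
# Core training settings. noise_offset adds a small offset to the training
# noise (a common trick to improve brightness/low-frequency modeling).
# max_grad_value: -1 presumably disables element-wise gradient clipping,
# leaving only the norm-based clip at 1.0.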
max_train_steps: 1000000
gradient_accumulation_steps: 1
mixed_precision: bf16
use_ema: true
noise_offset: 0.1
max_grad_norm: 1.0
max_grad_value: -1
pad_latents: false
sample_latents: true
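
# Output paths and experiment tracking; the run directory is derived from the
# W&B run name.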
output_dir: experiments/${wandb_args.name}
logging_dir: logs
report_to: wandb
wandb_args:
  project: EchoFlow
  name: UNet-S-4f4
  group: UNet
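
# Checkpoint every 10k steps; presumably only the listed milestone checkpoints
# are kept long-term, with intermediate ones pruned.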
checkpointing_steps: 10000
checkpoints_to_keep:
  - 50000
  - 100000
  - 200000
  - 500000
  - 1000000
resume_from_checkpoint: latest
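
# Validation: generate 4 samples every 5k training steps using a 25-step Euler
# solver (consistent with flow-matching/ODE sampling).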
validation:
  samples: 4
  steps: 5000
  method: euler
  timesteps: 25
  seed: 42
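
# Likely derived from max_train_steps: 1e6 steps / ~22 optimizer steps per
# epoch gives 45455 epochs, so the step budget is the effective stopping
# criterion.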
num_train_epochs: 45455