File size: 3,514 Bytes
514f603
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# ---- Shared settings, referenced elsewhere in this file via ${globals.*} ----
globals:
  # Frame-rate handling for sampled clips; 'original' presumably keeps each
  # clip's native FPS — TODO confirm against the dataset implementation.
  target_fps: original
  target_nframes: 64  # frames sampled per clip
  # Modalities each dataset item yields (consumed by the LatentSeg datasets below).
  outputs:
  - image
  - view
  resolution: 112     # pixel-space frame resolution
  latent_res: 28      # 112 / 4 — latent grid size; equals denoiser sample_size below
  latent_channels: 4  # equals denoiser out_channels below
# ---- Denoiser network: latent-space 2D UNet with segmentation support ----
denoiser:
  # Project-local UNet variant; the args mirror diffusers' UNet2DModel signature.
  target: echosyn.common.models.SegUnet2DModel
  args:
    sample_size: 28       # equals globals.latent_res — the model operates on latents
    # One more than globals.latent_channels (4); the extra input channel is
    # presumably the segmentation conditioning — TODO confirm in SegUnet2DModel.
    in_channels: 5
    out_channels: 4       # equals globals.latent_channels
    center_input_sample: false
    time_embedding_type: positional  # sinusoidal timestep embedding
    freq_shift: 0
    flip_sin_to_cos: true
    # Encoder: self-attention at the three highest levels, plain resnet at the bottom.
    down_block_types:
    - AttnDownBlock2D
    - AttnDownBlock2D
    - AttnDownBlock2D
    - DownBlock2D
    # Decoder mirrors the encoder in reverse order.
    up_block_types:
    - UpBlock2D
    - AttnUpBlock2D
    - AttnUpBlock2D
    - AttnUpBlock2D
    # Channel widths per resolution level ("S"-sized model per the wandb run name).
    block_out_channels:
    - 96
    - 192
    - 288
    - 384
    layers_per_block: 2
    mid_block_scale_factor: 1
    downsample_padding: 1
    downsample_type: resnet  # resnet-style down/upsampling instead of strided conv
    upsample_type: resnet
    dropout: 0.0
    act_fn: silu
    attention_head_dim: 8
    norm_num_groups: 32
    attn_norm_num_groups: null
    norm_eps: 1.0e-05
    resnet_time_scale_shift: default
    # Class conditioning via the timestep-embedding pathway; num_class_embeds
    # stays null — NOTE(review): consistent with diffusers' 'timestep'
    # class-embed mode not needing an embedding table; confirm for this model.
    class_embed_type: timestep
    num_class_embeds: null
# ---- Optimizer ----
optimizer:
  target: torch.optim.AdamW
  args:
    lr: 5.0e-05         # base learning rate; modulated by the LR scheduler
    betas:
    - 0.9
    - 0.999
    weight_decay: 0.01  # decoupled weight decay (AdamW)
    eps: 1.0e-08
# ---- Learning-rate schedule: warmup then step-based decay (project-local) ----
scheduler:
  target: echosyn.common.schedulers.StepBasedLearningRateScheduleWithWarmup
  args:
    warmup_steps: 5000
    ref_steps: ${max_train_steps}  # decay horizon tied to the total step budget
    eta_min: 1.0e-06               # learning-rate floor
    # Decay exponent/rate — semantics defined by the project scheduler; TODO confirm.
    decay_rate: 2
# ---- Pretrained VAE for encoding/decoding latents (presumably frozen here — TODO confirm) ----
vae:
  target: diffusers.AutoencoderKL
  # Local checkpoint path; "4f4" presumably encodes 4x spatial downsampling
  # with 4 latent channels, matching globals.latent_res/latent_channels.
  pretrained: vae/avae-4f4
# ---- Training datasets: pre-encoded latent clips plus segmentation masks ----
# All four entries use the same LatentSeg loader; they differ only in data
# root, echocardiography view label, and segmentation source.
datasets:
- name: LatentSeg
  active: true
  params:
    root: avae-4f4/dynamic  # latents for the 'dynamic' corpus — presumably EchoNet-Dynamic; TODO confirm
    outputs: ${globals.outputs}
    target_fps: ${globals.target_fps}
    view_label: A4C         # apical four-chamber view
    target_nframes: ${globals.target_nframes}
    latent_channels: ${globals.latent_channels}
    segmentation_root: segmentations/dynamic
    target_resolution: ${globals.latent_res}
- name: LatentSeg
  active: true
  params:
    root: avae-4f4/ped_a4c  # pediatric A4C corpus
    outputs: ${globals.outputs}
    target_fps: ${globals.target_fps}
    view_label: A4C
    target_nframes: ${globals.target_nframes}
    latent_channels: ${globals.latent_channels}
    segmentation_root: segmentations/ped_a4c
    target_resolution: ${globals.latent_res}
- name: LatentSeg
  active: true
  params:
    root: avae-4f4/ped_psax  # pediatric parasternal short-axis corpus
    outputs: ${globals.outputs}
    target_fps: ${globals.target_fps}
    view_label: PSAX
    target_nframes: ${globals.target_nframes}
    latent_channels: ${globals.latent_channels}
    segmentation_root: segmentations/ped_psax
    target_resolution: ${globals.latent_res}
- name: LatentSeg
  active: true
  params:
    root: avae-4f4/lvh       # LVH corpus, parasternal long-axis view
    outputs: ${globals.outputs}
    target_fps: ${globals.target_fps}
    view_label: PLAX
    target_nframes: ${globals.target_nframes}
    latent_channels: ${globals.latent_channels}
    # NOTE(review): 'no_seg' looks like a sentinel — this corpus appears to
    # have no segmentations; confirm the loader handles this value specially.
    segmentation_root: no_seg
    target_resolution: ${globals.latent_res}
# ---- DataLoader ----
dataloader:
  target: torch.utils.data.DataLoader
  args:
    shuffle: true
    batch_size: 128  # presumably per-process, before gradient accumulation — TODO confirm
    num_workers: 16
    pin_memory: true
    drop_last: true  # keep batch shapes constant by dropping the final partial batch
    persistent_workers: true
# ---- Training loop settings ----
max_train_steps: 1000000
gradient_accumulation_steps: 1
mixed_precision: bf16
use_ema: true       # maintain an exponential-moving-average copy of the weights
# Offset-noise strength — exact usage defined by the training code; TODO confirm.
noise_offset: 0.1
max_grad_norm: 1.0  # gradient-norm clipping threshold
# Negative value presumably disables gradient-value clipping — TODO confirm.
max_grad_value: -1
pad_latents: false
sample_latents: true
output_dir: experiments/${wandb_args.name}
logging_dir: logs
report_to: wandb
# ---- Experiment tracking and checkpointing ----
wandb_args:
  project: EchoFlow
  name: UNet-S-4f4  # also used as the experiment output directory name
  group: UNet
checkpointing_steps: 10000  # save a checkpoint every 10k steps
# Checkpoints at these step counts are retained; others are presumably
# pruned as training advances — TODO confirm retention logic.
checkpoints_to_keep:
- 50000
- 100000
- 200000
- 500000
- 1000000
resume_from_checkpoint: latest
# ---- Periodic validation sampling ----
validation:
  samples: 4     # clips generated per validation round
  steps: 5000    # interval in training steps — presumably "validate every 5k steps"
  method: euler  # ODE/sampler method used for generation
  timesteps: 25  # sampling steps per clip
seed: 42
# NOTE(review): with max_train_steps=1000000 this epoch count looks derived
# from the dataset size rather than authoritative — confirm which one the
# training loop actually honors.
num_train_epochs: 45455