# microsoft
# /

# rad-dino

# Image Feature Extraction

# Model card Files Files and versions Community

# rad-dino / ssl_default_config.yaml

# fepegar's picture

# Add files helpful for fine-tuning (#6)

# 428e563 14 days ago

# history blame contribute delete

# 3.19 kB

	MODEL:
	WEIGHTS: ''
	compute_precision:
	grad_scaler: true
	teacher:
	backbone:
	sharding_strategy: SHARD_GRAD_OP
	mixed_precision:
	param_dtype: fp16
	reduce_dtype: fp16
	buffer_dtype: fp32
	dino_head:
	sharding_strategy: SHARD_GRAD_OP
	mixed_precision:
	param_dtype: fp16
	reduce_dtype: fp16
	buffer_dtype: fp32
	ibot_head:
	sharding_strategy: SHARD_GRAD_OP
	mixed_precision:
	param_dtype: fp16
	reduce_dtype: fp16
	buffer_dtype: fp32
	student:
	backbone:
	sharding_strategy: SHARD_GRAD_OP
	mixed_precision:
	param_dtype: fp16
	reduce_dtype: fp16
	buffer_dtype: fp32
	dino_head:
	sharding_strategy: SHARD_GRAD_OP
	mixed_precision:
	param_dtype: fp16
	reduce_dtype: fp32
	buffer_dtype: fp32
	ibot_head:
	sharding_strategy: SHARD_GRAD_OP
	mixed_precision:
	param_dtype: fp16
	reduce_dtype: fp32
	buffer_dtype: fp32
	dino:
	loss_weight: 1.0
	head_n_prototypes: 65536
	head_bottleneck_dim: 256
	head_nlayers: 3
	head_hidden_dim: 2048
	koleo_loss_weight: 0.1
	ibot:
	loss_weight: 1.0
	mask_sample_probability: 0.5
	mask_ratio_min_max:
	- 0.1
	- 0.5
	separate_head: false
	head_n_prototypes: 65536
	head_bottleneck_dim: 256
	head_nlayers: 3
	head_hidden_dim: 2048
	train:
	batch_size_per_gpu: 64
	dataset_path: ImageNet:split=TRAIN
	output_dir: .
	saveckp_every_n_epoch: 5
	seed: 0
	num_workers: 10
	OFFICIAL_EPOCH_LENGTH: 0 # automatic rescaling based on the dataset len is applied if this is set to 0
	cache_dataset: true
	centering: "centering" # or "sinkhorn_knopp"
	student:
	arch: vit_large
	patch_size: 16
	drop_block_rate: 0.0
	drop_path_rate: 0.3
	layerscale: 1.0e-05
	drop_path_uniform: true
	pretrained_weights: ''
	ffn_layer: "mlp"
	block_chunks: 0
	qkv_bias: true
	proj_bias: true
	ffn_bias: true
	num_register_tokens: 0
	interpolate_antialias: false
	interpolate_offset: 0.1
	load_weights: true
	checkpoints_dir: null
	teacher:
	momentum_teacher: 0.992
	final_momentum_teacher: 1
	warmup_teacher_temp: 0.04
	teacher_temp: 0.07
	warmup_teacher_temp_epochs: 30
	optim:
	epochs: 100
	weight_decay: 0.04
	weight_decay_end: 0.4
	base_lr: 0.004 # learning rate for a batch size of 1024
	lr: 0. # will be set after applying scaling rule
	warmup_epochs: 10
	min_lr: 1.0e-06
	clip_grad: 3.0
	freeze_last_layer_epochs: 1
	scaling_rule: sqrt_wrt_1024
	patch_embed_lr_mult: 0.2
	layerwise_decay: 0.9
	adamw_beta1: 0.9
	adamw_beta2: 0.999
	crops:
	global_crops_scale:
	- 0.32
	- 1.0
	local_crops_number: 8
	local_crops_scale:
	- 0.05
	- 0.32
	global_crops_size: 224
	local_crops_size: 96
	evaluation:
	eval_period_iterations: 12500
	dataset_str: None
	online: # see dinov2.eval.linear_callback for documentation
	learning_rate: 1e-6 # will be multiplied by batch size and number of devices
	num_last_blocks: 1
	add_avg_pool: true
	num_update_epochs_per_eval: 3
	augmentation:
	degrees: 30
	scale:
	- 0.8
	- 1.2
	shear: 15
	interpolation: BICUBIC
	horizontal_flip: true