EIFY
/

ViT_Baseline_Revisited

Grafted experiment: PyTorch model trained with tfds imagenet2012 data

31f6626 verified 5 months ago

2.41 kB

	_allow_dotted_keys:
	value: false
	_convert_dict:
	value: true
	_fields:
	value:
	ckpt_steps: 1000
	evals: \|
	val:
	data:
	name: imagenet2012
	split: validation
	log_steps: 2500
	loss_name: softmax_xent
	pp_fn: decode\|resize_small(256)\|central_crop(224)\|value_range(-1, 1)\|onehot(1000,
	key="label", key_result="labels")\|keep("image", "labels")
	type: classification
	grad_clip_norm: 1
	input: \|
	accum_freq: 8
	batch_size: 1024
	cache_raw: false
	data:
	name: imagenet2012
	split: train
	pp: decode_jpeg_and_inception_crop(224)\|flip_lr\|randaug(2,10)\|value_range(-1, 1)\|onehot(1000,
	key="label", key_result="labels")\|keep("image", "labels")
	shuffle_buffer_size: 150000
	log_training_steps: 50
	loss: softmax_xent
	lr: 0.001
	mixup: \|
	fold_in: null
	p: 0.2
	model: \|
	pool_type: gap
	posemb: sincos2d
	rep_size: false
	variant: S/16
	model_name: vit
	num_classes: 1000
	optax: \|
	mu_dtype: bfloat16
	optax_name: scale_by_adam
	pp_modules:
	- ops_general
	- ops_image
	- ops_text
	- archive.randaug
	schedule: \|
	decay_type: cosine
	warmup_steps: 10000
	seed: 0
	total_epochs: 90
	wd: 0.0001
	_locked:
	value: true
	_sort_keys:
	value: true
	_type_safe:
	value: true
	_wandb:
	value:
	cli_version: 0.18.7
	m: []
	python_version: 3.11.10
	t:
	"1":
	- 1
	- 2
	- 3
	- 12
	- 41
	- 45
	- 55
	"2":
	- 1
	- 2
	- 3
	- 12
	- 41
	- 45
	- 55
	"3":
	- 5
	- 13
	- 14
	- 16
	- 23
	- 55
	- 62
	"4": 3.11.10
	"5": 0.18.7
	"8":
	- 5
	"12": 0.18.7
	"13": linux-x86_64