Spaces:
Runtime error
Runtime error
{
  "hardware_name": "2xA10G",
  "specs": {
    "gpu_count": 2,
    "gpu_type": "A10G",
    "vram_per_gpu": 24,
    "total_vram": 48,
    "vcpu_count": 24,
    "ram": 92
  },
  "training_optimizations": {
    "per_device_batch_size": 16,
    "gradient_accumulation_steps": 4,
    "effective_batch_size": 128,
    "memory_optimizations": {
      "use_gradient_checkpointing": true,
      "pin_memory": true,
      "num_workers": 2
    },
    "distributed_settings": {
      "device_map": "auto",
      "ddp_find_unused_parameters": false
    }
  },
  "memory_breakdown": {
    "model_size": "~3.5GB (pre-quantized 4-bit)",
    "optimizer_states": "~1GB",
    "batch_memory_per_gpu": "~2GB",
    "peak_memory_estimate": "18-20GB",
    "safe_headroom": "4-6GB"
  },
  "compute_environment": "A10G_CLOUD",
  "distributed_type": "DATA_PARALLEL",
  "mixed_precision": "bf16",
  "num_gpus": 2,
  "training_parameters": {
    "per_device_train_batch_size": 16,
    "gradient_accumulation_steps": 4,
    "dataloader_num_workers": 2,
    "dataloader_pin_memory": true,
    "gradient_checkpointing": true,
    "max_grad_norm": 1.0
  },
  "memory_optimization": {
    "offload_to_cpu": false,
    "use_flash_attention": true,
    "use_gradient_checkpointing": true
  }
}