Ctrl+K
- chrono~accum_examples_seen
- chrono~accum_pause_time
- chrono~accum_program_time
- chrono~accum_train_time
- opt~1~0~0
- opt~1~0~1~Transformer~encoder_norm~bias
- opt~1~0~1~Transformer~encoder_norm~scale
- opt~1~0~1~Transformer~encoderblock_0~LayerNorm_0~bias
- opt~1~0~1~Transformer~encoderblock_0~LayerNorm_0~scale
- opt~1~0~1~Transformer~encoderblock_0~LayerNorm_1~bias
- opt~1~0~1~Transformer~encoderblock_0~LayerNorm_1~scale
- opt~1~0~1~Transformer~encoderblock_0~MlpBlock_0~Dense_0~bias
- opt~1~0~1~Transformer~encoderblock_0~MlpBlock_0~Dense_0~kernel
- opt~1~0~1~Transformer~encoderblock_0~MlpBlock_0~Dense_1~bias
- opt~1~0~1~Transformer~encoderblock_0~MlpBlock_0~Dense_1~kernel
- opt~1~0~1~Transformer~encoderblock_0~MultiHeadDotProductAttention_0~key~bias
- opt~1~0~1~Transformer~encoderblock_0~MultiHeadDotProductAttention_0~key~kernel
- opt~1~0~1~Transformer~encoderblock_0~MultiHeadDotProductAttention_0~out~bias
- opt~1~0~1~Transformer~encoderblock_0~MultiHeadDotProductAttention_0~out~kernel
- opt~1~0~1~Transformer~encoderblock_0~MultiHeadDotProductAttention_0~query~bias
- opt~1~0~1~Transformer~encoderblock_0~MultiHeadDotProductAttention_0~query~kernel
- opt~1~0~1~Transformer~encoderblock_0~MultiHeadDotProductAttention_0~value~bias
- opt~1~0~1~Transformer~encoderblock_0~MultiHeadDotProductAttention_0~value~kernel
- opt~1~0~1~Transformer~encoderblock_10~LayerNorm_0~bias
- opt~1~0~1~Transformer~encoderblock_10~LayerNorm_0~scale
- opt~1~0~1~Transformer~encoderblock_10~LayerNorm_1~bias
- opt~1~0~1~Transformer~encoderblock_10~LayerNorm_1~scale
- opt~1~0~1~Transformer~encoderblock_10~MlpBlock_0~Dense_0~bias
- opt~1~0~1~Transformer~encoderblock_10~MlpBlock_0~Dense_0~kernel
- opt~1~0~1~Transformer~encoderblock_10~MlpBlock_0~Dense_1~bias
- opt~1~0~1~Transformer~encoderblock_10~MlpBlock_0~Dense_1~kernel
- opt~1~0~1~Transformer~encoderblock_10~MultiHeadDotProductAttention_0~key~bias
- opt~1~0~1~Transformer~encoderblock_10~MultiHeadDotProductAttention_0~key~kernel
- opt~1~0~1~Transformer~encoderblock_10~MultiHeadDotProductAttention_0~out~bias
- opt~1~0~1~Transformer~encoderblock_10~MultiHeadDotProductAttention_0~out~kernel
- opt~1~0~1~Transformer~encoderblock_10~MultiHeadDotProductAttention_0~query~bias
- opt~1~0~1~Transformer~encoderblock_10~MultiHeadDotProductAttention_0~query~kernel
- opt~1~0~1~Transformer~encoderblock_10~MultiHeadDotProductAttention_0~value~bias
- opt~1~0~1~Transformer~encoderblock_10~MultiHeadDotProductAttention_0~value~kernel
- opt~1~0~1~Transformer~encoderblock_11~LayerNorm_0~bias
- opt~1~0~1~Transformer~encoderblock_11~LayerNorm_0~scale
- opt~1~0~1~Transformer~encoderblock_11~LayerNorm_1~bias
- opt~1~0~1~Transformer~encoderblock_11~LayerNorm_1~scale
- opt~1~0~1~Transformer~encoderblock_11~MlpBlock_0~Dense_0~bias
- opt~1~0~1~Transformer~encoderblock_11~MlpBlock_0~Dense_0~kernel
- opt~1~0~1~Transformer~encoderblock_11~MlpBlock_0~Dense_1~bias
- opt~1~0~1~Transformer~encoderblock_11~MlpBlock_0~Dense_1~kernel
- opt~1~0~1~Transformer~encoderblock_11~MultiHeadDotProductAttention_0~key~bias
- opt~1~0~1~Transformer~encoderblock_11~MultiHeadDotProductAttention_0~key~kernel
- opt~1~0~1~Transformer~encoderblock_11~MultiHeadDotProductAttention_0~out~bias