lora_parameters:
  r: 16
  target_modules: ["q_proj", "v_proj"]
  lora_alpha: 8.0
  lora_dropout: 0.05
  bias: none
  task_type: CAUSAL_LM

bits_and_bytes_parameters:
  load_in_4bit: true
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: true

training_arguments:
  output_dir: outputs
  evaluation_strategy: epoch
  save_strategy: epoch
  num_train_epochs: 1.0
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  optim: adamw_hf
  learning_rate: 1e-5
  fp16: true
  max_grad_norm: 0.3
  warmup_ratio: 0.03
  group_by_length: true
  lr_scheduler_type: linear

prediction_parameters:
  length_penalty: 0.8
  num_beams: 8
  max_length: 128
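The config above only lists raw settings, so the following is a minimal sketch of how they might be wired into the Hugging Face stack. The filename `config.yaml`, the use of PyYAML, and the mapping into `peft.LoraConfig`, `transformers.BitsAndBytesConfig`, and `transformers.TrainingArguments` are assumptions for illustration, not something the config itself specifies.

```python
# Hypothetical loader: maps the YAML sections above onto PEFT / Transformers objects.
import yaml
from transformers import BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig

with open("config.yaml") as f:  # assumed filename for the YAML shown above
    cfg = yaml.safe_load(f)

# LoRA adapter settings (lora_parameters section).
lora_config = LoraConfig(
    r=cfg["lora_parameters"]["r"],
    target_modules=cfg["lora_parameters"]["target_modules"],
    lora_alpha=cfg["lora_parameters"]["lora_alpha"],
    lora_dropout=cfg["lora_parameters"]["lora_dropout"],
    bias=cfg["lora_parameters"]["bias"],
    task_type=cfg["lora_parameters"]["task_type"],
)

# 4-bit NF4 quantization with double quantization (bits_and_bytes_parameters section).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["bits_and_bytes_parameters"]["load_in_4bit"],
    bnb_4bit_quant_type=cfg["bits_and_bytes_parameters"]["bnb_4bit_quant_type"],
    bnb_4bit_use_double_quant=cfg["bits_and_bytes_parameters"]["bnb_4bit_use_double_quant"],
)

# Trainer settings (training_arguments section).
train_cfg = dict(cfg["training_arguments"])
# PyYAML reads "1e-5" (no decimal point) as a string, so coerce it to float before use.
train_cfg["learning_rate"] = float(train_cfg["learning_rate"])
training_args = TrainingArguments(**train_cfg)

# Beam-search settings (prediction_parameters section), usable as model.generate(**gen_kwargs).
gen_kwargs = cfg["prediction_parameters"]
```

With these objects in hand, the usual pattern is to load the base model with `quantization_config=bnb_config`, wrap it via `peft.get_peft_model(model, lora_config)`, and pass `training_args` to a `Trainer`; that wiring is omitted here since the model and dataset are not part of this config.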